Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
Triton
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
rust source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.88.0
rustc 1.89.0
rustc 1.9.0
rustc 1.90.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
x86-64 GCCRS 15.2 (GCC assertions)
x86-64 GCCRS 15.2 (GCC)
Options
Source code
#![feature(portable_simd)] // Type your code here, or load an example. // As of Rust 1.75, small functions are automatically // marked as `#[inline]` so they will not show up in // the output when compiling with optimisations. Use // `#[unsafe(no_mangle)]` or `#[inline(never)]` to // work around this issue. // See https://github.com/compiler-explorer/compiler-explorer/issues/5939 use std::simd::Simd; use std::simd::cmp::SimdPartialEq; type Item = u16; const N: usize = 64; type Mask = u64; #[inline(never)] #[unsafe(no_mangle)] pub fn run_original( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool] ) { let iter = a_idx.iter() .zip(b_idx.iter()) .zip(equal_to_results.iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { // Has found not equal to in previous column, don't need to check if !*equal_to_result { continue; } *equal_to_result = a_array[a_idx] == b_array[b_idx]; } } #[inline(never)] #[unsafe(no_mangle)] pub fn run_original_always_evaluate( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool] ) { let iter = a_idx.iter() .zip(b_idx.iter()) .zip(equal_to_results.iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { // Has found not equal to in previous column, don't need to check let result = a_array[a_idx] == b_array[b_idx]; *equal_to_result = *equal_to_result && result; } } #[inline(never)] #[unsafe(no_mangle)] pub fn run_original_unchecked( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool] ) { let iter = a_idx.iter() .zip(b_idx.iter()) .zip(equal_to_results.iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { // Has found not equal to in previous column, don't need to check if !*equal_to_result { continue; } *equal_to_result = unsafe { *a_array.get_unchecked(a_idx) == *b_array.get_unchecked(b_idx) }; } } #[inline(never)] #[unsafe(no_mangle)] pub fn run_original_unchecked_always_evaluate( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool] ) { let iter = a_idx.iter() .zip(b_idx.iter()) .zip(equal_to_results.iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { let result = unsafe { *a_array.get_unchecked(a_idx) == *b_array.get_unchecked(b_idx) }; *equal_to_result = result && *equal_to_result; } } #[inline(never)] #[unsafe(no_mangle)] pub fn run_original_with_chunks( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool] ) { let mut a_chunks = a_idx.chunks_exact(N); let a_chunks_ref = a_chunks.by_ref(); let mut b_chunks = b_idx.chunks_exact(N); let b_chunks_ref = b_chunks.by_ref(); let mut equal_to_results_chunks = equal_to_results.chunks_exact_mut(N); let equal_to_results_chunks_ref = equal_to_results_chunks.by_ref(); for ((chunk_a, chunk_b), chunk_equal_to_result) in a_chunks_ref.zip(b_chunks_ref).zip(equal_to_results_chunks_ref) { let iter = chunk_a.iter() .zip(chunk_b.iter()) .zip(chunk_equal_to_result.iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { // Has found not equal to in previous column, don't need to check if !*equal_to_result { continue; } *equal_to_result = a_array[a_idx] == b_array[b_idx]; } } { let iter = a_chunks.remainder().iter() .zip(b_chunks.remainder().iter()) .zip(equal_to_results_chunks.into_remainder().iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { // Has found not equal to in previous column, don't need to check if !*equal_to_result { continue; } *equal_to_result = a_array[a_idx] == b_array[b_idx]; } } } #[inline(never)] #[unsafe(no_mangle)] pub fn run_original_with_chunks_always_evaluate( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool] ) { let mut a_chunks = a_idx.chunks_exact(N); let a_chunks_ref = a_chunks.by_ref(); let mut b_chunks = b_idx.chunks_exact(N); let b_chunks_ref = b_chunks.by_ref(); let mut equal_to_results_chunks = equal_to_results.chunks_exact_mut(N); let equal_to_results_chunks_ref = equal_to_results_chunks.by_ref(); for ((chunk_a, chunk_b), chunk_equal_to_result) in a_chunks_ref.zip(b_chunks_ref).zip(equal_to_results_chunks_ref) { let iter = chunk_a.iter() .zip(chunk_b.iter()) .zip(chunk_equal_to_result.iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { let cmp_result = a_array[a_idx] == b_array[b_idx]; *equal_to_result = *equal_to_result && cmp_result; } } { let iter = a_chunks.remainder().iter() .zip(b_chunks.remainder().iter()) .zip(equal_to_results_chunks.into_remainder().iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { let cmp_result = a_array[a_idx] == b_array[b_idx]; *equal_to_result = *equal_to_result && cmp_result; } } } #[inline(never)] #[unsafe(no_mangle)] pub fn run_original_with_chunks_always_evaluate_unchecked( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool] ) { let mut a_chunks = a_idx.chunks_exact(N); let a_chunks_ref = a_chunks.by_ref(); let mut b_chunks = b_idx.chunks_exact(N); let b_chunks_ref = b_chunks.by_ref(); let mut equal_to_results_chunks = equal_to_results.chunks_exact_mut(N); let equal_to_results_chunks_ref = equal_to_results_chunks.by_ref(); for ((chunk_a, chunk_b), chunk_equal_to_result) in a_chunks_ref.zip(b_chunks_ref).zip(equal_to_results_chunks_ref) { let iter = chunk_a.iter() .zip(chunk_b.iter()) .zip(chunk_equal_to_result.iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { let cmp_result = unsafe { *a_array.get_unchecked(a_idx) == *b_array.get_unchecked(b_idx) } ; *equal_to_result = *equal_to_result && cmp_result; } } { let iter = a_chunks.remainder().iter() .zip(b_chunks.remainder().iter()) .zip(equal_to_results_chunks.into_remainder().iter_mut()) .map(|((a, b), c)| (a, b, c)); for (&a_idx, &b_idx, equal_to_result) in iter { let cmp_result = unsafe { *a_array.get_unchecked(a_idx) == *b_array.get_unchecked(b_idx) } ; *equal_to_result = *equal_to_result && cmp_result; } } } #[inline(always)] pub fn compare_to_bitmask(a: [Item; N], b: [Item; N]) -> Mask { let mut bitmask = 0; for (index, (l, r)) in a.into_iter().zip(b.into_iter()).enumerate() { bitmask |= ((l == r) as Mask) << index; } bitmask } #[inline(always)] pub fn auto_gather_uncheck_access(a_array: &[Item], a_idx: &[usize; N], b_array: &[Item], b_idx: &[usize; N]) -> Mask { let a_idx_values = a_idx.map(|idx| unsafe { *a_array.get_unchecked(idx) }); let b_idx_values = b_idx.map(|idx| unsafe { *b_array.get_unchecked(idx) }); let bitmask = compare_to_bitmask(a_idx_values, b_idx_values); return bitmask; } #[inline(never)] #[unsafe(no_mangle)] pub fn run_auto_gather_uncheck_access( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool], ) { run_generic( a_array, a_idx, b_array, b_idx, equal_to_results, auto_gather_uncheck_access, ); } #[inline(always)] pub fn auto_gather(a_array: &[Item], a_idx: &[usize; N], b_array: &[Item], b_idx: &[usize; N]) -> Mask { a_idx.iter().for_each(|&idx| assert!(idx < a_array.len())); b_idx.iter().for_each(|&idx| assert!(idx < b_array.len())); let a_idx_values = a_idx.map(|idx| a_array[idx]); let b_idx_values = b_idx.map(|idx| b_array[idx]); let bitmask = compare_to_bitmask(a_idx_values, b_idx_values); return bitmask; } #[inline(never)] #[unsafe(no_mangle)] pub fn run_auto_gather( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool], ) { run_generic( a_array, a_idx, b_array, b_idx, equal_to_results, auto_gather, ); } #[inline(always)] pub fn auto_gather_default_access(a_array: &[Item], a_idx: &[usize; N], b_array: &[Item], b_idx: &[usize; N]) -> Mask { let a_idx_values = a_idx.map(|idx| *a_array.get(idx).unwrap_or(&0)); let b_idx_values = b_idx.map(|idx| *b_array.get(idx).unwrap_or(&0)); let bitmask = compare_to_bitmask(a_idx_values, b_idx_values); return bitmask; } #[inline(never)] #[unsafe(no_mangle)] pub fn run_auto_gather_default_access( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool], ) { run_generic( a_array, a_idx, b_array, b_idx, equal_to_results, auto_gather_default_access, ); } #[inline(always)] pub fn simd_gather(a_array: &[Item], a_idx: &[usize; N], b_array: &[Item], b_idx: &[usize; N]) -> Mask { let a_idx_simd = Simd::<_, N>::from_slice(a_idx); let a_idx_values_simd = Simd::gather_or_default(a_array, a_idx_simd); let b_idx_simd = Simd::<_, N>::from_slice(b_idx); let b_idx_values_simd = Simd::gather_or_default(b_array, b_idx_simd); let eq = a_idx_values_simd.simd_eq(b_idx_values_simd); // TODO - make sure this only contain 64 elements let bitmask = eq.to_bitmask(); return bitmask as Mask; } #[inline(never)] #[unsafe(no_mangle)] pub fn run_simd_gather( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool], ) { run_generic( a_array, a_idx, b_array, b_idx, equal_to_results, simd_gather, ); } #[inline(always)] pub fn run_generic( a_array: &[Item], a_idx: &[usize], b_array: &[Item], b_idx: &[usize], equal_to_results: &mut[bool], gather_and_compare_fn: impl Fn(&[Item], &[usize; N], &[Item], &[usize; N]) -> Mask ) { let (start_leftover_index, lhs_rows_leftover, rhs_rows_leftover) = run_on_tuple_chunks::<usize, N>( a_idx, b_idx, &mut |range: std::ops::Range<usize>, lhs_rows_idxs: &[usize; N], rhs_rows_idxs: &[usize; N]| { let bitmask = gather_and_compare_fn( a_array, lhs_rows_idxs, b_array, rhs_rows_idxs, ); let equal_to_results = &mut equal_to_results[range]; apply_equal_mask_to_already_equal_to(equal_to_results, bitmask); }, ); // Ignore left over for now } #[inline(never)] fn apply_equal_mask_to_already_equal_to(equal_to_results: &mut [bool], bitmask: Mask) { equal_to_results.iter_mut().enumerate().for_each(|(i, r)| { let is_bit_set = bitmask & (1 << i) != 0; // If already false, keep it false // if true, set to the bitmask result *r = *r && is_bit_set }); } /// Prepare slice of T into chunks of N, and run the provided function on each chunk pair /// /// This is to nudge the compiler to auto-vectorize the operation on each chunk /// #[inline(always)] fn run_on_tuple_chunks<'a, T, const N: usize>( slice_a: &'a [T], slice_b: &'a [T], run_on_chunk_pair: &mut impl FnMut(std::ops::Range<usize>, &[T; N], &[T; N]), ) -> (usize, &'a [T], &'a [T]) { assert_eq!(slice_a.len(), slice_b.len()); let (chunks_a, remainder_a) = slice_a.as_chunks::<N>(); let (chunks_b, remainder_b) = slice_b.as_chunks::<N>(); let mut i = 0; for (chunk_a, chunk_b) in chunks_a.iter().zip(chunks_b.iter()) { run_on_chunk_pair(i..i + N, chunk_a, chunk_b); i += N; } (i, remainder_a, remainder_b) } // If you use `main()`, declare it as `pub` to see it in the output: // pub fn main() { ... }
Become a Patron
Sponsor on GitHub
Donate via PayPal
Compiler Explorer Shop
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
Statistics
Changelog
Version tree