Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
Triton
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
rust source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.88.0
rustc 1.89.0
rustc 1.9.0
rustc 1.90.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
x86-64 GCCRS 15.2 (GCC assertions)
x86-64 GCCRS 15.2 (GCC)
Options
Source code
#![feature(core_intrinsics, portable_simd)] #[cfg(target_arch = "x86")] #[allow(unused_imports)] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] #[allow(unused_imports)] use std::arch::x86_64::*; use std::f32::consts::*; use std::intrinsics::*; use std::mem::MaybeUninit; use std::simd::*; const LANES: usize = 8; const PRECISION: usize = 3; const COS: bool = true; pub unsafe fn sin_fast_approx_bench(x: Simd<f32, LANES>) -> Simd<f32, LANES> { sin_fast_approx_simd::<LANES, PRECISION, COS>(x) } /// Inputs valid between [-2^23, 2^23]. /// Precision can set between 0 and 3, with 0 being the fastest and least /// precise, and 3 being the slowest and most precise. /// If COS is set to true, the period is offset by PI/2. /// /// As the inputs get further from 0, the accuracy gets continuously worse /// due to nature of the fast range reduction. /// /// This function should auto vectorize under LLVM with opt-level=3. /// /// The coefficient constants were derived from the constants defined here: /// https://publik-void.github.io/sin-cos-approximations/#_cos_abs_error_minimized_degree_6 #[inline(always)] pub unsafe fn sin_fast_approx<const PRECISION: usize, const COS: bool>(x: f32) -> f32 { let pi_multiples = fadd_fast( fmul_fast(x, FRAC_1_PI), if COS { 0.0_f32 } else { -0.5_f32 }, ); let rounded_multiples = nearbyintf32(pi_multiples); let pi_fraction = pi_multiples - rounded_multiples; let fraction_squared = pi_fraction * pi_fraction; let coeffs = { const COEFF_TABLE: [f32; 14] = [ -4.0_f32, 0.9719952_f32, 3.5838444_f32, -4.8911867_f32, 0.99940324_f32, -1.2221271_f32, 4.0412836_f32, -4.933938_f32, 0.9999933_f32, 0.2196968_f32, -1.3318802_f32, 4.058412_f32, -4.934793_f32, 0.99999994_f32, ]; let shifted_degree = PRECISION + 1; let slice_start = (((shifted_degree * shifted_degree) + shifted_degree) / 2) - 1; let slice_end = slice_start + PRECISION + 2; &COEFF_TABLE[slice_start..slice_end] }; let mut output = coeffs[0]; for i in 1..coeffs.len() { output = fadd_fast(fmul_fast(fraction_squared, output), coeffs[i]); } let parity_sign = (rounded_multiples.to_int_unchecked::<i32>() as u32) << 31_u32; f32::from_bits(output.to_bits() ^ parity_sign) } #[inline(always)] pub unsafe fn sin_fast_approx_simd<const LANES: usize, const PRECISION: usize, const COS: bool>( x: Simd<f32, LANES>, ) -> Simd<f32, LANES> where LaneCount<LANES>: SupportedLaneCount, { let mut vec_uninit: MaybeUninit<Simd<f32, LANES>> = MaybeUninit::uninit(); let vec_ptr = vec_uninit.as_mut_ptr(); for i in 0..LANES { (*vec_ptr)[i] = sin_fast_approx::<PRECISION, COS>(x[i]); } vec_uninit.assume_init() } /// this will be run despite it not being public. /// because we don't make it public, it gets excluded from the assembly output. /// /// Options: /// --cfg print_values /// --cfg print_error /// --cfg print_cycles #[allow(dead_code)] fn main() { const STEPS: usize = 1 << 18; const WARMUP_ITRS: usize = 1 << 18; const START: f32 = -FRAC_PI_2; const END: f32 = FRAC_PI_2; const ITRS: usize = STEPS / LANES; const SLICE: f32 = (END - START) / (STEPS as f32); const INCR: Simd<f32, LANES> = Simd::from_array([SLICE * LANES as f32; LANES]); println!("Count: {STEPS}"); #[allow(unused_mut)] let mut vec = Simd::splat(SLICE).mul_add( Simd::from_slice(&(0..LANES).collect::<Box<[usize]>>()).cast::<f32>(), Simd::splat(START), ); if cfg!(print_cycles) { if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { for _i in 0..WARMUP_ITRS { unsafe { black_box(sin_fast_approx_simd::<LANES, PRECISION, COS>(black_box( vec, ))); } } } else { panic!("CPU cycle timings are not supported on this platform"); } } #[allow(unused_variables)] let mut total_error = 0.0_f64; let mut max_error = 0.0_f64; #[allow(unused_variables)] let mut built_string: String; #[cfg(print_values)] { built_string = String::with_capacity(STEPS * 16); } #[allow(unused_variables, unused_mut)] let mut cycles_1: u64; #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))] unsafe { let _ = __cpuid(0); cycles_1 = _rdtsc() as u64; } for _i in 0..ITRS { let result = unsafe { black_box(sin_fast_approx_simd::<LANES, PRECISION, COS>(black_box( vec, ))) }; if cfg!(print_error) { let mut array: [f32; LANES] = [0.0; LANES]; for i in 0..LANES { array[i] = if COS { vec[i].cos() } else { vec[i].sin() }; } let true_result = Simd::from_array(array); // the range of sin and cos are between -1 and 1 let distance = (result.cast::<f64>() - true_result.cast::<f64>()).abs(); let distance_epsilons = distance / Simd::splat(f32::EPSILON as f64); total_error += distance_epsilons.reduce_sum(); max_error = max_error.max(distance_epsilons.reduce_max()); #[cfg(print_values)] { for i in 0..LANES { built_string.push_str(&format!( "{:?} {:?} {:?} {:.3}\n", vec[i], result[i], true_result[i], distance_epsilons[i] )); } } } else if cfg!(print_values) { #[cfg(print_values)] { for i in 0..LANES { built_string.push_str(&format!("{:?} {:?}\n", vec[i], result[i])); } } } #[cfg(any(print_values, print_error))] { vec += INCR; } } #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))] unsafe { let _ = __cpuid(0); let cycles_2 = _rdtsc(); let _ = __cpuid(0); let cycles_blank = _rdtsc(); let cycles_total = cycles_2 + cycles_2 - cycles_1 - cycles_blank; let per_iter_cycles = cycles_total as f64 / (ITRS as f64); let per_op_cycles = cycles_total as f64 / (STEPS as f64); println!("Avg Cycles Per Iter: {per_iter_cycles}\nAvg Cycles Per Op: {per_op_cycles}"); } #[cfg(print_error)] { let per_op_error = total_error / (STEPS as f64); println!("Avg Error Per Op (epsilons): {per_op_error}\nMax Error (epsilons): {max_error}") } #[cfg(print_values)] { println!("Values:\n{built_string}"); } }
Become a Patron
Sponsor on GitHub
Donate via PayPal
Compiler Explorer Shop
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
Statistics
Changelog
Version tree