Compiler Explorer
Rust source #2: word-wise copy_forward using unordered atomic loads
#![feature(core_intrinsics)]
#![feature(cfg_target_has_atomic)]
#![allow(internal_features)]

use core::intrinsics::likely;

const WORD_SIZE: usize = core::mem::size_of::<usize>();
const WORD_MASK: usize = WORD_SIZE - 1;

// If the number of bytes involved exceeds this threshold we will opt into word-wise copy.
// The value selected here is max(2 * WORD_SIZE, 16):
// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through
//   word-wise copy.
// * The word-wise copy logic needs to perform some checks, so it has some small overhead. The
//   threshold ensures that even on 32-bit platforms we have copied at least 8 bytes through
//   word-wise copy, so the savings of word-wise copy outweigh the fixed overhead.
const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { 2 * WORD_SIZE } else { 16 };

#[cfg(feature = "mem-unaligned")]
unsafe fn read_usize_unaligned(x: *const usize) -> usize {
    // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
    // is translated to memcpy in LLVM.
    let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read();
    core::mem::transmute(x_read)
}

#[unsafe(no_mangle)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
    #[inline(always)]
    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
        let dest_end = dest.wrapping_add(n);
        while dest < dest_end {
            *dest = *src;
            dest = dest.wrapping_add(1);
            src = src.wrapping_add(1);
        }
    }

    #[inline(always)]
    unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
        let mut dest_usize = dest as *mut usize;
        let mut src_usize = src as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;
        while dest_usize < dest_end {
            *dest_usize = *src_usize;
            dest_usize = dest_usize.wrapping_add(1);
            src_usize = src_usize.wrapping_add(1);
        }
    }

    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
        let mut dest_usize = dest as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;

        // Calculate the misalignment offset and shift needed to reassemble the value.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

        // Realign src
        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
        // This will read (but won't use) bytes out of bounds.
        // cfg needed because not all targets will have atomic loads that can be lowered
        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
        #[cfg(target_has_atomic_load_store = "ptr")]
        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
        #[cfg(not(target_has_atomic_load_store = "ptr"))]
        let mut prev_word = core::ptr::read_volatile(src_aligned);

        while dest_usize < dest_end {
            src_aligned = src_aligned.wrapping_add(1);
            let cur_word = *src_aligned;
            #[cfg(target_endian = "little")]
            let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
            #[cfg(target_endian = "big")]
            let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
            prev_word = cur_word;

            *dest_usize = reassembled;
            dest_usize = dest_usize.wrapping_add(1);
        }
    }

    if n >= WORD_COPY_THRESHOLD {
        // Align dest
        // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
        let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
        copy_forward_bytes(dest, src, dest_misalignment);
        dest = dest.wrapping_add(dest_misalignment);
        src = src.wrapping_add(dest_misalignment);
        n -= dest_misalignment;

        let n_words = n & !WORD_MASK;
        let src_misalignment = src as usize & WORD_MASK;
        if likely(src_misalignment == 0) {
            copy_forward_aligned_words(dest, src, n_words);
        } else {
            copy_forward_misaligned_words(dest, src, n_words);
        }
        dest = dest.wrapping_add(n_words);
        src = src.wrapping_add(n_words);
        n -= n_words;
    }
    copy_forward_bytes(dest, src, n);
}
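The heart of copy_forward_misaligned_words is the shift-reassembly step: two adjacent aligned word loads are spliced into the value an unaligned load would have produced. A minimal stand-alone sketch of that arithmetic, assuming a 64-bit little-endian target; reassemble_le is a hypothetical helper name for this demo, not part of the snippet above:

// Stand-alone illustration of the shift-reassembly step from
// copy_forward_misaligned_words above. Assumes a 64-bit little-endian
// target; `reassemble_le` is a hypothetical name used only in this demo.
const WORD_BITS: usize = usize::BITS as usize;

fn reassemble_le(prev_word: usize, cur_word: usize, offset_bytes: usize) -> usize {
    let shift = offset_bytes * 8;
    // `offset` in the real code is always in 1..WORD_SIZE, so these shifts never overflow.
    debug_assert!(shift > 0 && shift < WORD_BITS);
    // Low bytes come from the tail of the previous aligned word,
    // high bytes from the head of the current one.
    prev_word >> shift | cur_word << (WORD_BITS - shift)
}

fn main() {
    // Bytes 0x00..=0x0f laid out in memory; we want the word starting at byte 3.
    let bytes: [u8; 16] = core::array::from_fn(|i| i as u8);
    let words = [
        usize::from_le_bytes(bytes[0..8].try_into().unwrap()),
        usize::from_le_bytes(bytes[8..16].try_into().unwrap()),
    ];
    let got = reassemble_le(words[0], words[1], 3);
    let want = usize::from_le_bytes(bytes[3..11].try_into().unwrap());
    assert_eq!(got, want);
    println!("{got:#018x}"); // 0x0a09080706050403
}

With offset 3 the shift is 24 bits: the low five bytes of the result come from words[0] and the high three from words[1], matching what a single unaligned 8-byte load at address 3 would return.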
Rust source #1: word-wise copy_forward using partial aligned loads
#![feature(core_intrinsics)]
#![allow(internal_features)]

use std::intrinsics::likely;

const WORD_SIZE: usize = core::mem::size_of::<usize>();
const WORD_MASK: usize = WORD_SIZE - 1;

// If the number of bytes involved exceeds this threshold we will opt into word-wise copy.
// The value selected here is max(2 * WORD_SIZE, 16):
// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through
//   word-wise copy.
// * The word-wise copy logic needs to perform some checks, so it has some small overhead. The
//   threshold ensures that even on 32-bit platforms we have copied at least 8 bytes through
//   word-wise copy, so the savings of word-wise copy outweigh the fixed overhead.
const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { 2 * WORD_SIZE } else { 16 };

#[inline(always)]
unsafe fn load_chunk_aligned<T: Copy>(
    src: *const usize,
    dst: *mut usize,
    load_sz: usize,
    offset: usize,
) -> usize {
    let chunk_sz = core::mem::size_of::<T>();
    if (load_sz & chunk_sz) != 0 {
        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
        offset | chunk_sz
    } else {
        offset
    }
}

#[inline(always)]
unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < WORD_SIZE);
    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
    // (since `load_sz < WORD_SIZE`).
    const { assert!(WORD_SIZE <= 8) };

    let mut i = 0;
    let mut out = 0usize;
    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
    debug_assert!(i == load_sz);
    out
}

#[inline(always)]
unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < WORD_SIZE);
    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
    // (since `load_sz < WORD_SIZE`).
    const { assert!(WORD_SIZE <= 8) };

    let mut i = 0;
    let mut out = 0usize;
    // Obtain pointers pointing to the beginning of the range we want to load.
    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
    debug_assert!(i == load_sz);
    out
}

#[unsafe(no_mangle)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
    #[inline(always)]
    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
        let dest_end = dest.wrapping_add(n);
        while dest < dest_end {
            *dest = *src;
            dest = dest.wrapping_add(1);
            src = src.wrapping_add(1);
        }
    }

    #[inline(always)]
    unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
        let mut dest_usize = dest as *mut usize;
        let mut src_usize = src as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;
        while dest_usize < dest_end {
            *dest_usize = *src_usize;
            dest_usize = dest_usize.wrapping_add(1);
            src_usize = src_usize.wrapping_add(1);
        }
    }

    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    /// `src` *must not* be `usize`-aligned.
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
        debug_assert!(n > 0 && n % WORD_SIZE == 0);
        debug_assert!(src.addr() % WORD_SIZE != 0);

        let mut dest_usize = dest as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;

        // Calculate the misalignment offset and shift needed to reassemble the value.
        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

        // Realign src
        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);

        while dest_usize.wrapping_add(1) < dest_end {
            src_aligned = src_aligned.wrapping_add(1);
            let cur_word = *src_aligned;
            let reassembled = if cfg!(target_endian = "little") {
                prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
            } else {
                prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
            };
            prev_word = cur_word;

            *dest_usize = reassembled;
            dest_usize = dest_usize.wrapping_add(1);
        }

        // There's one more element left to go, and we can't use the loop for that as on the
        // `src` side, it is partially out-of-bounds.
        src_aligned = src_aligned.wrapping_add(1);
        let cur_word = load_aligned_partial(src_aligned, offset);
        let reassembled = if cfg!(target_endian = "little") {
            prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
        } else {
            prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
        };
        // prev_word does not matter any more
        *dest_usize = reassembled;
        // dest_usize does not matter any more
    }

    if n >= WORD_COPY_THRESHOLD {
        // Align dest
        // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
        let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
        copy_forward_bytes(dest, src, dest_misalignment);
        dest = dest.wrapping_add(dest_misalignment);
        src = src.wrapping_add(dest_misalignment);
        n -= dest_misalignment;

        let n_words = n & !WORD_MASK;
        let src_misalignment = src as usize & WORD_MASK;
        if likely(src_misalignment == 0) {
            copy_forward_aligned_words(dest, src, n_words);
        } else {
            copy_forward_misaligned_words(dest, src, n_words);
        }
        dest = dest.wrapping_add(n_words);
        src = src.wrapping_add(n_words);
        n -= n_words;
    }
    copy_forward_bytes(dest, src, n);
}
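Unlike the version above, which relies on unordered atomic loads and reads (but does not use) bytes past the source, this one only ever loads the in-bounds parts of the first and last source words, so it can be exercised directly on heap buffers. A sketch of a test harness, assuming it is compiled on a nightly toolchain in the same crate as the snippet: copy_forward, WORD_SIZE, and WORD_COPY_THRESHOLD are the items defined there; the harness itself is illustrative.

// Hypothetical check of copy_forward against a plain slice comparison, across
// a range of sizes and source misalignments. Assumes the definitions above
// are in scope and the crate builds on nightly (core_intrinsics).
fn main() {
    let src: Vec<u8> = (0..200u8).collect();
    for offset in 0..WORD_SIZE {
        // Sizes below and at/above WORD_COPY_THRESHOLD exercise both the
        // byte-wise and word-wise paths.
        for n in [0, 1, WORD_SIZE, WORD_COPY_THRESHOLD, 100] {
            let mut dst = vec![0u8; n];
            // SAFETY: both ranges are in bounds and do not overlap.
            unsafe { copy_forward(dst.as_mut_ptr(), src.as_ptr().add(offset), n) };
            assert_eq!(&dst[..], &src[offset..offset + n]);
        }
    }
    println!("all copies matched");
}

Stepping offset through 0..WORD_SIZE walks src across every word misalignment, so for the larger sizes both the likely aligned fast path and copy_forward_misaligned_words should be hit.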