Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
Triton
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
zig source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
zig 0.10.0
zig 0.11.0
zig 0.12.0
zig 0.12.1
zig 0.13.0
zig 0.14.0
zig 0.14.1
zig 0.15.1
zig 0.2.0
zig 0.3.0
zig 0.4.0
zig 0.5.0
zig 0.6.0
zig 0.7.0
zig 0.7.1
zig 0.8.0
zig 0.9.0
zig trunk
Options
Source code
const Opaque = ?[*]u8; const CompareFn = *const fn (Opaque, Opaque, Opaque) callconv(.C) u8; const CopyFn = *const fn (Opaque, Opaque) callconv(.C) void; pub const Ordering = enum(u8) { EQ = 0, GT = 1, LT = 2, }; const GT = Ordering.GT; const LT = Ordering.LT; const EQ = Ordering.EQ; inline fn compare(cmp: CompareFn, cmp_data: Opaque, lhs: [*]u8, rhs: [*]u8) Ordering { return @as(Ordering, @enumFromInt(cmp(cmp_data, lhs, rhs))); } const MAX_ELEMENT_BUFFER_SIZE: usize = 96; /// Moves the smaller element from left and rigth to dest. /// Will increment both dest and the smaller element ptr to their next index. /// Inlining will remove the extra level of pointer indirection here. /// It is just used to allow mutating the input pointers. inline fn head_branchless_merge(dest: *[*]u8, left: *[*]u8, right: *[*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { // Note equivalent c code: // *ptd++ = cmp(ptl, ptr) <= 0 ? *ptl++ : *ptr++; // While not guaranteed branchless, tested in godbolt for x86_64, aarch32, aarch64, riscv64, and wasm32. const lte = compare(cmp, cmp_data, left.*, right.*) != GT; const from = if (lte) left else right; copy(dest.*, from.*); from.* += element_width; dest.* += element_width; } /// Moves the smaller element from left and rigth to dest. /// Will decrement both dest and the smaller element ptr to their previous index. /// Inlining will remove the extra level of pointer indirection here. /// It is just used to allow mutating the input pointers. inline fn tail_branchless_merge(dest: *[*]u8, left: *[*]u8, right: *[*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { // Note equivalent c code: // *tpd-- = cmp(tpl, tpr) > 0 ? *tpl-- : *tpr--; // While not guaranteed branchless, tested in godbolt for x86_64, aarch32, aarch64, riscv64, and wasm32. const gt = compare(cmp, cmp_data, left.*, right.*) == GT; const from = if (gt) left else right; copy(dest.*, from.*); from.* -= element_width; dest.* -= element_width; } /// Swaps the element at ptr with the element after it if the element is greater than the next. inline fn swap_branchless(ptr: [*]u8, tmp: [*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { // While not guaranteed branchless, tested in godbolt for x86_64, aarch32, aarch64, riscv64, and wasm32. _ = swap_branchless_return_gt(ptr, tmp, cmp_data, cmp, element_width, copy); } inline fn swap_branchless_return_gt(ptr: [*]u8, tmp: [*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) u8 { // While not guaranteed branchless, tested in godbolt for x86_64, aarch32, aarch64, riscv64, and wasm32. const gt = compare(cmp, cmp_data, ptr, ptr + element_width) == GT; var x = if (gt) element_width else 0; const from = if (gt) ptr else ptr + element_width; copy(tmp, from); copy(ptr, ptr + x); copy(ptr + element_width, tmp); return @intFromBool(gt); } export fn tiny_sort(array: [*]u8, len: usize, swap: [*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { var buffer: [MAX_ELEMENT_BUFFER_SIZE]u8 = undefined; const tmp_ptr = @as([*]u8, @ptrCast(&buffer[0])); switch (len) { 1, 0 => { return; }, 2 => { swap_branchless(array, tmp_ptr, cmp_data, cmp, element_width, copy); }, 3 => { var arr_ptr = array; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); }, 4 => { parity_swap_four(array, tmp_ptr, cmp_data, cmp, element_width, copy); }, 5 => { parity_swap_five(array, tmp_ptr, cmp_data, cmp, element_width, copy); }, 6 => { parity_swap_six(array, tmp_ptr, swap, cmp_data, cmp, element_width, copy); }, 7 => { parity_swap_seven(array, tmp_ptr, swap, cmp_data, cmp, element_width, copy); }, else => { unreachable; }, } } fn parity_swap_four(array: [*]u8, tmp_ptr: [*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { var arr_ptr = array; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= element_width; const gt = compare(cmp, cmp_data, arr_ptr, arr_ptr + element_width) == GT; if (gt) { copy(tmp_ptr, arr_ptr); copy(arr_ptr, arr_ptr + element_width); copy(arr_ptr + element_width, tmp_ptr); arr_ptr -= element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); } } fn parity_swap_five(array: [*]u8, tmp_ptr: [*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { var arr_ptr = array; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= element_width; var more_work = swap_branchless_return_gt(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; more_work += swap_branchless_return_gt(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr = array; if (more_work != 0) { swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr = array; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); } } fn parity_swap_six(array: [*]u8, tmp_ptr: [*]u8, swap: [*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { var arr_ptr = array; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 3 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr = array; { const lte = compare(cmp, cmp_data, arr_ptr + 2 * element_width, arr_ptr + 3 * element_width) != GT; if (lte) { swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 4 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); return; } } { const gt = compare(cmp, cmp_data, arr_ptr, arr_ptr + element_width) == GT; var x = if (gt) element_width else 0; var not_x = if (!gt) element_width else 0; copy(swap, arr_ptr + x); copy(swap + element_width, arr_ptr + not_x); copy(swap + 2 * element_width, arr_ptr + 2 * element_width); arr_ptr += 4 * element_width; } { const gt = compare(cmp, cmp_data, arr_ptr, arr_ptr + element_width) == GT; var x = if (gt) element_width else 0; var not_x = if (!gt) element_width else 0; copy(swap + 4 * element_width, arr_ptr + x); copy(swap + 5 * element_width, arr_ptr + not_x); copy(swap + 3 * element_width, arr_ptr - element_width); } arr_ptr = array; var left = swap; var right = swap + 3 * element_width; head_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); head_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); head_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); arr_ptr = array + 5 * element_width; left = swap + 2 * element_width; right = swap + 5 * element_width; tail_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); tail_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); const gt = compare(cmp, cmp_data, left, right) == GT; const from = if (gt) left else right; copy(arr_ptr, from); } fn parity_swap_seven(array: [*]u8, tmp_ptr: [*]u8, swap: [*]u8, cmp_data: Opaque, cmp: CompareFn, element_width: usize, copy: CopyFn) void { var arr_ptr = array; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= 3 * element_width; var more_work = swap_branchless_return_gt(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; more_work += swap_branchless_return_gt(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr += 2 * element_width; more_work += swap_branchless_return_gt(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr -= element_width; if (more_work == 0) return; swap_branchless(arr_ptr, tmp_ptr, cmp_data, cmp, element_width, copy); arr_ptr = array; { const gt = compare(cmp, cmp_data, arr_ptr, arr_ptr + element_width) == GT; var x = if (gt) element_width else 0; var not_x = if (!gt) element_width else 0; copy(swap, arr_ptr + x); copy(swap + element_width, arr_ptr + not_x); copy(swap + 2 * element_width, arr_ptr + 2 * element_width); arr_ptr += 3 * element_width; } { const gt = compare(cmp, cmp_data, arr_ptr, arr_ptr + element_width) == GT; var x = if (gt) element_width else 0; var not_x = if (!gt) element_width else 0; copy(swap + 3 * element_width, arr_ptr + x); copy(swap + 4 * element_width, arr_ptr + not_x); arr_ptr += 2 * element_width; } { const gt = compare(cmp, cmp_data, arr_ptr, arr_ptr + element_width) == GT; var x = if (gt) element_width else 0; var not_x = if (!gt) element_width else 0; copy(swap + 5 * element_width, arr_ptr + x); copy(swap + 6 * element_width, arr_ptr + not_x); } arr_ptr = array; var left = swap; var right = swap + 3 * element_width; head_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); head_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); head_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); arr_ptr = array + 6 * element_width; left = swap + 2 * element_width; right = swap + 6 * element_width; tail_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); tail_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); tail_branchless_merge(&arr_ptr, &left, &right, cmp_data, cmp, element_width, copy); const gt = compare(cmp, cmp_data, left, right) == GT; const from = if (gt) left else right; copy(arr_ptr, from); }
Become a Patron
Sponsor on GitHub
Donate via PayPal
Compiler Explorer Shop
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
Statistics
Changelog
Version tree