Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
Triton
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
c source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
6502 cc65 2.17
6502 cc65 2.18
6502 cc65 2.19
6502 cc65 trunk
ARM GCC 10.2.0 (linux)
ARM GCC 10.2.1 (none)
ARM GCC 10.3.0 (linux)
ARM GCC 10.3.1 (2021.07 none)
ARM GCC 10.3.1 (2021.10 none)
ARM GCC 10.5.0
ARM GCC 11.1.0 (linux)
ARM GCC 11.2.0 (linux)
ARM GCC 11.2.1 (none)
ARM GCC 11.3.0 (linux)
ARM GCC 11.4.0
ARM GCC 12.1.0 (linux)
ARM GCC 12.2.0 (linux)
ARM GCC 12.3.0
ARM GCC 12.4.0
ARM GCC 12.5.0
ARM GCC 13.1.0 (linux)
ARM GCC 13.2.0
ARM GCC 13.2.0 (unknown-eabi)
ARM GCC 13.3.0
ARM GCC 13.3.0 (unknown-eabi)
ARM GCC 13.4.0
ARM GCC 13.4.0 (unknown-eabi)
ARM GCC 14.1.0
ARM GCC 14.1.0 (unknown-eabi)
ARM GCC 14.2.0
ARM GCC 14.2.0 (unknown-eabi)
ARM GCC 14.3.0
ARM GCC 14.3.0 (unknown-eabi)
ARM GCC 15.1.0
ARM GCC 15.1.0 (unknown-eabi)
ARM GCC 15.2.0
ARM GCC 15.2.0 (unknown-eabi)
ARM GCC 4.5.4 (linux)
ARM GCC 4.6.4 (linux)
ARM GCC 5.4 (linux)
ARM GCC 5.4.1 (none)
ARM GCC 6.3.0 (linux)
ARM GCC 6.4.0 (linux)
ARM GCC 7.2.1 (none)
ARM GCC 7.3.0 (linux)
ARM GCC 7.5.0 (linux)
ARM GCC 8.2.0 (WinCE)
ARM GCC 8.2.0 (linux)
ARM GCC 8.3.1 (none)
ARM GCC 8.5.0 (linux)
ARM GCC 9.2.1 (none)
ARM GCC 9.3.0 (linux)
ARM GCC trunk (linux)
ARM msvc v19.0 (ex-WINE)
ARM msvc v19.10 (ex-WINE)
ARM msvc v19.14 (ex-WINE)
ARM64 GCC 10.2.0
ARM64 GCC 10.3.0
ARM64 GCC 10.4.0
ARM64 GCC 10.5.0
ARM64 GCC 11.1.0
ARM64 GCC 11.2.0
ARM64 GCC 11.3.0
ARM64 GCC 11.4.0
ARM64 GCC 12.1.0
ARM64 GCC 12.2.0
ARM64 GCC 12.3.0
ARM64 GCC 12.4.0
ARM64 GCC 12.5.0
ARM64 GCC 13.1.0
ARM64 GCC 13.2.0
ARM64 GCC 13.3.0
ARM64 GCC 13.4.0
ARM64 GCC 14.1.0
ARM64 GCC 14.2.0
ARM64 GCC 14.3.0
ARM64 GCC 15.1.0
ARM64 GCC 15.2.0
ARM64 GCC 4.9.4
ARM64 GCC 5.4
ARM64 GCC 5.5.0
ARM64 GCC 6.3
ARM64 GCC 6.4.0
ARM64 GCC 7.3.0
ARM64 GCC 7.5.0
ARM64 GCC 8.2.0
ARM64 GCC 8.5.0
ARM64 GCC 9.3.0
ARM64 GCC 9.4.0
ARM64 GCC 9.5.0
ARM64 GCC trunk
ARM64 Morello GCC 10.1.0 Alpha 1
ARM64 Morello GCC 10.1.2 Alpha 2
ARM64 msvc v19.14 (ex-WINE)
AVR gcc 10.3.0
AVR gcc 11.1.0
AVR gcc 12.1.0
AVR gcc 12.2.0
AVR gcc 12.3.0
AVR gcc 12.4.0
AVR gcc 12.5.0
AVR gcc 13.1.0
AVR gcc 13.2.0
AVR gcc 13.3.0
AVR gcc 13.4.0
AVR gcc 14.1.0
AVR gcc 14.2.0
AVR gcc 14.3.0
AVR gcc 15.1.0
AVR gcc 15.2.0
AVR gcc 4.5.4
AVR gcc 4.6.4
AVR gcc 5.4.0
AVR gcc 9.2.0
AVR gcc 9.3.0
Arduino Mega (1.8.9)
Arduino Uno (1.8.9)
BPF clang (trunk)
BPF clang 13.0.0
BPF clang 14.0.0
BPF clang 15.0.0
BPF clang 16.0.0
BPF clang 17.0.1
BPF clang 18.1.0
BPF clang 19.1.0
BPF clang 20.1.0
BPF clang 21.1.0
BPF gcc 13.1.0
BPF gcc 13.2.0
BPF gcc 13.3.0
BPF gcc 13.4.0
BPF gcc 14.1.0
BPF gcc 14.2.0
BPF gcc 14.3.0
BPF gcc 15.1.0
BPF gcc 15.2.0
BPF gcc trunk
C2Rust (master)
Chibicc 2020-12-07
FRC 2019
FRC 2020
FRC 2023
HPPA gcc 14.2.0
HPPA gcc 14.3.0
HPPA gcc 15.1.0
HPPA gcc 15.2.0
K1C gcc 7.4
K1C gcc 7.5
KVX ACB 4.1.0 (GCC 7.5.0)
KVX ACB 4.1.0-cd1 (GCC 7.5.0)
KVX ACB 4.10.0 (GCC 10.3.1)
KVX ACB 4.11.1 (GCC 10.3.1)
KVX ACB 4.12.0 (GCC 11.3.0)
KVX ACB 4.2.0 (GCC 7.5.0)
KVX ACB 4.3.0 (GCC 7.5.0)
KVX ACB 4.4.0 (GCC 7.5.0)
KVX ACB 4.6.0 (GCC 9.4.1)
KVX ACB 4.8.0 (GCC 9.4.1)
KVX ACB 4.9.0 (GCC 9.4.1)
KVX ACB 5.0.0 (GCC 12.2.1)
KVX ACB 5.2.0 (GCC 13.2.1)
LC3 (trunk)
M68K clang (trunk)
M68K gcc 13.1.0
M68K gcc 13.2.0
M68K gcc 13.3.0
M68K gcc 13.4.0
M68K gcc 14.1.0
M68K gcc 14.2.0
M68K gcc 14.3.0
M68K gcc 15.1.0
M68K gcc 15.2.0
MRISC32 gcc (trunk)
MSP430 gcc 12.1.0
MSP430 gcc 12.2.0
MSP430 gcc 12.3.0
MSP430 gcc 12.4.0
MSP430 gcc 12.5.0
MSP430 gcc 13.1.0
MSP430 gcc 13.2.0
MSP430 gcc 13.3.0
MSP430 gcc 13.4.0
MSP430 gcc 14.1.0
MSP430 gcc 14.2.0
MSP430 gcc 14.3.0
MSP430 gcc 15.1.0
MSP430 gcc 15.2.0
MSP430 gcc 4.5.3
MSP430 gcc 5.3.0
MSP430 gcc 6.2.1
MinGW clang 14.0.3
MinGW clang 14.0.6
MinGW clang 15.0.7
MinGW clang 16.0.0
MinGW clang 16.0.2
MinGW gcc 11.3.0
MinGW gcc 12.1.0
MinGW gcc 12.2.0
MinGW gcc 13.1.0
MinGW gcc 14.3.0
MinGW gcc 15.2.0
ORCA/C 2.1.0
ORCA/C 2.2.0
ORCA/C 2.2.1
POWER64 gcc 11.2.0
POWER64 gcc 12.1.0
POWER64 gcc 12.2.0
POWER64 gcc 12.3.0
POWER64 gcc 12.4.0
POWER64 gcc 12.5.0
POWER64 gcc 13.1.0
POWER64 gcc 13.2.0
POWER64 gcc 13.3.0
POWER64 gcc 13.4.0
POWER64 gcc 14.1.0
POWER64 gcc 14.2.0
POWER64 gcc 14.3.0
POWER64 gcc 15.1.0
POWER64 gcc 15.2.0
POWER64 gcc trunk
RISC-V (32-bits) gcc (trunk)
RISC-V (32-bits) gcc 10.2.0
RISC-V (32-bits) gcc 10.3.0
RISC-V (32-bits) gcc 11.2.0
RISC-V (32-bits) gcc 11.3.0
RISC-V (32-bits) gcc 11.4.0
RISC-V (32-bits) gcc 12.1.0
RISC-V (32-bits) gcc 12.2.0
RISC-V (32-bits) gcc 12.3.0
RISC-V (32-bits) gcc 12.4.0
RISC-V (32-bits) gcc 12.5.0
RISC-V (32-bits) gcc 13.1.0
RISC-V (32-bits) gcc 13.2.0
RISC-V (32-bits) gcc 13.3.0
RISC-V (32-bits) gcc 13.4.0
RISC-V (32-bits) gcc 14.1.0
RISC-V (32-bits) gcc 14.2.0
RISC-V (32-bits) gcc 14.3.0
RISC-V (32-bits) gcc 15.1.0
RISC-V (32-bits) gcc 15.2.0
RISC-V (32-bits) gcc 8.2.0
RISC-V (32-bits) gcc 8.5.0
RISC-V (32-bits) gcc 9.4.0
RISC-V (64-bits) gcc (trunk)
RISC-V (64-bits) gcc 10.2.0
RISC-V (64-bits) gcc 10.3.0
RISC-V (64-bits) gcc 11.2.0
RISC-V (64-bits) gcc 11.3.0
RISC-V (64-bits) gcc 11.4.0
RISC-V (64-bits) gcc 12.1.0
RISC-V (64-bits) gcc 12.2.0
RISC-V (64-bits) gcc 12.3.0
RISC-V (64-bits) gcc 12.4.0
RISC-V (64-bits) gcc 12.5.0
RISC-V (64-bits) gcc 13.1.0
RISC-V (64-bits) gcc 13.2.0
RISC-V (64-bits) gcc 13.3.0
RISC-V (64-bits) gcc 13.4.0
RISC-V (64-bits) gcc 14.1.0
RISC-V (64-bits) gcc 14.2.0
RISC-V (64-bits) gcc 14.3.0
RISC-V (64-bits) gcc 15.1.0
RISC-V (64-bits) gcc 15.2.0
RISC-V (64-bits) gcc 8.2.0
RISC-V (64-bits) gcc 8.5.0
RISC-V (64-bits) gcc 9.4.0
RISC-V rv32gc clang (trunk)
RISC-V rv32gc clang 10.0.0
RISC-V rv32gc clang 10.0.1
RISC-V rv32gc clang 11.0.0
RISC-V rv32gc clang 11.0.1
RISC-V rv32gc clang 12.0.0
RISC-V rv32gc clang 12.0.1
RISC-V rv32gc clang 13.0.0
RISC-V rv32gc clang 13.0.1
RISC-V rv32gc clang 14.0.0
RISC-V rv32gc clang 15.0.0
RISC-V rv32gc clang 16.0.0
RISC-V rv32gc clang 17.0.1
RISC-V rv32gc clang 18.1.0
RISC-V rv32gc clang 19.1.0
RISC-V rv32gc clang 20.1.0
RISC-V rv32gc clang 21.1.0
RISC-V rv32gc clang 9.0.0
RISC-V rv32gc clang 9.0.1
RISC-V rv64gc clang (trunk)
RISC-V rv64gc clang 10.0.0
RISC-V rv64gc clang 10.0.1
RISC-V rv64gc clang 11.0.0
RISC-V rv64gc clang 11.0.1
RISC-V rv64gc clang 12.0.0
RISC-V rv64gc clang 12.0.1
RISC-V rv64gc clang 13.0.0
RISC-V rv64gc clang 13.0.1
RISC-V rv64gc clang 14.0.0
RISC-V rv64gc clang 15.0.0
RISC-V rv64gc clang 16.0.0
RISC-V rv64gc clang 17.0.1
RISC-V rv64gc clang 18.1.0
RISC-V rv64gc clang 19.1.0
RISC-V rv64gc clang 20.1.0
RISC-V rv64gc clang 21.1.0
RISC-V rv64gc clang 9.0.0
RISC-V rv64gc clang 9.0.1
Raspbian Buster
Raspbian Stretch
SDCC 4.0.0
SDCC 4.1.0
SDCC 4.2.0
SDCC 4.3.0
SDCC 4.4.0
SDCC 4.5.0
SPARC LEON gcc 12.2.0
SPARC LEON gcc 12.3.0
SPARC LEON gcc 12.4.0
SPARC LEON gcc 12.5.0
SPARC LEON gcc 13.1.0
SPARC LEON gcc 13.2.0
SPARC LEON gcc 13.3.0
SPARC LEON gcc 13.4.0
SPARC LEON gcc 14.1.0
SPARC LEON gcc 14.2.0
SPARC LEON gcc 14.3.0
SPARC LEON gcc 15.1.0
SPARC LEON gcc 15.2.0
SPARC gcc 12.2.0
SPARC gcc 12.3.0
SPARC gcc 12.4.0
SPARC gcc 12.5.0
SPARC gcc 13.1.0
SPARC gcc 13.2.0
SPARC gcc 13.3.0
SPARC gcc 13.4.0
SPARC gcc 14.1.0
SPARC gcc 14.2.0
SPARC gcc 14.3.0
SPARC gcc 15.1.0
SPARC gcc 15.2.0
SPARC64 gcc 12.2.0
SPARC64 gcc 12.3.0
SPARC64 gcc 12.4.0
SPARC64 gcc 12.5.0
SPARC64 gcc 13.1.0
SPARC64 gcc 13.2.0
SPARC64 gcc 13.3.0
SPARC64 gcc 13.4.0
SPARC64 gcc 14.1.0
SPARC64 gcc 14.2.0
SPARC64 gcc 14.3.0
SPARC64 gcc 15.1.0
SPARC64 gcc 15.2.0
TCC (trunk)
TCC 0.9.27
TI C6x gcc 12.2.0
TI C6x gcc 12.3.0
TI C6x gcc 12.4.0
TI C6x gcc 12.5.0
TI C6x gcc 13.1.0
TI C6x gcc 13.2.0
TI C6x gcc 13.3.0
TI C6x gcc 13.4.0
TI C6x gcc 14.1.0
TI C6x gcc 14.2.0
TI C6x gcc 14.3.0
TI C6x gcc 15.1.0
TI C6x gcc 15.2.0
TI CL430 21.6.1
Tricore gcc 11.3.0 (EEESlab)
VAX gcc NetBSDELF 10.4.0
VAX gcc NetBSDELF 10.5.0 (Nov 15 03:50:22 2023)
VAX gcc NetBSDELF 12.4.0 (Apr 16 05:27 2025)
WebAssembly clang (trunk)
Xtensa ESP32 gcc 11.2.0 (2022r1)
Xtensa ESP32 gcc 12.2.0 (20230208)
Xtensa ESP32 gcc 14.2.0 (20241119)
Xtensa ESP32 gcc 8.2.0 (2019r2)
Xtensa ESP32 gcc 8.2.0 (2020r1)
Xtensa ESP32 gcc 8.2.0 (2020r2)
Xtensa ESP32 gcc 8.4.0 (2020r3)
Xtensa ESP32 gcc 8.4.0 (2021r1)
Xtensa ESP32 gcc 8.4.0 (2021r2)
Xtensa ESP32-S2 gcc 11.2.0 (2022r1)
Xtensa ESP32-S2 gcc 12.2.0 (20230208)
Xtensa ESP32-S2 gcc 14.2.0 (20241119)
Xtensa ESP32-S2 gcc 8.2.0 (2019r2)
Xtensa ESP32-S2 gcc 8.2.0 (2020r1)
Xtensa ESP32-S2 gcc 8.2.0 (2020r2)
Xtensa ESP32-S2 gcc 8.4.0 (2020r3)
Xtensa ESP32-S2 gcc 8.4.0 (2021r1)
Xtensa ESP32-S2 gcc 8.4.0 (2021r2)
Xtensa ESP32-S3 gcc 11.2.0 (2022r1)
Xtensa ESP32-S3 gcc 12.2.0 (20230208)
Xtensa ESP32-S3 gcc 14.2.0 (20241119)
Xtensa ESP32-S3 gcc 8.4.0 (2020r3)
Xtensa ESP32-S3 gcc 8.4.0 (2021r1)
Xtensa ESP32-S3 gcc 8.4.0 (2021r2)
arm64 msvc v19.20 VS16.0
arm64 msvc v19.21 VS16.1
arm64 msvc v19.22 VS16.2
arm64 msvc v19.23 VS16.3
arm64 msvc v19.24 VS16.4
arm64 msvc v19.25 VS16.5
arm64 msvc v19.27 VS16.7
arm64 msvc v19.28 VS16.8
arm64 msvc v19.28 VS16.9
arm64 msvc v19.29 VS16.10
arm64 msvc v19.29 VS16.11
arm64 msvc v19.30 VS17.0
arm64 msvc v19.31 VS17.1
arm64 msvc v19.32 VS17.2
arm64 msvc v19.33 VS17.3
arm64 msvc v19.34 VS17.4
arm64 msvc v19.35 VS17.5
arm64 msvc v19.36 VS17.6
arm64 msvc v19.37 VS17.7
arm64 msvc v19.38 VS17.8
arm64 msvc v19.39 VS17.9
arm64 msvc v19.40 VS17.10
arm64 msvc v19.41 VS17.11
arm64 msvc v19.42 VS17.12
arm64 msvc v19.43 VS17.13
arm64 msvc v19.latest
armv7-a clang (trunk)
armv7-a clang 10.0.0
armv7-a clang 10.0.1
armv7-a clang 11.0.0
armv7-a clang 11.0.1
armv7-a clang 12.0.0
armv7-a clang 12.0.1
armv7-a clang 13.0.0
armv7-a clang 13.0.1
armv7-a clang 14.0.0
armv7-a clang 15.0.0
armv7-a clang 16.0.0
armv7-a clang 17.0.1
armv7-a clang 18.1.0
armv7-a clang 19.1.0
armv7-a clang 20.1.0
armv7-a clang 21.1.0
armv7-a clang 9.0.0
armv7-a clang 9.0.1
armv8-a clang (all architectural features, trunk)
armv8-a clang (trunk)
armv8-a clang 10.0.0
armv8-a clang 10.0.1
armv8-a clang 11.0.0
armv8-a clang 11.0.1
armv8-a clang 12.0.0
armv8-a clang 12.0.1
armv8-a clang 13.0.0
armv8-a clang 13.0.1
armv8-a clang 14.0.0
armv8-a clang 15.0.0
armv8-a clang 16.0.0
armv8-a clang 17.0.1
armv8-a clang 18.1.0
armv8-a clang 19.1.0
armv8-a clang 20.1.0
armv8-a clang 21.1.0
armv8-a clang 9.0.0
armv8-a clang 9.0.1
clang 12 for DPU (rel 2023.2.0)
cproc-master
ez80-clang 15.0.0
ez80-clang 15.0.7
llvm-mos commander X16
llvm-mos commodore 64
llvm-mos mega65
llvm-mos nes-cnrom
llvm-mos nes-mmc1
llvm-mos nes-mmc3
llvm-mos nes-nrom
llvm-mos osi-c1p
loongarch64 gcc 12.2.0
loongarch64 gcc 12.3.0
loongarch64 gcc 12.4.0
loongarch64 gcc 12.5.0
loongarch64 gcc 13.1.0
loongarch64 gcc 13.2.0
loongarch64 gcc 13.3.0
loongarch64 gcc 13.4.0
loongarch64 gcc 14.1.0
loongarch64 gcc 14.2.0
loongarch64 gcc 14.3.0
loongarch64 gcc 15.1.0
loongarch64 gcc 15.2.0
mips (el) gcc 12.1.0
mips (el) gcc 12.2.0
mips (el) gcc 12.3.0
mips (el) gcc 12.4.0
mips (el) gcc 12.5.0
mips (el) gcc 13.1.0
mips (el) gcc 13.2.0
mips (el) gcc 13.3.0
mips (el) gcc 13.4.0
mips (el) gcc 14.1.0
mips (el) gcc 14.2.0
mips (el) gcc 14.3.0
mips (el) gcc 15.1.0
mips (el) gcc 15.2.0
mips (el) gcc 4.9.4
mips (el) gcc 5.4
mips (el) gcc 5.5.0
mips (el) gcc 9.5.0
mips clang 13.0.0
mips clang 14.0.0
mips clang 15.0.0
mips clang 16.0.0
mips clang 17.0.1
mips clang 18.1.0
mips clang 19.1.0
mips clang 20.1.0
mips clang 21.1.0
mips gcc 11.2.0
mips gcc 12.1.0
mips gcc 12.2.0
mips gcc 12.3.0
mips gcc 12.4.0
mips gcc 12.5.0
mips gcc 13.1.0
mips gcc 13.2.0
mips gcc 13.3.0
mips gcc 13.4.0
mips gcc 14.1.0
mips gcc 14.2.0
mips gcc 14.3.0
mips gcc 15.1.0
mips gcc 15.2.0
mips gcc 4.9.4
mips gcc 5.4
mips gcc 5.5.0
mips gcc 9.3.0 (codescape)
mips gcc 9.5.0
mips64 (el) gcc 12.1.0
mips64 (el) gcc 12.2.0
mips64 (el) gcc 12.3.0
mips64 (el) gcc 12.4.0
mips64 (el) gcc 12.5.0
mips64 (el) gcc 13.1.0
mips64 (el) gcc 13.2.0
mips64 (el) gcc 13.3.0
mips64 (el) gcc 13.4.0
mips64 (el) gcc 14.1.0
mips64 (el) gcc 14.2.0
mips64 (el) gcc 14.3.0
mips64 (el) gcc 15.1.0
mips64 (el) gcc 15.2.0
mips64 (el) gcc 4.9.4
mips64 (el) gcc 5.4.0
mips64 (el) gcc 5.5.0
mips64 (el) gcc 9.5.0
mips64 clang 13.0.0
mips64 clang 14.0.0
mips64 clang 15.0.0
mips64 clang 16.0.0
mips64 clang 17.0.1
mips64 clang 18.1.0
mips64 clang 19.1.0
mips64 clang 20.1.0
mips64 clang 21.1.0
mips64 gcc 11.2.0
mips64 gcc 12.1.0
mips64 gcc 12.2.0
mips64 gcc 12.3.0
mips64 gcc 12.4.0
mips64 gcc 12.5.0
mips64 gcc 13.1.0
mips64 gcc 13.2.0
mips64 gcc 13.3.0
mips64 gcc 13.4.0
mips64 gcc 14.1.0
mips64 gcc 14.2.0
mips64 gcc 14.3.0
mips64 gcc 15.1.0
mips64 gcc 15.2.0
mips64 gcc 4.9.4
mips64 gcc 5.4
mips64 gcc 5.5.0
mips64 gcc 9.5.0
mips64el clang 13.0.0
mips64el clang 14.0.0
mips64el clang 15.0.0
mips64el clang 16.0.0
mips64el clang 17.0.1
mips64el clang 18.1.0
mips64el clang 19.1.0
mips64el clang 20.1.0
mips64el clang 21.1.0
mipsel clang 13.0.0
mipsel clang 14.0.0
mipsel clang 15.0.0
mipsel clang 16.0.0
mipsel clang 17.0.1
mipsel clang 18.1.0
mipsel clang 19.1.0
mipsel clang 20.1.0
mipsel clang 21.1.0
movfuscator (trunk)
nanoMIPS gcc 6.3.0
power gcc 11.2.0
power gcc 12.1.0
power gcc 12.2.0
power gcc 12.3.0
power gcc 12.4.0
power gcc 12.5.0
power gcc 13.1.0
power gcc 13.2.0
power gcc 13.3.0
power gcc 13.4.0
power gcc 14.1.0
power gcc 14.2.0
power gcc 14.3.0
power gcc 15.1.0
power gcc 15.2.0
power gcc 4.8.5
power64 AT12.0 (gcc8)
power64 AT13.0 (gcc9)
power64le AT12.0 (gcc8)
power64le AT13.0 (gcc9)
power64le clang (trunk)
power64le gcc 11.2.0
power64le gcc 12.1.0
power64le gcc 12.2.0
power64le gcc 12.3.0
power64le gcc 12.4.0
power64le gcc 12.5.0
power64le gcc 13.1.0
power64le gcc 13.2.0
power64le gcc 13.3.0
power64le gcc 13.4.0
power64le gcc 14.1.0
power64le gcc 14.2.0
power64le gcc 14.3.0
power64le gcc 15.1.0
power64le gcc 15.2.0
power64le gcc 6.3.0
power64le gcc trunk
powerpc64 clang (trunk)
ppci 0.5.5
s390x gcc 11.2.0
s390x gcc 12.1.0
s390x gcc 12.2.0
s390x gcc 12.3.0
s390x gcc 12.4.0
s390x gcc 12.5.0
s390x gcc 13.1.0
s390x gcc 13.2.0
s390x gcc 13.3.0
s390x gcc 13.4.0
s390x gcc 14.1.0
s390x gcc 14.2.0
s390x gcc 14.3.0
s390x gcc 15.1.0
s390x gcc 15.2.0
sh gcc 12.2.0
sh gcc 12.3.0
sh gcc 12.4.0
sh gcc 12.5.0
sh gcc 13.1.0
sh gcc 13.2.0
sh gcc 13.3.0
sh gcc 13.4.0
sh gcc 14.1.0
sh gcc 14.2.0
sh gcc 14.3.0
sh gcc 15.1.0
sh gcc 15.2.0
sh gcc 4.9.4
sh gcc 9.5.0
vast (trunk)
x64 msvc v19.0 (ex-WINE)
x64 msvc v19.10 (ex-WINE)
x64 msvc v19.14 (ex-WINE)
x64 msvc v19.20 VS16.0
x64 msvc v19.21 VS16.1
x64 msvc v19.22 VS16.2
x64 msvc v19.23 VS16.3
x64 msvc v19.24 VS16.4
x64 msvc v19.25 VS16.5
x64 msvc v19.27 VS16.7
x64 msvc v19.28 VS16.8
x64 msvc v19.28 VS16.9
x64 msvc v19.29 VS16.10
x64 msvc v19.29 VS16.11
x64 msvc v19.30 VS17.0
x64 msvc v19.31 VS17.1
x64 msvc v19.32 VS17.2
x64 msvc v19.33 VS17.3
x64 msvc v19.34 VS17.4
x64 msvc v19.35 VS17.5
x64 msvc v19.36 VS17.6
x64 msvc v19.37 VS17.7
x64 msvc v19.38 VS17.8
x64 msvc v19.39 VS17.9
x64 msvc v19.40 VS17.10
x64 msvc v19.41 VS17.11
x64 msvc v19.42 VS17.12
x64 msvc v19.43 VS17.13
x64 msvc v19.latest
x86 CompCert 3.10
x86 CompCert 3.11
x86 CompCert 3.12
x86 CompCert 3.9
x86 gcc 1.27
x86 msvc v19.0 (ex-WINE)
x86 msvc v19.10 (ex-WINE)
x86 msvc v19.14 (ex-WINE)
x86 msvc v19.20 VS16.0
x86 msvc v19.21 VS16.1
x86 msvc v19.22 VS16.2
x86 msvc v19.23 VS16.3
x86 msvc v19.24 VS16.4
x86 msvc v19.25 VS16.5
x86 msvc v19.27 VS16.7
x86 msvc v19.28 VS16.8
x86 msvc v19.28 VS16.9
x86 msvc v19.29 VS16.10
x86 msvc v19.29 VS16.11
x86 msvc v19.30 VS17.0
x86 msvc v19.31 VS17.1
x86 msvc v19.32 VS17.2
x86 msvc v19.33 VS17.3
x86 msvc v19.34 VS17.4
x86 msvc v19.35 VS17.5
x86 msvc v19.36 VS17.6
x86 msvc v19.37 VS17.7
x86 msvc v19.38 VS17.8
x86 msvc v19.39 VS17.9
x86 msvc v19.40 VS17.10
x86 msvc v19.41 VS17.11
x86 msvc v19.42 VS17.12
x86 msvc v19.43 VS17.13
x86 msvc v19.latest
x86 nvc 24.11
x86 nvc 24.9
x86 nvc 25.1
x86 nvc 25.3
x86 nvc 25.5
x86 nvc 25.7
x86 tendra (trunk)
x86-64 clang (assertions trunk)
x86-64 clang (thephd.dev)
x86-64 clang (trunk)
x86-64 clang (widberg)
x86-64 clang 10.0.0
x86-64 clang 10.0.1
x86-64 clang 11.0.0
x86-64 clang 11.0.1
x86-64 clang 12.0.0
x86-64 clang 12.0.1
x86-64 clang 13.0.0
x86-64 clang 13.0.1
x86-64 clang 14.0.0
x86-64 clang 15.0.0
x86-64 clang 16.0.0
x86-64 clang 17.0.1
x86-64 clang 18.1.0
x86-64 clang 19.1.0
x86-64 clang 20.1.0
x86-64 clang 21.1.0
x86-64 clang 3.0.0
x86-64 clang 3.1
x86-64 clang 3.2
x86-64 clang 3.3
x86-64 clang 3.4.1
x86-64 clang 3.5
x86-64 clang 3.5.1
x86-64 clang 3.5.2
x86-64 clang 3.6
x86-64 clang 3.7
x86-64 clang 3.7.1
x86-64 clang 3.8
x86-64 clang 3.8.1
x86-64 clang 3.9.0
x86-64 clang 3.9.1
x86-64 clang 4.0.0
x86-64 clang 4.0.1
x86-64 clang 5.0.0
x86-64 clang 5.0.1
x86-64 clang 5.0.2
x86-64 clang 6.0.0
x86-64 clang 6.0.1
x86-64 clang 7.0.0
x86-64 clang 7.0.1
x86-64 clang 7.1.0
x86-64 clang 8.0.0
x86-64 clang 8.0.1
x86-64 clang 9.0.0
x86-64 clang 9.0.1
x86-64 gcc (trunk)
x86-64 gcc 10.1
x86-64 gcc 10.2
x86-64 gcc 10.3
x86-64 gcc 10.3 (assertions)
x86-64 gcc 10.4
x86-64 gcc 10.4 (assertions)
x86-64 gcc 10.5
x86-64 gcc 10.5 (assertions)
x86-64 gcc 11.1
x86-64 gcc 11.1 (assertions)
x86-64 gcc 11.2
x86-64 gcc 11.2 (assertions)
x86-64 gcc 11.3
x86-64 gcc 11.3 (assertions)
x86-64 gcc 11.4
x86-64 gcc 11.4 (assertions)
x86-64 gcc 12.1
x86-64 gcc 12.1 (assertions)
x86-64 gcc 12.2
x86-64 gcc 12.2 (assertions)
x86-64 gcc 12.3
x86-64 gcc 12.3 (assertions)
x86-64 gcc 12.4
x86-64 gcc 12.4 (assertions)
x86-64 gcc 12.5
x86-64 gcc 12.5 (assertions)
x86-64 gcc 13.1
x86-64 gcc 13.1 (assertions)
x86-64 gcc 13.2
x86-64 gcc 13.2 (assertions)
x86-64 gcc 13.3
x86-64 gcc 13.3 (assertions)
x86-64 gcc 13.4
x86-64 gcc 13.4 (assertions)
x86-64 gcc 14.1
x86-64 gcc 14.1 (assertions)
x86-64 gcc 14.2
x86-64 gcc 14.2 (assertions)
x86-64 gcc 14.3
x86-64 gcc 14.3 (assertions)
x86-64 gcc 15.1
x86-64 gcc 15.1 (assertions)
x86-64 gcc 15.2
x86-64 gcc 15.2 (assertions)
x86-64 gcc 3.4.6
x86-64 gcc 4.0.4
x86-64 gcc 4.1.2
x86-64 gcc 4.4.7
x86-64 gcc 4.5.3
x86-64 gcc 4.6.4
x86-64 gcc 4.7.1
x86-64 gcc 4.7.2
x86-64 gcc 4.7.3
x86-64 gcc 4.7.4
x86-64 gcc 4.8.1
x86-64 gcc 4.8.2
x86-64 gcc 4.8.3
x86-64 gcc 4.8.4
x86-64 gcc 4.8.5
x86-64 gcc 4.9.0
x86-64 gcc 4.9.1
x86-64 gcc 4.9.2
x86-64 gcc 4.9.3
x86-64 gcc 4.9.4
x86-64 gcc 5.1
x86-64 gcc 5.2
x86-64 gcc 5.3
x86-64 gcc 5.4
x86-64 gcc 6.1
x86-64 gcc 6.2
x86-64 gcc 6.3
x86-64 gcc 6.5
x86-64 gcc 7.1
x86-64 gcc 7.2
x86-64 gcc 7.3
x86-64 gcc 7.4
x86-64 gcc 7.5
x86-64 gcc 8.1
x86-64 gcc 8.2
x86-64 gcc 8.3
x86-64 gcc 8.4
x86-64 gcc 8.5
x86-64 gcc 9.1
x86-64 gcc 9.2
x86-64 gcc 9.3
x86-64 gcc 9.4
x86-64 gcc 9.5
x86-64 icc 13.0.1
x86-64 icc 16.0.3
x86-64 icc 17.0.0
x86-64 icc 18.0.0
x86-64 icc 19.0.0
x86-64 icc 19.0.1
x86-64 icc 2021.1.2
x86-64 icc 2021.10.0
x86-64 icc 2021.2.0
x86-64 icc 2021.3.0
x86-64 icc 2021.4.0
x86-64 icc 2021.5.0
x86-64 icc 2021.6.0
x86-64 icc 2021.7.0
x86-64 icc 2021.7.1
x86-64 icc 2021.8.0
x86-64 icc 2021.9.0
x86-64 icx (latest)
x86-64 icx 2021.1.2
x86-64 icx 2021.2.0
x86-64 icx 2021.3.0
x86-64 icx 2021.4.0
x86-64 icx 2022.0.0
x86-64 icx 2022.1.0
x86-64 icx 2022.2.0
x86-64 icx 2022.2.1
x86-64 icx 2023.0.0
x86-64 icx 2023.1.0
x86-64 icx 2024.0.0
x86_64 CompCert 3.10
x86_64 CompCert 3.11
x86_64 CompCert 3.12
x86_64 CompCert 3.9
z180-clang 15.0.0
z180-clang 15.0.7
z80-clang 15.0.0
z80-clang 15.0.7
z88dk 2.2
zig cc 0.10.0
zig cc 0.11.0
zig cc 0.12.0
zig cc 0.12.1
zig cc 0.13.0
zig cc 0.14.0
zig cc 0.14.1
zig cc 0.15.1
zig cc 0.6.0
zig cc 0.7.0
zig cc 0.7.1
zig cc 0.8.0
zig cc 0.9.0
zig cc trunk
Options
Source code
#include <arm_neon.h> #include <stdio.h> // This code is from https://github.com/ESultanik/mtwister //////////////////////////////////////////////////////////////////// /* An implementation of the MT19937 Algorithm for the Mersenne Twister * by Evan Sultanik. Based upon the pseudocode in: M. Matsumoto and * T. Nishimura, "Mersenne Twister: A 623-dimensionally * equidistributed uniform pseudorandom number generator," ACM * Transactions on Modeling and Computer Simulation Vol. 8, No. 1, * January pp.3-30 1998. * * http://www.sultanik.com/Mersenne_twister */ #define STATE_VECTOR_LENGTH 624 #define STATE_VECTOR_M 397 /* changes to STATE_VECTOR_LENGTH also require changes to this */ typedef struct tagMTRand { unsigned long mt[STATE_VECTOR_LENGTH]; int index; } MTRand; #define UPPER_MASK 0x80000000 #define LOWER_MASK 0x7fffffff #define TEMPERING_MASK_B 0x9d2c5680 #define TEMPERING_MASK_C 0xefc60000 inline static void m_seedRand(MTRand* rand, unsigned long seed) { /* set initial seeds to mt[STATE_VECTOR_LENGTH] using the generator * from Line 25 of Table 1 in: Donald Knuth, "The Art of Computer * Programming," Vol. 2 (2nd Ed.) pp.102. */ rand->mt[0] = seed & 0xffffffff; for(rand->index=1; rand->index<STATE_VECTOR_LENGTH; rand->index++) { rand->mt[rand->index] = (6069 * rand->mt[rand->index-1]) & 0xffffffff; } } /** * Creates a new random number generator from a given seed. */ MTRand seedRand(unsigned long seed) { MTRand rand; m_seedRand(&rand, seed); return rand; } /** * Generates a pseudo-randomly generated long. */ unsigned long genRandLong(MTRand* rand) { unsigned long y; static unsigned long mag[2] = {0x0, 0x9908b0df}; /* mag[x] = x * 0x9908b0df for x = 0,1 */ if(rand->index >= STATE_VECTOR_LENGTH || rand->index < 0) { /* generate STATE_VECTOR_LENGTH words at a time */ int kk; if(rand->index >= STATE_VECTOR_LENGTH+1 || rand->index < 0) { m_seedRand(rand, 4357); } for(kk=0; kk<STATE_VECTOR_LENGTH-STATE_VECTOR_M; kk++) { y = (rand->mt[kk] & UPPER_MASK) | (rand->mt[kk+1] & LOWER_MASK); rand->mt[kk] = rand->mt[kk+STATE_VECTOR_M] ^ (y >> 1) ^ mag[y & 0x1]; } for(; kk<STATE_VECTOR_LENGTH-1; kk++) { y = (rand->mt[kk] & UPPER_MASK) | (rand->mt[kk+1] & LOWER_MASK); rand->mt[kk] = rand->mt[kk+(STATE_VECTOR_M-STATE_VECTOR_LENGTH)] ^ (y >> 1) ^ mag[y & 0x1]; } y = (rand->mt[STATE_VECTOR_LENGTH-1] & UPPER_MASK) | (rand->mt[0] & LOWER_MASK); rand->mt[STATE_VECTOR_LENGTH-1] = rand->mt[STATE_VECTOR_M-1] ^ (y >> 1) ^ mag[y & 0x1]; rand->index = 0; } y = rand->mt[rand->index++]; y ^= (y >> 11); y ^= (y << 7) & TEMPERING_MASK_B; y ^= (y << 15) & TEMPERING_MASK_C; y ^= (y >> 18); return y; } /** * Generates a pseudo-randomly generated double in the range [0..1]. */ double genRand(MTRand* rand) { return((double)genRandLong(rand) / (unsigned long)0xffffffff); } //////////////////////////////////////////////////////////////////// /* * ============================================================================= * Copyright (c) 2022 by Cryptographic Engineering Research Group (CERG) * ECE Department, George Mason University * Fairfax, VA, U.S.A. * Author: Duc Tri Nguyen * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ============================================================================= * @author Duc Tri Nguyen <dnguye69@gmu.edu> */ typedef double fpr; static const fpr fpr_ptwo63 = 9223372036854775808.0; static const double C_expm[] = { 1.000000000000000000000000000000, // c0 -0.999999999999994892974086724280, // c1 0.500000000000019206858326015208, // c2 -0.166666666666984014666397229121, // c3 0.041666666666110491190622155955, // c4 -0.008333333327800835146903501993, // c5 0.001388888894063186997887560103, // c6 -0.000198412739277311890541063977, // c7 0.000024801566833585381209939524, // c8 -0.000002755586350219122514855659, // c9 0.000000275607356160477811864927, // c10 -0.000000025299506379442070029551, // c11 0.000000002073772366009083061987, // c12 0.000000000000000000000000000000, }; /* * Output assembly: via radare2 ┌ 108: sym._fpr_expm_p63_fmla (); │ 0x100003dfc a8070010 adr x8, sym._C_expm ; 0x100003ef0 │ 0x100003e00 1f2003d5 nop │ 0x100003e04 022ddf4c ld1 {v2.2d, v3.2d, v4.2d, v5.2d}, [x8], 0x40 │ 0x100003e08 106d404c ld1 {v16.2d, v17.2d, v18.2d}, [x8] │ 0x100003e0c 087ce8d2 movz x8, 0x43e0, lsl 48 │ 0x100003e10 0601679e fmov d6, x8 │ 0x100003e14 2108661e fmul d1, d1, d6 │ 0x100003e18 06f6036f fmov v6.2d, 1.00000000 │ 0x100003e1c 0604186e ins v6.d[1], v0.d[0] │ 0x100003e20 0008601e fmul d0, d0, d0 │ 0x100003e24 0708601e fmul d7, d0, d0 │ 0x100003e28 f304084e dup v19.2d, v7.d[0] │ 0x100003e2c f408671e fmul d20, d7, d7 │ 0x100003e30 7392d44f fmul v19.2d, v19.2d, v20.d[0] │ 0x100003e34 6210c04f fmla v2.2d, v3.2d, v0.d[0] │ 0x100003e38 a410c04f fmla v4.2d, v5.2d, v0.d[0] │ 0x100003e3c 3012c04f fmla v16.2d, v17.2d, v0.d[0] │ 0x100003e40 c0dc626e fmul v0.2d, v6.2d, v2.2d │ 0x100003e44 c2dc646e fmul v2.2d, v6.2d, v4.2d │ 0x100003e48 c3dc706e fmul v3.2d, v6.2d, v16.2d │ 0x100003e4c 4010c74f fmla v0.2d, v2.2d, v7.d[0] │ 0x100003e50 6010d44f fmla v0.2d, v3.2d, v20.d[0] │ 0x100003e54 60ce724e fmla v0.2d, v19.2d, v18.2d │ 0x100003e58 0090c14f fmul v0.2d, v0.2d, v1.d[0] │ 0x100003e5c 00d8707e faddp d0, v0.2d │ 0x100003e60 0000799e fcvtzu x0, d0 └ 0x100003e64 c0035fd6 ret */ static uint64_t __attribute__((noinline)) fpr_expm_p63_fmla(const fpr x, const fpr ccs) { float64x2_t neon_x, neon_1x, neon_x2, neon_x4, neon_x8, neon_x12, neon_ccs; float64x2x4_t neon_exp0; float64x2x3_t neon_exp1; float64x2_t y1, y2, y3, y; double ret; neon_exp0 = vld1q_f64_x4(&C_expm[0]); neon_exp1 = vld1q_f64_x3(&C_expm[8]); neon_ccs = vdupq_n_f64(ccs * fpr_ptwo63); // x | x neon_x = vdupq_n_f64(x); // 1 | x neon_1x = vsetq_lane_f64(1.0, neon_x, 0); neon_x2 = vmulq_f64(neon_x, neon_x); neon_x4 = vmulq_f64(neon_x2, neon_x2); neon_x8 = vmulq_f64(neon_x4, neon_x4); neon_x12 = vmulq_f64(neon_x8, neon_x4); y1 = vfmaq_f64(neon_exp0.val[0], neon_exp0.val[1], neon_x2); y2 = vfmaq_f64(neon_exp0.val[2], neon_exp0.val[3], neon_x2); y3 = vfmaq_f64(neon_exp1.val[0], neon_exp1.val[1], neon_x2); y1 = vmulq_f64(y1, neon_1x); y2 = vmulq_f64(y2, neon_1x); y3 = vmulq_f64(y3, neon_1x); y = vfmaq_f64(y1, y2, neon_x4); y = vfmaq_f64(y, y3, neon_x8); y = vfmaq_f64(y, neon_exp1.val[2], neon_x12); y = vmulq_f64(y, neon_ccs); ret = vaddvq_f64(y); return (uint64_t) ret; } /* * Output assembly: via radare2 ┌ 132: sym._fpr_expm_p63 (); │ 0x100003d78 c80b0010 adr x8, sym._C_expm ; 0x100003ef0 │ 0x100003d7c 1f2003d5 nop │ 0x100003d80 022ddf4c ld1 {v2.2d, v3.2d, v4.2d, v5.2d}, [x8], 0x40 │ 0x100003d84 106d404c ld1 {v16.2d, v17.2d, v18.2d}, [x8] │ 0x100003d88 087ce8d2 movz x8, 0x43e0, lsl 48 │ 0x100003d8c 0601679e fmov d6, x8 │ 0x100003d90 2108661e fmul d1, d1, d6 │ 0x100003d94 06f6036f fmov v6.2d, 1.00000000 │ 0x100003d98 0604186e ins v6.d[1], v0.d[0] │ 0x100003d9c 0008601e fmul d0, d0, d0 │ 0x100003da0 0708601e fmul d7, d0, d0 │ 0x100003da4 f304084e dup v19.2d, v7.d[0] │ 0x100003da8 f408671e fmul d20, d7, d7 │ 0x100003dac 7392d44f fmul v19.2d, v19.2d, v20.d[0] │ 0x100003db0 7590c04f fmul v21.2d, v3.2d, v0.d[0] │ 0x100003db4 b690c04f fmul v22.2d, v5.2d, v0.d[0] │ 0x100003db8 2092c04f fmul v0.2d, v17.2d, v0.d[0] │ 0x100003dbc 55d4754e fadd v21.2d, v2.2d, v21.2d │ 0x100003dc0 82d4764e fadd v2.2d, v4.2d, v22.2d │ 0x100003dc4 00d6604e fadd v0.2d, v16.2d, v0.2d │ 0x100003dc8 c3dc756e fmul v3.2d, v6.2d, v21.2d │ 0x100003dcc c2dc626e fmul v2.2d, v6.2d, v2.2d │ 0x100003dd0 c0dc606e fmul v0.2d, v6.2d, v0.2d │ 0x100003dd4 4290c74f fmul v2.2d, v2.2d, v7.d[0] │ 0x100003dd8 0090d44f fmul v0.2d, v0.2d, v20.d[0] │ 0x100003ddc 64de726e fmul v4.2d, v19.2d, v18.2d │ 0x100003de0 62d4624e fadd v2.2d, v3.2d, v2.2d │ 0x100003de4 40d4604e fadd v0.2d, v2.2d, v0.2d │ 0x100003de8 80d4604e fadd v0.2d, v4.2d, v0.2d │ 0x100003dec 0090c14f fmul v0.2d, v0.2d, v1.d[0] │ 0x100003df0 00d8707e faddp d0, v0.2d │ 0x100003df4 0000799e fcvtzu x0, d0 └ 0x100003df8 c0035fd6 ret */ static uint64_t __attribute__((noinline)) fpr_expm_p63(const fpr x, const fpr ccs) { float64x2_t neon_x, neon_1x, neon_x2, neon_x4, neon_x8, neon_x12, neon_ccs; float64x2x4_t neon_exp0; float64x2x3_t neon_exp1; float64x2_t y1, y2, y3, y; double ret; neon_exp0 = vld1q_f64_x4(&C_expm[0]); neon_exp1 = vld1q_f64_x3(&C_expm[8]); neon_ccs = vdupq_n_f64(ccs * fpr_ptwo63); // x | x neon_x = vdupq_n_f64(x); // 1 | x neon_1x = vsetq_lane_f64(1.0, neon_x, 0); neon_x2 = vmulq_f64(neon_x, neon_x); neon_x4 = vmulq_f64(neon_x2, neon_x2); neon_x8 = vmulq_f64(neon_x4, neon_x4); neon_x12 = vmulq_f64(neon_x8, neon_x4); float64x2_t a1, a2, a3; // y1 = vfmaq_f64(neon_exp0.val[0], neon_exp0.val[1], neon_x2); // y2 = vfmaq_f64(neon_exp0.val[2], neon_exp0.val[3], neon_x2); // y3 = vfmaq_f64(neon_exp1.val[0], neon_exp1.val[1], neon_x2); // Convert to below code a1 = vmulq_f64(neon_exp0.val[1], neon_x2); y1 = vaddq_f64(neon_exp0.val[0], a1); a2 = vmulq_f64(neon_exp0.val[3], neon_x2); y2 = vaddq_f64(neon_exp0.val[2], a2); a3 = vmulq_f64(neon_exp1.val[1], neon_x2); y3 = vaddq_f64(neon_exp1.val[0], a3); // End conversion y1 = vmulq_f64(y1, neon_1x); y2 = vmulq_f64(y2, neon_1x); y3 = vmulq_f64(y3, neon_1x); // y = vfmaq_f64(y1, y2, neon_x4); // y = vfmaq_f64(y, y3, neon_x8); // y = vfmaq_f64(y, neon_exp1.val[2], neon_x12); // Convert to below code a1 = vmulq_f64(y2, neon_x4); y = vaddq_f64(y1, a1); a2 = vmulq_f64(y3, neon_x8); y = vaddq_f64(y, a2); a3 = vmulq_f64(neon_exp1.val[2], neon_x12); y = vaddq_f64(y, a3); // End conversion y = vmulq_f64(y, neon_ccs); ret = vaddvq_f64(y); return (uint64_t) ret; } #define TESTS 100000ULL #define DEBUG 1 int main() { fpr ccs, tmp; uint64_t a, b, diff; uint64_t count = 0; // Change this number to the seed you want MTRand r = seedRand(0x123456); for (uint64_t i = 0; i < TESTS; i++) { ccs = genRand(&r); tmp = genRand(&r); a = fpr_expm_p63(tmp, ccs); b = fpr_expm_p63_fmla(tmp, ccs); diff = a - b; // abs(diff) if (diff & 0x1000000000000000) diff = -diff; if (a != b) { if (DEBUG) { printf("ccs: %.10f (%8lx)\n", ccs, ccs); printf("x: %.10f (%8lx)\n", tmp, tmp); printf("%3lu: a, b: %8lx | %8lx\n", i, a, b); printf("diff: %lu\n", diff); printf("=====\n"); } count++; } if (!DEBUG) printf("%3lu: %08lx\n", i, a); } printf("Total incorrect: %lu/%llu\n", count, TESTS); return 0; } /* Compile flags: -O3 Input that show rounding differences between fmla and (fmul, fadd) - Apple M1, Cortex-A72 over the same input ccs: 0.2580461167 tmp: 0.9669854429 2: a, b: c8f12a7b6931c00 | c8f12a7b6931e00 diff: 512 ===== ccs: 0.6310408659 tmp: 0.5769365662 10: a, b: 2d5d11fb1ecdb000 | 2d5d11fb1ecdac00 diff: 1024 ===== ccs: 0.6147814027 tmp: 0.9990875227 44: a, b: 1cf9c129ebbb3c00 | 1cf9c129ebbb4400 diff: 2048 ===== ccs: 0.1861301493 tmp: 0.8928304473 49: a, b: 9c18e77bf126e00 | 9c18e77bf126c00 diff: 512 ===== ccs: 0.1376206992 tmp: 0.7580903107 63: a, b: 840ffb32db7d300 | 840ffb32db7d200 diff: 256 ===== ccs: 0.3532692032 tmp: 0.6281543962 75: a, b: 1820a1c965ccc100 | 1820a1c965ccc500 diff: 1024 ===== ccs: 0.9403613421 tmp: 0.8524000428 79: a, b: 3352b0a680b3d800 | 3352b0a680b3e000 diff: 2048 ===== Total incorrect: 7/100 */
Become a Patron
Sponsor on GitHub
Donate via PayPal
Compiler Explorer Shop
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
Statistics
Changelog
Version tree