Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
Go
Haskell
HLSL
Hook
Hylo
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Nim
Objective-C
Objective-C++
OCaml
OpenCL C
Pascal
Pony
Python
Racket
Ruby
Rust
Snowball
Scala
Solidity
Spice
Swift
LLVM TableGen
Toit
TypeScript Native
V
Vala
Visual Basic
Zig
Javascript
GIMPLE
c++ source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
6502-c++ 11.1.0
ARM GCC 10.2.0
ARM GCC 10.3.0
ARM GCC 10.4.0
ARM GCC 10.5.0
ARM GCC 11.1.0
ARM GCC 11.2.0
ARM GCC 11.3.0
ARM GCC 11.4.0
ARM GCC 12.1.0
ARM GCC 12.2.0
ARM GCC 12.3.0
ARM GCC 13.1.0
ARM GCC 13.2.0
ARM GCC 13.2.0 (unknown-eabi)
ARM GCC 4.5.4
ARM GCC 4.6.4
ARM GCC 5.4
ARM GCC 6.3.0
ARM GCC 6.4.0
ARM GCC 7.3.0
ARM GCC 7.5.0
ARM GCC 8.2.0
ARM GCC 8.5.0
ARM GCC 9.3.0
ARM GCC 9.4.0
ARM GCC 9.5.0
ARM GCC trunk
ARM gcc 10.2.1 (none)
ARM gcc 10.3.1 (2021.07 none)
ARM gcc 10.3.1 (2021.10 none)
ARM gcc 11.2.1 (none)
ARM gcc 5.4.1 (none)
ARM gcc 7.2.1 (none)
ARM gcc 8.2 (WinCE)
ARM gcc 8.3.1 (none)
ARM gcc 9.2.1 (none)
ARM msvc v19.0 (WINE)
ARM msvc v19.10 (WINE)
ARM msvc v19.14 (WINE)
ARM64 Morello gcc 10.1 Alpha 2
ARM64 gcc 10.2
ARM64 gcc 10.3
ARM64 gcc 10.4
ARM64 gcc 10.5.0
ARM64 gcc 11.1
ARM64 gcc 11.2
ARM64 gcc 11.3
ARM64 gcc 11.4.0
ARM64 gcc 12.1
ARM64 gcc 12.2.0
ARM64 gcc 12.3.0
ARM64 gcc 13.1.0
ARM64 gcc 13.2.0
ARM64 gcc 5.4
ARM64 gcc 6.3
ARM64 gcc 6.4
ARM64 gcc 7.3
ARM64 gcc 7.5
ARM64 gcc 8.2
ARM64 gcc 8.5
ARM64 gcc 9.3
ARM64 gcc 9.4
ARM64 gcc 9.5
ARM64 gcc trunk
ARM64 msvc v19.14 (WINE)
AVR gcc 10.3.0
AVR gcc 11.1.0
AVR gcc 12.1.0
AVR gcc 12.2.0
AVR gcc 12.3.0
AVR gcc 13.1.0
AVR gcc 13.2.0
AVR gcc 4.5.4
AVR gcc 4.6.4
AVR gcc 5.4.0
AVR gcc 9.2.0
AVR gcc 9.3.0
Arduino Mega (1.8.9)
Arduino Uno (1.8.9)
BPF clang (trunk)
BPF clang 13.0.0
BPF clang 14.0.0
BPF clang 15.0.0
BPF clang 16.0.0
BPF clang 17.0.1
BPF clang 18.1.0
BPF gcc 13.1.0
BPF gcc 13.2.0
BPF gcc trunk
EDG (experimental reflection)
EDG 6.5
EDG 6.5 (GNU mode gcc 13)
EDG 6.6
EDG 6.6 (GNU mode gcc 13)
FRC 2019
FRC 2020
FRC 2023
KVX ACB 4.1.0 (GCC 7.5.0)
KVX ACB 4.1.0-cd1 (GCC 7.5.0)
KVX ACB 4.10.0 (GCC 10.3.1)
KVX ACB 4.11.1 (GCC 10.3.1)
KVX ACB 4.12.0 (GCC 11.3.0)
KVX ACB 4.2.0 (GCC 7.5.0)
KVX ACB 4.3.0 (GCC 7.5.0)
KVX ACB 4.4.0 (GCC 7.5.0)
KVX ACB 4.6.0 (GCC 9.4.1)
KVX ACB 4.8.0 (GCC 9.4.1)
KVX ACB 4.9.0 (GCC 9.4.1)
M68K gcc 13.1.0
M68K gcc 13.2.0
M68k clang (trunk)
MRISC32 gcc (trunk)
MSP430 gcc 4.5.3
MSP430 gcc 5.3.0
MSP430 gcc 6.2.1
MinGW clang 14.0.3
MinGW clang 14.0.6
MinGW clang 15.0.7
MinGW clang 16.0.0
MinGW clang 16.0.2
MinGW gcc 11.3.0
MinGW gcc 12.1.0
MinGW gcc 12.2.0
MinGW gcc 13.1.0
RISC-V (32-bits) gcc (trunk)
RISC-V (32-bits) gcc 10.2.0
RISC-V (32-bits) gcc 10.3.0
RISC-V (32-bits) gcc 11.2.0
RISC-V (32-bits) gcc 11.3.0
RISC-V (32-bits) gcc 11.4.0
RISC-V (32-bits) gcc 12.1.0
RISC-V (32-bits) gcc 12.2.0
RISC-V (32-bits) gcc 12.3.0
RISC-V (32-bits) gcc 13.1.0
RISC-V (32-bits) gcc 13.2.0
RISC-V (32-bits) gcc 8.2.0
RISC-V (32-bits) gcc 8.5.0
RISC-V (32-bits) gcc 9.4.0
RISC-V (64-bits) gcc (trunk)
RISC-V (64-bits) gcc 10.2.0
RISC-V (64-bits) gcc 10.3.0
RISC-V (64-bits) gcc 11.2.0
RISC-V (64-bits) gcc 11.3.0
RISC-V (64-bits) gcc 11.4.0
RISC-V (64-bits) gcc 12.1.0
RISC-V (64-bits) gcc 12.2.0
RISC-V (64-bits) gcc 12.3.0
RISC-V (64-bits) gcc 13.1.0
RISC-V (64-bits) gcc 13.2.0
RISC-V (64-bits) gcc 8.2.0
RISC-V (64-bits) gcc 8.5.0
RISC-V (64-bits) gcc 9.4.0
RISC-V rv32gc clang (trunk)
RISC-V rv32gc clang 10.0.0
RISC-V rv32gc clang 10.0.1
RISC-V rv32gc clang 11.0.0
RISC-V rv32gc clang 11.0.1
RISC-V rv32gc clang 12.0.0
RISC-V rv32gc clang 12.0.1
RISC-V rv32gc clang 13.0.0
RISC-V rv32gc clang 13.0.1
RISC-V rv32gc clang 14.0.0
RISC-V rv32gc clang 15.0.0
RISC-V rv32gc clang 16.0.0
RISC-V rv32gc clang 17.0.1
RISC-V rv32gc clang 18.1.0
RISC-V rv32gc clang 9.0.0
RISC-V rv32gc clang 9.0.1
RISC-V rv64gc clang (trunk)
RISC-V rv64gc clang 10.0.0
RISC-V rv64gc clang 10.0.1
RISC-V rv64gc clang 11.0.0
RISC-V rv64gc clang 11.0.1
RISC-V rv64gc clang 12.0.0
RISC-V rv64gc clang 12.0.1
RISC-V rv64gc clang 13.0.0
RISC-V rv64gc clang 13.0.1
RISC-V rv64gc clang 14.0.0
RISC-V rv64gc clang 15.0.0
RISC-V rv64gc clang 16.0.0
RISC-V rv64gc clang 17.0.1
RISC-V rv64gc clang 18.1.0
RISC-V rv64gc clang 9.0.0
RISC-V rv64gc clang 9.0.1
Raspbian Buster
Raspbian Stretch
SPARC LEON gcc 12.2.0
SPARC LEON gcc 12.3.0
SPARC LEON gcc 13.1.0
SPARC LEON gcc 13.2.0
SPARC gcc 12.2.0
SPARC gcc 12.3.0
SPARC gcc 13.1.0
SPARC gcc 13.2.0
SPARC64 gcc 12.2.0
SPARC64 gcc 12.3.0
SPARC64 gcc 13.1.0
SPARC64 gcc 13.2.0
TI C6x gcc 12.2.0
TI C6x gcc 12.3.0
TI C6x gcc 13.1.0
TI C6x gcc 13.2.0
TI CL430 21.6.1
VAX gcc NetBSDELF 10.4.0
VAX gcc NetBSDELF 10.5.0 (Nov 15 03:50:22 2023)
WebAssembly clang (trunk)
Xtensa ESP32 gcc 11.2.0 (2022r1)
Xtensa ESP32 gcc 12.2.0 (20230208)
Xtensa ESP32 gcc 8.2.0 (2019r2)
Xtensa ESP32 gcc 8.2.0 (2020r1)
Xtensa ESP32 gcc 8.2.0 (2020r2)
Xtensa ESP32 gcc 8.4.0 (2020r3)
Xtensa ESP32 gcc 8.4.0 (2021r1)
Xtensa ESP32 gcc 8.4.0 (2021r2)
Xtensa ESP32-S2 gcc 11.2.0 (2022r1)
Xtensa ESP32-S2 gcc 12.2.0 (20230208)
Xtensa ESP32-S2 gcc 8.2.0 (2019r2)
Xtensa ESP32-S2 gcc 8.2.0 (2020r1)
Xtensa ESP32-S2 gcc 8.2.0 (2020r2)
Xtensa ESP32-S2 gcc 8.4.0 (2020r3)
Xtensa ESP32-S2 gcc 8.4.0 (2021r1)
Xtensa ESP32-S2 gcc 8.4.0 (2021r2)
Xtensa ESP32-S3 gcc 11.2.0 (2022r1)
Xtensa ESP32-S3 gcc 12.2.0 (20230208)
Xtensa ESP32-S3 gcc 8.4.0 (2020r3)
Xtensa ESP32-S3 gcc 8.4.0 (2021r1)
Xtensa ESP32-S3 gcc 8.4.0 (2021r2)
arm64 msvc v19.28 VS16.9
arm64 msvc v19.29 VS16.10
arm64 msvc v19.29 VS16.11
arm64 msvc v19.30
arm64 msvc v19.31
arm64 msvc v19.32
arm64 msvc v19.33
arm64 msvc v19.34
arm64 msvc v19.35
arm64 msvc v19.36
arm64 msvc v19.37
arm64 msvc v19.38
arm64 msvc v19.latest
armv7-a clang (trunk)
armv7-a clang 10.0.0
armv7-a clang 10.0.1
armv7-a clang 11.0.0
armv7-a clang 11.0.1
armv7-a clang 9.0.0
armv7-a clang 9.0.1
armv8-a clang (all architectural features, trunk)
armv8-a clang (trunk)
armv8-a clang 10.0.0
armv8-a clang 10.0.1
armv8-a clang 11.0.0
armv8-a clang 11.0.1
armv8-a clang 12.0.0
armv8-a clang 13.0.0
armv8-a clang 14.0.0
armv8-a clang 15.0.0
armv8-a clang 16.0.0
armv8-a clang 17.0.1
armv8-a clang 18.1.0
armv8-a clang 9.0.0
armv8-a clang 9.0.1
ellcc 0.1.33
ellcc 0.1.34
ellcc 2017-07-16
hexagon-clang 16.0.5
llvm-mos atari2600-3e
llvm-mos atari2600-4k
llvm-mos atari2600-common
llvm-mos atari5200-supercart
llvm-mos atari8-cart-megacart
llvm-mos atari8-cart-std
llvm-mos atari8-cart-xegs
llvm-mos atari8-common
llvm-mos atari8-dos
llvm-mos c128
llvm-mos c64
llvm-mos commodore
llvm-mos cpm65
llvm-mos cx16
llvm-mos dodo
llvm-mos eater
llvm-mos mega65
llvm-mos nes
llvm-mos nes-action53
llvm-mos nes-cnrom
llvm-mos nes-gtrom
llvm-mos nes-mmc1
llvm-mos nes-mmc3
llvm-mos nes-nrom
llvm-mos nes-unrom
llvm-mos nes-unrom-512
llvm-mos osi-c1p
llvm-mos pce
llvm-mos pce-cd
llvm-mos pce-common
llvm-mos pet
llvm-mos rp6502
llvm-mos rpc8e
llvm-mos supervision
llvm-mos vic20
loongarch64 gcc 12.2.0
loongarch64 gcc 12.3.0
loongarch64 gcc 13.1.0
loongarch64 gcc 13.2.0
mips clang 13.0.0
mips clang 14.0.0
mips clang 15.0.0
mips clang 16.0.0
mips clang 17.0.1
mips clang 18.1.0
mips gcc 11.2.0
mips gcc 12.1.0
mips gcc 12.2.0
mips gcc 12.3.0
mips gcc 13.1.0
mips gcc 13.2.0
mips gcc 4.9.4
mips gcc 5.4
mips gcc 5.5.0
mips gcc 9.3.0 (codescape)
mips gcc 9.5.0
mips64 (el) gcc 12.1.0
mips64 (el) gcc 12.2.0
mips64 (el) gcc 12.3.0
mips64 (el) gcc 13.1.0
mips64 (el) gcc 13.2.0
mips64 (el) gcc 4.9.4
mips64 (el) gcc 5.4.0
mips64 (el) gcc 5.5.0
mips64 (el) gcc 9.5.0
mips64 clang 13.0.0
mips64 clang 14.0.0
mips64 clang 15.0.0
mips64 clang 16.0.0
mips64 clang 17.0.1
mips64 clang 18.1.0
mips64 gcc 11.2.0
mips64 gcc 12.1.0
mips64 gcc 12.2.0
mips64 gcc 12.3.0
mips64 gcc 13.1.0
mips64 gcc 13.2.0
mips64 gcc 4.9.4
mips64 gcc 5.4.0
mips64 gcc 5.5.0
mips64 gcc 9.5.0
mips64el clang 13.0.0
mips64el clang 14.0.0
mips64el clang 15.0.0
mips64el clang 16.0.0
mips64el clang 17.0.1
mips64el clang 18.1.0
mipsel clang 13.0.0
mipsel clang 14.0.0
mipsel clang 15.0.0
mipsel clang 16.0.0
mipsel clang 17.0.1
mipsel clang 18.1.0
mipsel gcc 12.1.0
mipsel gcc 12.2.0
mipsel gcc 12.3.0
mipsel gcc 13.1.0
mipsel gcc 13.2.0
mipsel gcc 4.9.4
mipsel gcc 5.4.0
mipsel gcc 5.5.0
mipsel gcc 9.5.0
nanoMIPS gcc 6.3.0 (mtk)
power gcc 11.2.0
power gcc 12.1.0
power gcc 12.2.0
power gcc 12.3.0
power gcc 13.1.0
power gcc 13.2.0
power gcc 4.8.5
power64 AT12.0 (gcc8)
power64 AT13.0 (gcc9)
power64 gcc 11.2.0
power64 gcc 12.1.0
power64 gcc 12.2.0
power64 gcc 12.3.0
power64 gcc 13.1.0
power64 gcc 13.2.0
power64 gcc trunk
power64le AT12.0 (gcc8)
power64le AT13.0 (gcc9)
power64le clang (trunk)
power64le gcc 11.2.0
power64le gcc 12.1.0
power64le gcc 12.2.0
power64le gcc 12.3.0
power64le gcc 13.1.0
power64le gcc 13.2.0
power64le gcc 6.3.0
power64le gcc trunk
powerpc64 clang (trunk)
s390x gcc 11.2.0
s390x gcc 12.1.0
s390x gcc 12.2.0
s390x gcc 12.3.0
s390x gcc 13.1.0
s390x gcc 13.2.0
sh gcc 12.2.0
sh gcc 12.3.0
sh gcc 13.1.0
sh gcc 13.2.0
sh gcc 4.9.4
sh gcc 9.5.0
vast (trunk)
x64 msvc v19.0 (WINE)
x64 msvc v19.10 (WINE)
x64 msvc v19.14
x64 msvc v19.14 (WINE)
x64 msvc v19.15
x64 msvc v19.16
x64 msvc v19.20
x64 msvc v19.21
x64 msvc v19.22
x64 msvc v19.23
x64 msvc v19.24
x64 msvc v19.25
x64 msvc v19.26
x64 msvc v19.27
x64 msvc v19.28
x64 msvc v19.28 VS16.9
x64 msvc v19.29 VS16.10
x64 msvc v19.29 VS16.11
x64 msvc v19.30
x64 msvc v19.31
x64 msvc v19.32
x64 msvc v19.33
x64 msvc v19.34
x64 msvc v19.35
x64 msvc v19.36
x64 msvc v19.37
x64 msvc v19.38
x64 msvc v19.latest
x86 djgpp 4.9.4
x86 djgpp 5.5.0
x86 djgpp 6.4.0
x86 djgpp 7.2.0
x86 msvc v19.0 (WINE)
x86 msvc v19.10 (WINE)
x86 msvc v19.14
x86 msvc v19.14 (WINE)
x86 msvc v19.15
x86 msvc v19.16
x86 msvc v19.20
x86 msvc v19.21
x86 msvc v19.22
x86 msvc v19.23
x86 msvc v19.24
x86 msvc v19.25
x86 msvc v19.26
x86 msvc v19.27
x86 msvc v19.28
x86 msvc v19.28 VS16.9
x86 msvc v19.29 VS16.10
x86 msvc v19.29 VS16.11
x86 msvc v19.30
x86 msvc v19.31
x86 msvc v19.32
x86 msvc v19.33
x86 msvc v19.34
x86 msvc v19.35
x86 msvc v19.36
x86 msvc v19.37
x86 msvc v19.38
x86 msvc v19.latest
x86 nvc++ 22.11
x86 nvc++ 22.7
x86 nvc++ 22.9
x86 nvc++ 23.1
x86 nvc++ 23.11
x86 nvc++ 23.3
x86 nvc++ 23.5
x86 nvc++ 23.7
x86 nvc++ 23.9
x86 nvc++ 24.1
x86 nvc++ 24.3
x86-64 Zapcc 190308
x86-64 clang (amd-stg-open)
x86-64 clang (assertions trunk)
x86-64 clang (clangir)
x86-64 clang (experimental -Wlifetime)
x86-64 clang (experimental P1061)
x86-64 clang (experimental P1144)
x86-64 clang (experimental P1221)
x86-64 clang (experimental P2996)
x86-64 clang (experimental metaprogramming - P2632)
x86-64 clang (experimental pattern matching)
x86-64 clang (old concepts branch)
x86-64 clang (reflection)
x86-64 clang (resugar)
x86-64 clang (thephd.dev)
x86-64 clang (trunk)
x86-64 clang (variadic friends - P2893)
x86-64 clang (widberg)
x86-64 clang 10.0.0
x86-64 clang 10.0.0 (assertions)
x86-64 clang 10.0.1
x86-64 clang 11.0.0
x86-64 clang 11.0.0 (assertions)
x86-64 clang 11.0.1
x86-64 clang 12.0.0
x86-64 clang 12.0.0 (assertions)
x86-64 clang 12.0.1
x86-64 clang 13.0.0
x86-64 clang 13.0.0 (assertions)
x86-64 clang 13.0.1
x86-64 clang 14.0.0
x86-64 clang 14.0.0 (assertions)
x86-64 clang 15.0.0
x86-64 clang 15.0.0 (assertions)
x86-64 clang 16.0.0
x86-64 clang 16.0.0 (assertions)
x86-64 clang 17.0.1
x86-64 clang 17.0.1 (assertions)
x86-64 clang 18.1.0
x86-64 clang 18.1.0 (assertions)
x86-64 clang 2.6.0 (assertions)
x86-64 clang 2.7.0 (assertions)
x86-64 clang 2.8.0 (assertions)
x86-64 clang 2.9.0 (assertions)
x86-64 clang 3.0.0
x86-64 clang 3.0.0 (assertions)
x86-64 clang 3.1
x86-64 clang 3.1 (assertions)
x86-64 clang 3.2
x86-64 clang 3.2 (assertions)
x86-64 clang 3.3
x86-64 clang 3.3 (assertions)
x86-64 clang 3.4 (assertions)
x86-64 clang 3.4.1
x86-64 clang 3.5
x86-64 clang 3.5 (assertions)
x86-64 clang 3.5.1
x86-64 clang 3.5.2
x86-64 clang 3.6
x86-64 clang 3.6 (assertions)
x86-64 clang 3.7
x86-64 clang 3.7 (assertions)
x86-64 clang 3.7.1
x86-64 clang 3.8
x86-64 clang 3.8 (assertions)
x86-64 clang 3.8.1
x86-64 clang 3.9.0
x86-64 clang 3.9.0 (assertions)
x86-64 clang 3.9.1
x86-64 clang 4.0.0
x86-64 clang 4.0.0 (assertions)
x86-64 clang 4.0.1
x86-64 clang 5.0.0
x86-64 clang 5.0.0 (assertions)
x86-64 clang 5.0.1
x86-64 clang 5.0.2
x86-64 clang 6.0.0
x86-64 clang 6.0.0 (assertions)
x86-64 clang 6.0.1
x86-64 clang 7.0.0
x86-64 clang 7.0.0 (assertions)
x86-64 clang 7.0.1
x86-64 clang 7.1.0
x86-64 clang 8.0.0
x86-64 clang 8.0.0 (assertions)
x86-64 clang 8.0.1
x86-64 clang 9.0.0
x86-64 clang 9.0.0 (assertions)
x86-64 clang 9.0.1
x86-64 clang rocm-4.5.2
x86-64 clang rocm-5.0.2
x86-64 clang rocm-5.1.3
x86-64 clang rocm-5.2.3
x86-64 clang rocm-5.3.3
x86-64 clang rocm-5.7.0
x86-64 gcc (contract labels)
x86-64 gcc (contracts natural syntax)
x86-64 gcc (contracts)
x86-64 gcc (coroutines)
x86-64 gcc (modules)
x86-64 gcc (trunk)
x86-64 gcc 10.1
x86-64 gcc 10.2
x86-64 gcc 10.3
x86-64 gcc 10.4
x86-64 gcc 10.5
x86-64 gcc 11.1
x86-64 gcc 11.2
x86-64 gcc 11.3
x86-64 gcc 11.4
x86-64 gcc 12.1
x86-64 gcc 12.2
x86-64 gcc 12.3
x86-64 gcc 13.1
x86-64 gcc 13.2
x86-64 gcc 4.1.2
x86-64 gcc 4.4.7
x86-64 gcc 4.5.3
x86-64 gcc 4.6.4
x86-64 gcc 4.7.1
x86-64 gcc 4.7.2
x86-64 gcc 4.7.3
x86-64 gcc 4.7.4
x86-64 gcc 4.8.1
x86-64 gcc 4.8.2
x86-64 gcc 4.8.3
x86-64 gcc 4.8.4
x86-64 gcc 4.8.5
x86-64 gcc 4.9.0
x86-64 gcc 4.9.1
x86-64 gcc 4.9.2
x86-64 gcc 4.9.3
x86-64 gcc 4.9.4
x86-64 gcc 5.1
x86-64 gcc 5.2
x86-64 gcc 5.3
x86-64 gcc 5.4
x86-64 gcc 5.5
x86-64 gcc 6.1
x86-64 gcc 6.2
x86-64 gcc 6.3
x86-64 gcc 6.4
x86-64 gcc 7.1
x86-64 gcc 7.2
x86-64 gcc 7.3
x86-64 gcc 7.4
x86-64 gcc 7.5
x86-64 gcc 8.1
x86-64 gcc 8.2
x86-64 gcc 8.3
x86-64 gcc 8.4
x86-64 gcc 8.5
x86-64 gcc 9.1
x86-64 gcc 9.2
x86-64 gcc 9.3
x86-64 gcc 9.4
x86-64 gcc 9.5
x86-64 icc 13.0.1
x86-64 icc 16.0.3
x86-64 icc 17.0.0
x86-64 icc 18.0.0
x86-64 icc 19.0.0
x86-64 icc 19.0.1
x86-64 icc 2021.1.2
x86-64 icc 2021.10.0
x86-64 icc 2021.2.0
x86-64 icc 2021.3.0
x86-64 icc 2021.4.0
x86-64 icc 2021.5.0
x86-64 icc 2021.6.0
x86-64 icc 2021.7.0
x86-64 icc 2021.7.1
x86-64 icc 2021.8.0
x86-64 icc 2021.9.0
x86-64 icx (latest)
x86-64 icx 2021.1.2
x86-64 icx 2021.2.0
x86-64 icx 2021.3.0
x86-64 icx 2021.4.0
x86-64 icx 2022.0.0
x86-64 icx 2022.1.0
x86-64 icx 2022.2.0
x86-64 icx 2022.2.1
x86-64 icx 2023.0.0
x86-64 icx 2023.1.0
x86-64 icx 2023.2.1
x86-64 icx 2024.0.0
zig c++ 0.10.0
zig c++ 0.11.0
zig c++ 0.12.0
zig c++ 0.6.0
zig c++ 0.7.0
zig c++ 0.7.1
zig c++ 0.8.0
zig c++ 0.9.0
zig c++ trunk
Options
Source code
#include <immintrin.h> #define ALIGNED(x) __attribute__ ((aligned (x))) struct ALIGNED(16) TexSampler { int w; int h; int pitch; const int* data; }; struct float4 { float4() :x(0), y(0), z(0), w(0) {} float4(float a, float b, float c, float d) : x(a), y(b), z(c), w(d) {} float x, y, z, w; }; struct float2 { float2() :x(0), y(0){} float2(float a, float b) : x(a), y(b) {} float x, y; }; struct TriangleT { TriangleT() { psoId = -1; } ALIGNED(16) float4 v1; ALIGNED(16) float4 v2; ALIGNED(16) float4 v3; ALIGNED(16) float4 c1; ALIGNED(16) float4 c2; ALIGNED(16) float4 c3; float2 t1; float2 t2; float2 t3; int bb_iminX; int bb_imaxX; int bb_iminY; int bb_imaxY; int psoId; TexSampler texS; }; typedef __m256 vfloat8; typedef __m256i vint8; static inline void set_ftz() { _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);} static inline void store(float* data, vfloat8 a_val) { _mm256_store_ps(data, a_val); } static inline void store(int* data, vint8 a_val) { _mm256_store_ps((float*)data, _mm256_castsi256_ps(a_val)); } static inline void store_u(float* data, vfloat8 a_val) { _mm256_storeu_ps(data, a_val); } static inline void store_u(int* data, vint8 a_val) { _mm256_storeu_ps((float*)data, _mm256_castsi256_ps(a_val)); } static inline auto load (const float *data) -> vfloat8 { return _mm256_load_ps(data); } static inline auto load (const int *data) -> vint8 { return _mm256_castps_si256(_mm256_load_ps((float*)data)); } static inline auto load_u(const float *data) -> vfloat8 { return _mm256_loadu_ps(data); } static inline auto load_u(const int *data) -> vint8 { return _mm256_castps_si256(_mm256_loadu_ps((float*)data)); } static inline auto splat(const int i) -> vint8 { return _mm256_castps_si256(_mm256_broadcast_ss((float*)&i)); } static inline auto splat(const unsigned int i) -> vint8 { return _mm256_castps_si256(_mm256_broadcast_ss((float*)&i)); } static inline auto splat(const float i) -> vfloat8 { return _mm256_broadcast_ss(&i); } static inline vint8 to_int32(const vfloat8 a_val) { 
return _mm256_cvtps_epi32(a_val);} static inline vfloat8 to_float32(const vint8 a_val) { return _mm256_cvtepi32_ps(a_val);} static inline vint8 make_vint(const int a, const int b, const int c, const int d, const int e, const int f, const int g, const int h) { return _mm256_set_epi32(h, g, f, e, d, c, b, a); } static inline vfloat8 rcp_e(const vfloat8 a) { return _mm256_rcp_ps(a); } static inline vfloat8 blend(const vfloat8 a, const vfloat8 b, const vint8 mask) { return _mm256_or_ps(_mm256_and_ps( _mm256_castsi256_ps(mask), a), _mm256_andnot_ps(_mm256_castsi256_ps(mask), b)); } static inline vint8 blend(const vint8 a, const vint8 b, const vint8 mask) { return _mm256_castps_si256(_mm256_or_ps(_mm256_and_ps( _mm256_castsi256_ps(mask), _mm256_castsi256_ps(a) ), _mm256_andnot_ps(_mm256_castsi256_ps(mask), _mm256_castsi256_ps(b) ))); } static inline bool test_bits_any(const vint8 a) { return (_mm256_movemask_ps(_mm256_castsi256_ps(a)) & 15) != 0; } static inline vfloat8 vclamp(const vfloat8 x, const vfloat8 minVal, const vfloat8 maxVal) { return _mm256_max_ps(_mm256_min_ps(x, maxVal), minVal); } static inline void prefetch(const float* ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static inline void prefetch(const int* ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } template<typename vint, int n> struct LineOffs { static inline vint w(const int CX1, const int FDY12) { ALIGNED(n*4) int w1i[n]; #pragma GCC ivdep for(int i=0;i<n;i++) w1i[i] = CX1 - i*FDY12; return (vint)load(w1i); } static inline void load4(const int* a_data, const int pitch, const vint offset, vint a_result[4]) { } static inline vint load1(const int* a_data, const int pitch, const vint offset) { vint temp = splat(0); return temp; } }; template<typename vint> struct LineOffs<vint, 8> { static inline vint w(const int CX1, const int FDY12) { return make_vint(CX1, CX1 - FDY12, CX1 - FDY12*2, CX1 - FDY12*3, CX1 - FDY12*4, CX1 - FDY12*5, CX1 - FDY12*6, CX1 - FDY12*7); } static inline void load4(const int* a_data, const int 
pitch, const vint offset, vint a_result[4]) { ALIGNED(32) int myOffsets[8]; store(myOffsets, offset); const int* p0 = a_data + myOffsets[0]; const int* p1 = a_data + myOffsets[1]; const int* p2 = a_data + myOffsets[2]; const int* p3 = a_data + myOffsets[3]; const int* p4 = a_data + myOffsets[4]; const int* p5 = a_data + myOffsets[5]; const int* p6 = a_data + myOffsets[6]; const int* p7 = a_data + myOffsets[7]; const int d01 = p0[0 + 0 * pitch]; const int d02 = p0[1 + 0 * pitch]; const int d03 = p0[0 + 1 * pitch]; const int d04 = p0[1 + 1 * pitch]; const int d11 = p1[0 + 0 * pitch]; const int d12 = p1[1 + 0 * pitch]; const int d13 = p1[0 + 1 * pitch]; const int d14 = p1[1 + 1 * pitch]; const int d21 = p2[0 + 0 * pitch]; const int d22 = p2[1 + 0 * pitch]; const int d23 = p2[0 + 1 * pitch]; const int d24 = p2[1 + 1 * pitch]; const int d31 = p3[0 + 0 * pitch]; const int d32 = p3[1 + 0 * pitch]; const int d33 = p3[0 + 1 * pitch]; const int d34 = p3[1 + 1 * pitch]; const int d41 = p4[0 + 0 * pitch]; const int d42 = p4[1 + 0 * pitch]; const int d43 = p4[0 + 1 * pitch]; const int d44 = p4[1 + 1 * pitch]; const int d51 = p5[0 + 0 * pitch]; const int d52 = p5[1 + 0 * pitch]; const int d53 = p5[0 + 1 * pitch]; const int d54 = p5[1 + 1 * pitch]; const int d61 = p6[0 + 0 * pitch]; const int d62 = p6[1 + 0 * pitch]; const int d63 = p6[0 + 1 * pitch]; const int d64 = p6[1 + 1 * pitch]; const int d71 = p7[0 + 0 * pitch]; const int d72 = p7[1 + 0 * pitch]; const int d73 = p7[0 + 1 * pitch]; const int d74 = p7[1 + 1 * pitch]; a_result[0] = make_vint(d01, d11, d21, d31, d41, d51, d61, d71); a_result[1] = make_vint(d02, d12, d22, d32, d42, d52, d62, d72); a_result[2] = make_vint(d03, d13, d23, d33, d43, d53, d63, d73); a_result[3] = make_vint(d04, d14, d24, d34, d44, d54, d64, d74); } static inline vint load1(const int* a_data, const int pitch, const vint offset) { ALIGNED(32) int myOffsets[8]; store(myOffsets, offset); const int d01 = a_data[myOffsets[0]]; const int d11 = 
a_data[myOffsets[1]]; const int d21 = a_data[myOffsets[2]]; const int d31 = a_data[myOffsets[3]]; const int d41 = a_data[myOffsets[4]]; const int d51 = a_data[myOffsets[5]]; const int d61 = a_data[myOffsets[6]]; const int d71 = a_data[myOffsets[7]]; return make_vint(d01, d11, d21, d31, d41, d51, d61, d71); } }; typedef vfloat8 vfloat; typedef vint8 vint; void Colored3D(const TriangleT& tri, const int CX1, const int CX2, const int FDY12, const int FDY23, const float areaInv, int* pLineColor, float* pLineDepth) { const vfloat c_one = splat(1.0f); const vfloat c_255 = splat(255.0f); const vfloat w1 = areaInv* to_float32( LineOffs<vint,8>::w(CX1, FDY12) ); const vfloat w2 = areaInv* to_float32( LineOffs<vint,8>::w(CX2, FDY23) ); const vfloat w3 = (c_one - w1 - w2); const vfloat zInv = tri.v1.z*w1 + tri.v2.z*w2 + tri.v3.z*w3; const vfloat zOld = load_u(pLineDepth); const vint zTest = (zInv > zOld); if(test_bits_any(zTest)) { const auto z = rcp_e(zInv); const auto r = (tri.c1.x * w1 + tri.c2.x * w2 + tri.c3.x * w3)*z; const auto g = (tri.c1.y * w1 + tri.c2.y * w2 + tri.c3.y * w3)*z; const auto b = (tri.c1.z * w1 + tri.c2.z * w2 + tri.c3.z * w3)*z; const auto a = (tri.c1.w * w1 + tri.c2.w * w2 + tri.c3.w * w3)*z; const vint colorOld = load_u(pLineColor); const vint colori = (to_int32(r * c_255) << 16) | // BGRA (to_int32(g * c_255) << 8) | (to_int32(b * c_255) << 0) | (to_int32(a * c_255) << 24); store_u(pLineColor, blend(colori, colorOld, zTest)); store_u(pLineDepth, blend(zInv, zOld, zTest)); } } ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// 
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////

/*

static inline void WrapTexCoord(vfloat& x, vfloat& y)
{
  x = x - to_float32(to_int32(x));
  y = y - to_float32(to_int32(y));

  const vfloat zero = splat(0.0f);
  const vfloat one  = splat(1.0f);

  x = blend(x + one, x, (x < zero)); //x = x < 0.0f ? x + 1.0f : x;
  y = blend(y + one, y, (y < zero)); //y = y < 0.0f ? y + 1.0f : y;
}

static inline void ReadImage4f_Bilinear(const int* pData, const int w, const int h, int pitch, const vfloat a_texCoordX, const vfloat a_texCoordY,
                                        vfloat a_result[4])
{
  const vfloat fw = to_float32( (vint)splat(w) );
  const vfloat fh = to_float32( (vint)splat(h) );

  //const float ffx = a_texCoord.x*fw - 0.5f;
  //const float ffy = a_texCoord.y*fh - 0.5f;

  const vfloat zero = splat(0.0f);
  const vfloat one  = splat(1.0f);
  const vfloat half = splat(0.5f);

  const vfloat ffx = vclamp((a_texCoordX*fw - half), zero, (fw - one) );
  const vfloat ffy = vclamp((a_texCoordY*fh - half), zero, (fh - one) );

  const vint px = to_int32(ffx);
  const vint py = to_int32(ffy);

  // Calculate the weights for each pixel
  //
  const vfloat fx  = ffx - to_float32(px);
  const vfloat fy  = ffy - to_float32(py);
  const vfloat fx1 = one - fx;
  const vfloat fy1 = one - fy;

  const vfloat w1 = fx1 * fy1;
  const vfloat w2 = fx  * fy1;
  const vfloat w3 = fx1 * fy;
  const vfloat w4 = fx  * fy;

  const vfloat mult = splat(0.003921568f); // (1.0f/255.0f);

  const vint offset = (py*pitch) + px;

  vint ipixels[4];
  LineOffs<vint,8>::load4(pData, pitch, offset, ipixels);

  const vint mask_R = splat(int(0x000000FF));
  const vint mask_G = splat(int(0x0000FF00));
  const vint mask_B = splat(int(0x00FF0000));
  const vint mask_A = splat(int(0xFF000000));

  const vfloat f1_x = mult*to_float32((ipixels[0] & mask_R) >> 0);
  const vfloat f1_y = mult*to_float32((ipixels[0] & mask_G) >> 8);
  const vfloat f1_z = mult*to_float32((ipixels[0] & mask_B) >> 16);
  const vfloat f1_w = mult*to_float32((ipixels[0] & mask_A) >> 24); // #TODO: MUST USE UNSIGNED SHIFTS !!!

  const vfloat f2_x = mult*to_float32((ipixels[1] & mask_R) >> 0);
  const vfloat f2_y = mult*to_float32((ipixels[1] & mask_G) >> 8);
  const vfloat f2_z = mult*to_float32((ipixels[1] & mask_B) >> 16);
  const vfloat f2_w = mult*to_float32((ipixels[1] & mask_A) >> 24); // #TODO: MUST USE UNSIGNED SHIFTS !!!

  const vfloat f3_x = mult*to_float32((ipixels[2] & mask_R) >> 0);
  const vfloat f3_y = mult*to_float32((ipixels[2] & mask_G) >> 8);
  const vfloat f3_z = mult*to_float32((ipixels[2] & mask_B) >> 16);
  const vfloat f3_w = mult*to_float32((ipixels[2] & mask_A) >> 24); // #TODO: MUST USE UNSIGNED SHIFTS !!!

  const vfloat f4_x = mult*to_float32((ipixels[3] & mask_R) >> 0);
  const vfloat f4_y = mult*to_float32((ipixels[3] & mask_G) >> 8);
  const vfloat f4_z = mult*to_float32((ipixels[3] & mask_B) >> 16);
  const vfloat f4_w = mult*to_float32((ipixels[3] & mask_A) >> 24); // #TODO: MUST USE UNSIGNED SHIFTS !!!

  // Calculate the weighted sum of pixels (for each color channel)
  //
  a_result[0] = f1_x * w1 + f2_x * w2 + f3_x * w3 + f4_x * w4;
  a_result[1] = f1_y * w1 + f2_y * w2 + f3_y * w3 + f4_y * w4;
  a_result[2] = f1_z * w1 + f2_z * w2 + f3_z * w3 + f4_z * w4;
  a_result[3] = f1_w * w1 + f2_w * w2 + f3_w * w3 + f4_w * w4;
}

static inline void ReadImage4f_Point(const int* pData, const int w, const int h, int pitch, const vfloat a_texCoordX, const vfloat a_texCoordY,
                                     vfloat a_result[4])
{
  const vfloat zero = splat(0.0f);
  const vfloat one  = splat(1.0f);
  const vfloat half = splat(0.5f);

  const vfloat fw = to_float32( (vint)splat(w) );
  const vfloat fh = to_float32( (vint)splat(h) );

  //const vfloat ffx = a_texCoordX*fw + half;
  //const vfloat ffy = a_texCoordY*fh + half;

  const vfloat ffx = vclamp(a_texCoordX*fw + half, zero, (fw - one) );
  const vfloat ffy = vclamp(a_texCoordY*fh + half, zero, (fh - one) );

  const vint px = to_int32(ffx);
  const vint py = to_int32(ffy);

  const vfloat mult = splat(0.003921568f); // (1.0f/255.0f);

  const vint offset = (py*pitch) + px;
  const vint ipixel = LineOffs<vint,8>::load1(pData, pitch, offset);

  const vint mask_R = splat(int(0x000000FF)); // #TODO: MUST USE UNSIGNED SHIFTS !!!
  const vint mask_G = splat(int(0x0000FF00)); // #TODO: MUST USE UNSIGNED SHIFTS !!!
  const vint mask_B = splat(int(0x00FF0000)); // #TODO: MUST USE UNSIGNED SHIFTS !!!
  const vint mask_A = splat(int(0xFF000000)); // #TODO: MUST USE UNSIGNED SHIFTS !!!

  a_result[0] = mult*to_float32((ipixel & mask_R) >> 0);
  a_result[1] = mult*to_float32((ipixel & mask_G) >> 8);
  a_result[2] = mult*to_float32((ipixel & mask_B) >> 16);
  a_result[3] = mult*to_float32((ipixel & mask_A) >> 24);
}

template<bool bilinear>
static inline void Tex2DSample(const TriangleT& tri, vfloat x, vfloat y, vfloat a_res[4])
{
  WrapTexCoord(x,y);

  if(bilinear) // assume compiler could optimize this
  {
    ReadImage4f_Bilinear(tri.texS.data, tri.texS.w, tri.texS.h, tri.texS.pitch, x, y,
                         a_res);
  }
  else
  {
    ReadImage4f_Point(tri.texS.data, tri.texS.w, tri.texS.h, tri.texS.pitch, x, y,
                      a_res);
  }
}

void Textured3D(const TriangleT& tri, const int CX1, const int CX2, const int FDY12, const int FDY23, const float areaInv,
                int* pLineColor, float* pLineDepth)
{
  prefetch(pLineDepth);

  const vfloat c_one = splat(1.0f);
  const vfloat c_255 = splat(255.0f);

  const vfloat w1 = areaInv* to_float32( LineOffs<vint,8>::w(CX1, FDY12) );
  const vfloat w2 = areaInv* to_float32( LineOffs<vint,8>::w(CX2, FDY23) );
  const vfloat w3 = (c_one - w1 - w2);

  const vfloat zInv  = tri.v1.z*w1 + tri.v2.z*w2 + tri.v3.z*w3;
  const vfloat zOld  = load_u(pLineDepth);
  const vint   zTest = (zInv > zOld);

  if(test_bits_any(zTest))
  {
    store_u(pLineDepth, blend(zInv, zOld, zTest));
    prefetch(pLineColor);

    const vfloat z = rcp_e(zInv);

    const vfloat r = (tri.c1.x * w1 + tri.c2.x * w2 + tri.c3.x * w3)*z;
    const vfloat g = (tri.c1.y * w1 + tri.c2.y * w2 + tri.c3.y * w3)*z;
    const vfloat b = (tri.c1.z * w1 + tri.c2.z * w2 + tri.c3.z * w3)*z;
    const vfloat a = (tri.c1.w * w1 + tri.c2.w * w2 + tri.c3.w * w3)*z;

    const vfloat tx = (tri.t1.x*w1 + tri.t2.x*w2 + tri.t3.x*w3)*z;
    const vfloat ty = (tri.t1.y*w1 + tri.t2.y*w2 + tri.t3.y*w3)*z;

    vfloat texColor[4];
    Tex2DSample<true>(tri, tx, ty,
                      texColor);

    const vint colorOld = load_u(pLineColor);

    const vint colori = (to_int32(r * texColor[0] * c_255) << 16) | // BGRA
                        (to_int32(g * texColor[1] * c_255) << 8)  |
                        (to_int32(b * texColor[2] * c_255) << 0)  |
                        (to_int32(a * texColor[3] * c_255) << 24);

    store_u(pLineColor, blend(colori, colorOld, zTest));
  }
}

*/
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
About the author
Statistics
Changelog
Version tree