Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
Triton
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
c++ source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
6502-c++ 11.1.0
ARM GCC 10.2.0
ARM GCC 10.3.0
ARM GCC 10.4.0
ARM GCC 10.5.0
ARM GCC 11.1.0
ARM GCC 11.2.0
ARM GCC 11.3.0
ARM GCC 11.4.0
ARM GCC 12.1.0
ARM GCC 12.2.0
ARM GCC 12.3.0
ARM GCC 12.4.0
ARM GCC 12.5.0
ARM GCC 13.1.0
ARM GCC 13.2.0
ARM GCC 13.2.0 (unknown-eabi)
ARM GCC 13.3.0
ARM GCC 13.3.0 (unknown-eabi)
ARM GCC 13.4.0
ARM GCC 13.4.0 (unknown-eabi)
ARM GCC 14.1.0
ARM GCC 14.1.0 (unknown-eabi)
ARM GCC 14.2.0
ARM GCC 14.2.0 (unknown-eabi)
ARM GCC 14.3.0
ARM GCC 14.3.0 (unknown-eabi)
ARM GCC 15.1.0
ARM GCC 15.1.0 (unknown-eabi)
ARM GCC 15.2.0
ARM GCC 15.2.0 (unknown-eabi)
ARM GCC 4.5.4
ARM GCC 4.6.4
ARM GCC 5.4
ARM GCC 6.3.0
ARM GCC 6.4.0
ARM GCC 7.3.0
ARM GCC 7.5.0
ARM GCC 8.2.0
ARM GCC 8.5.0
ARM GCC 9.3.0
ARM GCC 9.4.0
ARM GCC 9.5.0
ARM GCC trunk
ARM gcc 10.2.1 (none)
ARM gcc 10.3.1 (2021.07 none)
ARM gcc 10.3.1 (2021.10 none)
ARM gcc 11.2.1 (none)
ARM gcc 5.4.1 (none)
ARM gcc 7.2.1 (none)
ARM gcc 8.2 (WinCE)
ARM gcc 8.3.1 (none)
ARM gcc 9.2.1 (none)
ARM msvc v19.0 (ex-WINE)
ARM msvc v19.10 (ex-WINE)
ARM msvc v19.14 (ex-WINE)
ARM64 Morello gcc 10.1 Alpha 2
ARM64 gcc 10.2
ARM64 gcc 10.3
ARM64 gcc 10.4
ARM64 gcc 10.5.0
ARM64 gcc 11.1
ARM64 gcc 11.2
ARM64 gcc 11.3
ARM64 gcc 11.4.0
ARM64 gcc 12.1
ARM64 gcc 12.2.0
ARM64 gcc 12.3.0
ARM64 gcc 12.4.0
ARM64 gcc 12.5.0
ARM64 gcc 13.1.0
ARM64 gcc 13.2.0
ARM64 gcc 13.3.0
ARM64 gcc 13.4.0
ARM64 gcc 14.1.0
ARM64 gcc 14.2.0
ARM64 gcc 14.3.0
ARM64 gcc 15.1.0
ARM64 gcc 15.2.0
ARM64 gcc 4.9.4
ARM64 gcc 5.4
ARM64 gcc 5.5.0
ARM64 gcc 6.3
ARM64 gcc 6.4
ARM64 gcc 7.3
ARM64 gcc 7.5
ARM64 gcc 8.2
ARM64 gcc 8.5
ARM64 gcc 9.3
ARM64 gcc 9.4
ARM64 gcc 9.5
ARM64 gcc trunk
ARM64 msvc v19.14 (ex-WINE)
AVR gcc 10.3.0
AVR gcc 11.1.0
AVR gcc 12.1.0
AVR gcc 12.2.0
AVR gcc 12.3.0
AVR gcc 12.4.0
AVR gcc 12.5.0
AVR gcc 13.1.0
AVR gcc 13.2.0
AVR gcc 13.3.0
AVR gcc 13.4.0
AVR gcc 14.1.0
AVR gcc 14.2.0
AVR gcc 14.3.0
AVR gcc 15.1.0
AVR gcc 15.2.0
AVR gcc 4.5.4
AVR gcc 4.6.4
AVR gcc 5.4.0
AVR gcc 9.2.0
AVR gcc 9.3.0
Arduino Mega (1.8.9)
Arduino Uno (1.8.9)
BPF clang (trunk)
BPF clang 13.0.0
BPF clang 14.0.0
BPF clang 15.0.0
BPF clang 16.0.0
BPF clang 17.0.1
BPF clang 18.1.0
BPF clang 19.1.0
BPF clang 20.1.0
BPF clang 21.1.0
EDG (experimental reflection)
EDG 6.5
EDG 6.5 (GNU mode gcc 13)
EDG 6.6
EDG 6.6 (GNU mode gcc 13)
EDG 6.7
EDG 6.7 (GNU mode gcc 14)
FRC 2019
FRC 2020
FRC 2023
HPPA gcc 14.2.0
HPPA gcc 14.3.0
HPPA gcc 15.1.0
HPPA gcc 15.2.0
KVX ACB 4.1.0 (GCC 7.5.0)
KVX ACB 4.1.0-cd1 (GCC 7.5.0)
KVX ACB 4.10.0 (GCC 10.3.1)
KVX ACB 4.11.1 (GCC 10.3.1)
KVX ACB 4.12.0 (GCC 11.3.0)
KVX ACB 4.2.0 (GCC 7.5.0)
KVX ACB 4.3.0 (GCC 7.5.0)
KVX ACB 4.4.0 (GCC 7.5.0)
KVX ACB 4.6.0 (GCC 9.4.1)
KVX ACB 4.8.0 (GCC 9.4.1)
KVX ACB 4.9.0 (GCC 9.4.1)
KVX ACB 5.0.0 (GCC 12.2.1)
KVX ACB 5.2.0 (GCC 13.2.1)
LoongArch64 clang (trunk)
LoongArch64 clang 17.0.1
LoongArch64 clang 18.1.0
LoongArch64 clang 19.1.0
LoongArch64 clang 20.1.0
LoongArch64 clang 21.1.0
M68K gcc 13.1.0
M68K gcc 13.2.0
M68K gcc 13.3.0
M68K gcc 13.4.0
M68K gcc 14.1.0
M68K gcc 14.2.0
M68K gcc 14.3.0
M68K gcc 15.1.0
M68K gcc 15.2.0
M68k clang (trunk)
MRISC32 gcc (trunk)
MSP430 gcc 4.5.3
MSP430 gcc 5.3.0
MSP430 gcc 6.2.1
MinGW clang 14.0.3
MinGW clang 14.0.6
MinGW clang 15.0.7
MinGW clang 16.0.0
MinGW clang 16.0.2
MinGW gcc 11.3.0
MinGW gcc 12.1.0
MinGW gcc 12.2.0
MinGW gcc 13.1.0
RISC-V (32-bits) gcc (trunk)
RISC-V (32-bits) gcc 10.2.0
RISC-V (32-bits) gcc 10.3.0
RISC-V (32-bits) gcc 11.2.0
RISC-V (32-bits) gcc 11.3.0
RISC-V (32-bits) gcc 11.4.0
RISC-V (32-bits) gcc 12.1.0
RISC-V (32-bits) gcc 12.2.0
RISC-V (32-bits) gcc 12.3.0
RISC-V (32-bits) gcc 12.4.0
RISC-V (32-bits) gcc 12.5.0
RISC-V (32-bits) gcc 13.1.0
RISC-V (32-bits) gcc 13.2.0
RISC-V (32-bits) gcc 13.3.0
RISC-V (32-bits) gcc 13.4.0
RISC-V (32-bits) gcc 14.1.0
RISC-V (32-bits) gcc 14.2.0
RISC-V (32-bits) gcc 14.3.0
RISC-V (32-bits) gcc 15.1.0
RISC-V (32-bits) gcc 15.2.0
RISC-V (32-bits) gcc 8.2.0
RISC-V (32-bits) gcc 8.5.0
RISC-V (32-bits) gcc 9.4.0
RISC-V (64-bits) gcc (trunk)
RISC-V (64-bits) gcc 10.2.0
RISC-V (64-bits) gcc 10.3.0
RISC-V (64-bits) gcc 11.2.0
RISC-V (64-bits) gcc 11.3.0
RISC-V (64-bits) gcc 11.4.0
RISC-V (64-bits) gcc 12.1.0
RISC-V (64-bits) gcc 12.2.0
RISC-V (64-bits) gcc 12.3.0
RISC-V (64-bits) gcc 12.4.0
RISC-V (64-bits) gcc 12.5.0
RISC-V (64-bits) gcc 13.1.0
RISC-V (64-bits) gcc 13.2.0
RISC-V (64-bits) gcc 13.3.0
RISC-V (64-bits) gcc 13.4.0
RISC-V (64-bits) gcc 14.1.0
RISC-V (64-bits) gcc 14.2.0
RISC-V (64-bits) gcc 14.3.0
RISC-V (64-bits) gcc 15.1.0
RISC-V (64-bits) gcc 15.2.0
RISC-V (64-bits) gcc 8.2.0
RISC-V (64-bits) gcc 8.5.0
RISC-V (64-bits) gcc 9.4.0
RISC-V rv32gc clang (trunk)
RISC-V rv32gc clang 10.0.0
RISC-V rv32gc clang 10.0.1
RISC-V rv32gc clang 11.0.0
RISC-V rv32gc clang 11.0.1
RISC-V rv32gc clang 12.0.0
RISC-V rv32gc clang 12.0.1
RISC-V rv32gc clang 13.0.0
RISC-V rv32gc clang 13.0.1
RISC-V rv32gc clang 14.0.0
RISC-V rv32gc clang 15.0.0
RISC-V rv32gc clang 16.0.0
RISC-V rv32gc clang 17.0.1
RISC-V rv32gc clang 18.1.0
RISC-V rv32gc clang 19.1.0
RISC-V rv32gc clang 20.1.0
RISC-V rv32gc clang 21.1.0
RISC-V rv32gc clang 9.0.0
RISC-V rv32gc clang 9.0.1
RISC-V rv64gc clang (trunk)
RISC-V rv64gc clang 10.0.0
RISC-V rv64gc clang 10.0.1
RISC-V rv64gc clang 11.0.0
RISC-V rv64gc clang 11.0.1
RISC-V rv64gc clang 12.0.0
RISC-V rv64gc clang 12.0.1
RISC-V rv64gc clang 13.0.0
RISC-V rv64gc clang 13.0.1
RISC-V rv64gc clang 14.0.0
RISC-V rv64gc clang 15.0.0
RISC-V rv64gc clang 16.0.0
RISC-V rv64gc clang 17.0.1
RISC-V rv64gc clang 18.1.0
RISC-V rv64gc clang 19.1.0
RISC-V rv64gc clang 20.1.0
RISC-V rv64gc clang 21.1.0
RISC-V rv64gc clang 9.0.0
RISC-V rv64gc clang 9.0.1
Raspbian Buster
Raspbian Stretch
SPARC LEON gcc 12.2.0
SPARC LEON gcc 12.3.0
SPARC LEON gcc 12.4.0
SPARC LEON gcc 12.5.0
SPARC LEON gcc 13.1.0
SPARC LEON gcc 13.2.0
SPARC LEON gcc 13.3.0
SPARC LEON gcc 13.4.0
SPARC LEON gcc 14.1.0
SPARC LEON gcc 14.2.0
SPARC LEON gcc 14.3.0
SPARC LEON gcc 15.1.0
SPARC LEON gcc 15.2.0
SPARC gcc 12.2.0
SPARC gcc 12.3.0
SPARC gcc 12.4.0
SPARC gcc 12.5.0
SPARC gcc 13.1.0
SPARC gcc 13.2.0
SPARC gcc 13.3.0
SPARC gcc 13.4.0
SPARC gcc 14.1.0
SPARC gcc 14.2.0
SPARC gcc 14.3.0
SPARC gcc 15.1.0
SPARC gcc 15.2.0
SPARC64 gcc 12.2.0
SPARC64 gcc 12.3.0
SPARC64 gcc 12.4.0
SPARC64 gcc 12.5.0
SPARC64 gcc 13.1.0
SPARC64 gcc 13.2.0
SPARC64 gcc 13.3.0
SPARC64 gcc 13.4.0
SPARC64 gcc 14.1.0
SPARC64 gcc 14.2.0
SPARC64 gcc 14.3.0
SPARC64 gcc 15.1.0
SPARC64 gcc 15.2.0
TI C6x gcc 12.2.0
TI C6x gcc 12.3.0
TI C6x gcc 12.4.0
TI C6x gcc 12.5.0
TI C6x gcc 13.1.0
TI C6x gcc 13.2.0
TI C6x gcc 13.3.0
TI C6x gcc 13.4.0
TI C6x gcc 14.1.0
TI C6x gcc 14.2.0
TI C6x gcc 14.3.0
TI C6x gcc 15.1.0
TI C6x gcc 15.2.0
TI CL430 21.6.1
Tricore gcc 11.3.0 (EEESlab)
VAX gcc NetBSDELF 10.4.0
VAX gcc NetBSDELF 10.5.0 (Nov 15 03:50:22 2023)
VAX gcc NetBSDELF 12.4.0 (Apr 16 05:27 2025)
WebAssembly clang (trunk)
Xtensa ESP32 gcc 11.2.0 (2022r1)
Xtensa ESP32 gcc 12.2.0 (20230208)
Xtensa ESP32 gcc 14.2.0 (20241119)
Xtensa ESP32 gcc 8.2.0 (2019r2)
Xtensa ESP32 gcc 8.2.0 (2020r1)
Xtensa ESP32 gcc 8.2.0 (2020r2)
Xtensa ESP32 gcc 8.4.0 (2020r3)
Xtensa ESP32 gcc 8.4.0 (2021r1)
Xtensa ESP32 gcc 8.4.0 (2021r2)
Xtensa ESP32-S2 gcc 11.2.0 (2022r1)
Xtensa ESP32-S2 gcc 12.2.0 (20230208)
Xtensa ESP32-S2 gcc 14.2.0 (20241119)
Xtensa ESP32-S2 gcc 8.2.0 (2019r2)
Xtensa ESP32-S2 gcc 8.2.0 (2020r1)
Xtensa ESP32-S2 gcc 8.2.0 (2020r2)
Xtensa ESP32-S2 gcc 8.4.0 (2020r3)
Xtensa ESP32-S2 gcc 8.4.0 (2021r1)
Xtensa ESP32-S2 gcc 8.4.0 (2021r2)
Xtensa ESP32-S3 gcc 11.2.0 (2022r1)
Xtensa ESP32-S3 gcc 12.2.0 (20230208)
Xtensa ESP32-S3 gcc 14.2.0 (20241119)
Xtensa ESP32-S3 gcc 8.4.0 (2020r3)
Xtensa ESP32-S3 gcc 8.4.0 (2021r1)
Xtensa ESP32-S3 gcc 8.4.0 (2021r2)
arm64 msvc v19.20 VS16.0
arm64 msvc v19.21 VS16.1
arm64 msvc v19.22 VS16.2
arm64 msvc v19.23 VS16.3
arm64 msvc v19.24 VS16.4
arm64 msvc v19.25 VS16.5
arm64 msvc v19.27 VS16.7
arm64 msvc v19.28 VS16.8
arm64 msvc v19.28 VS16.9
arm64 msvc v19.29 VS16.10
arm64 msvc v19.29 VS16.11
arm64 msvc v19.30 VS17.0
arm64 msvc v19.31 VS17.1
arm64 msvc v19.32 VS17.2
arm64 msvc v19.33 VS17.3
arm64 msvc v19.34 VS17.4
arm64 msvc v19.35 VS17.5
arm64 msvc v19.36 VS17.6
arm64 msvc v19.37 VS17.7
arm64 msvc v19.38 VS17.8
arm64 msvc v19.39 VS17.9
arm64 msvc v19.40 VS17.10
arm64 msvc v19.41 VS17.11
arm64 msvc v19.42 VS17.12
arm64 msvc v19.43 VS17.13
arm64 msvc v19.latest
armv7-a clang (trunk)
armv7-a clang 10.0.0
armv7-a clang 10.0.1
armv7-a clang 11.0.0
armv7-a clang 11.0.1
armv7-a clang 12.0.0
armv7-a clang 12.0.1
armv7-a clang 13.0.0
armv7-a clang 13.0.1
armv7-a clang 14.0.0
armv7-a clang 15.0.0
armv7-a clang 16.0.0
armv7-a clang 17.0.1
armv7-a clang 18.1.0
armv7-a clang 19.1.0
armv7-a clang 20.1.0
armv7-a clang 21.1.0
armv7-a clang 9.0.0
armv7-a clang 9.0.1
armv8-a clang (all architectural features, trunk)
armv8-a clang (trunk)
armv8-a clang 10.0.0
armv8-a clang 10.0.1
armv8-a clang 11.0.0
armv8-a clang 11.0.1
armv8-a clang 12.0.0
armv8-a clang 13.0.0
armv8-a clang 14.0.0
armv8-a clang 15.0.0
armv8-a clang 16.0.0
armv8-a clang 17.0.1
armv8-a clang 18.1.0
armv8-a clang 19.1.0
armv8-a clang 20.1.0
armv8-a clang 21.1.0
armv8-a clang 9.0.0
armv8-a clang 9.0.1
clad trunk (clang 21.1.0)
clad v1.10 (clang 20.1.0)
clad v1.8 (clang 18.1.0)
clad v1.9 (clang 19.1.0)
clad v2.00 (clang 20.1.0)
clang-cl 18.1.0
ellcc 0.1.33
ellcc 0.1.34
ellcc 2017-07-16
ez80-clang 15.0.0
ez80-clang 15.0.7
hexagon-clang 16.0.5
llvm-mos atari2600-3e
llvm-mos atari2600-4k
llvm-mos atari2600-common
llvm-mos atari5200-supercart
llvm-mos atari8-cart-megacart
llvm-mos atari8-cart-std
llvm-mos atari8-cart-xegs
llvm-mos atari8-common
llvm-mos atari8-dos
llvm-mos c128
llvm-mos c64
llvm-mos commodore
llvm-mos cpm65
llvm-mos cx16
llvm-mos dodo
llvm-mos eater
llvm-mos mega65
llvm-mos nes
llvm-mos nes-action53
llvm-mos nes-cnrom
llvm-mos nes-gtrom
llvm-mos nes-mmc1
llvm-mos nes-mmc3
llvm-mos nes-nrom
llvm-mos nes-unrom
llvm-mos nes-unrom-512
llvm-mos osi-c1p
llvm-mos pce
llvm-mos pce-cd
llvm-mos pce-common
llvm-mos pet
llvm-mos rp6502
llvm-mos rpc8e
llvm-mos supervision
llvm-mos vic20
loongarch64 gcc 12.2.0
loongarch64 gcc 12.3.0
loongarch64 gcc 12.4.0
loongarch64 gcc 12.5.0
loongarch64 gcc 13.1.0
loongarch64 gcc 13.2.0
loongarch64 gcc 13.3.0
loongarch64 gcc 13.4.0
loongarch64 gcc 14.1.0
loongarch64 gcc 14.2.0
loongarch64 gcc 14.3.0
loongarch64 gcc 15.1.0
loongarch64 gcc 15.2.0
mips clang 13.0.0
mips clang 14.0.0
mips clang 15.0.0
mips clang 16.0.0
mips clang 17.0.1
mips clang 18.1.0
mips clang 19.1.0
mips clang 20.1.0
mips clang 21.1.0
mips gcc 11.2.0
mips gcc 12.1.0
mips gcc 12.2.0
mips gcc 12.3.0
mips gcc 12.4.0
mips gcc 12.5.0
mips gcc 13.1.0
mips gcc 13.2.0
mips gcc 13.3.0
mips gcc 13.4.0
mips gcc 14.1.0
mips gcc 14.2.0
mips gcc 14.3.0
mips gcc 15.1.0
mips gcc 15.2.0
mips gcc 4.9.4
mips gcc 5.4
mips gcc 5.5.0
mips gcc 9.3.0 (codescape)
mips gcc 9.5.0
mips64 (el) gcc 12.1.0
mips64 (el) gcc 12.2.0
mips64 (el) gcc 12.3.0
mips64 (el) gcc 12.4.0
mips64 (el) gcc 12.5.0
mips64 (el) gcc 13.1.0
mips64 (el) gcc 13.2.0
mips64 (el) gcc 13.3.0
mips64 (el) gcc 13.4.0
mips64 (el) gcc 14.1.0
mips64 (el) gcc 14.2.0
mips64 (el) gcc 14.3.0
mips64 (el) gcc 15.1.0
mips64 (el) gcc 15.2.0
mips64 (el) gcc 4.9.4
mips64 (el) gcc 5.4.0
mips64 (el) gcc 5.5.0
mips64 (el) gcc 9.5.0
mips64 clang 13.0.0
mips64 clang 14.0.0
mips64 clang 15.0.0
mips64 clang 16.0.0
mips64 clang 17.0.1
mips64 clang 18.1.0
mips64 clang 19.1.0
mips64 clang 20.1.0
mips64 clang 21.1.0
mips64 gcc 11.2.0
mips64 gcc 12.1.0
mips64 gcc 12.2.0
mips64 gcc 12.3.0
mips64 gcc 12.4.0
mips64 gcc 12.5.0
mips64 gcc 13.1.0
mips64 gcc 13.2.0
mips64 gcc 13.3.0
mips64 gcc 13.4.0
mips64 gcc 14.1.0
mips64 gcc 14.2.0
mips64 gcc 14.3.0
mips64 gcc 15.1.0
mips64 gcc 15.2.0
mips64 gcc 4.9.4
mips64 gcc 5.4.0
mips64 gcc 5.5.0
mips64 gcc 9.5.0
mips64el clang 13.0.0
mips64el clang 14.0.0
mips64el clang 15.0.0
mips64el clang 16.0.0
mips64el clang 17.0.1
mips64el clang 18.1.0
mips64el clang 19.1.0
mips64el clang 20.1.0
mips64el clang 21.1.0
mipsel clang 13.0.0
mipsel clang 14.0.0
mipsel clang 15.0.0
mipsel clang 16.0.0
mipsel clang 17.0.1
mipsel clang 18.1.0
mipsel clang 19.1.0
mipsel clang 20.1.0
mipsel clang 21.1.0
mipsel gcc 12.1.0
mipsel gcc 12.2.0
mipsel gcc 12.3.0
mipsel gcc 12.4.0
mipsel gcc 12.5.0
mipsel gcc 13.1.0
mipsel gcc 13.2.0
mipsel gcc 13.3.0
mipsel gcc 13.4.0
mipsel gcc 14.1.0
mipsel gcc 14.2.0
mipsel gcc 14.3.0
mipsel gcc 15.1.0
mipsel gcc 15.2.0
mipsel gcc 4.9.4
mipsel gcc 5.4.0
mipsel gcc 5.5.0
mipsel gcc 9.5.0
nanoMIPS gcc 6.3.0 (mtk)
power gcc 11.2.0
power gcc 12.1.0
power gcc 12.2.0
power gcc 12.3.0
power gcc 12.4.0
power gcc 12.5.0
power gcc 13.1.0
power gcc 13.2.0
power gcc 13.3.0
power gcc 13.4.0
power gcc 14.1.0
power gcc 14.2.0
power gcc 14.3.0
power gcc 15.1.0
power gcc 15.2.0
power gcc 4.8.5
power64 AT12.0 (gcc8)
power64 AT13.0 (gcc9)
power64 gcc 11.2.0
power64 gcc 12.1.0
power64 gcc 12.2.0
power64 gcc 12.3.0
power64 gcc 12.4.0
power64 gcc 12.5.0
power64 gcc 13.1.0
power64 gcc 13.2.0
power64 gcc 13.3.0
power64 gcc 13.4.0
power64 gcc 14.1.0
power64 gcc 14.2.0
power64 gcc 14.3.0
power64 gcc 15.1.0
power64 gcc 15.2.0
power64 gcc trunk
power64le AT12.0 (gcc8)
power64le AT13.0 (gcc9)
power64le clang (trunk)
power64le gcc 11.2.0
power64le gcc 12.1.0
power64le gcc 12.2.0
power64le gcc 12.3.0
power64le gcc 12.4.0
power64le gcc 12.5.0
power64le gcc 13.1.0
power64le gcc 13.2.0
power64le gcc 13.3.0
power64le gcc 13.4.0
power64le gcc 14.1.0
power64le gcc 14.2.0
power64le gcc 14.3.0
power64le gcc 15.1.0
power64le gcc 15.2.0
power64le gcc 6.3.0
power64le gcc trunk
powerpc64 clang (trunk)
qnx 8.0.0
s390x gcc 11.2.0
s390x gcc 12.1.0
s390x gcc 12.2.0
s390x gcc 12.3.0
s390x gcc 12.4.0
s390x gcc 12.5.0
s390x gcc 13.1.0
s390x gcc 13.2.0
s390x gcc 13.3.0
s390x gcc 13.4.0
s390x gcc 14.1.0
s390x gcc 14.2.0
s390x gcc 14.3.0
s390x gcc 15.1.0
s390x gcc 15.2.0
sh gcc 12.2.0
sh gcc 12.3.0
sh gcc 12.4.0
sh gcc 12.5.0
sh gcc 13.1.0
sh gcc 13.2.0
sh gcc 13.3.0
sh gcc 13.4.0
sh gcc 14.1.0
sh gcc 14.2.0
sh gcc 14.3.0
sh gcc 15.1.0
sh gcc 15.2.0
sh gcc 4.9.4
sh gcc 9.5.0
vast (trunk)
x64 msvc v19.0 (ex-WINE)
x64 msvc v19.10 (ex-WINE)
x64 msvc v19.14 (ex-WINE)
x64 msvc v19.20 VS16.0
x64 msvc v19.21 VS16.1
x64 msvc v19.22 VS16.2
x64 msvc v19.23 VS16.3
x64 msvc v19.24 VS16.4
x64 msvc v19.25 VS16.5
x64 msvc v19.27 VS16.7
x64 msvc v19.28 VS16.8
x64 msvc v19.28 VS16.9
x64 msvc v19.29 VS16.10
x64 msvc v19.29 VS16.11
x64 msvc v19.30 VS17.0
x64 msvc v19.31 VS17.1
x64 msvc v19.32 VS17.2
x64 msvc v19.33 VS17.3
x64 msvc v19.34 VS17.4
x64 msvc v19.35 VS17.5
x64 msvc v19.36 VS17.6
x64 msvc v19.37 VS17.7
x64 msvc v19.38 VS17.8
x64 msvc v19.39 VS17.9
x64 msvc v19.40 VS17.10
x64 msvc v19.41 VS17.11
x64 msvc v19.42 VS17.12
x64 msvc v19.43 VS17.13
x64 msvc v19.latest
x86 djgpp 4.9.4
x86 djgpp 5.5.0
x86 djgpp 6.4.0
x86 djgpp 7.2.0
x86 msvc v19.0 (ex-WINE)
x86 msvc v19.10 (ex-WINE)
x86 msvc v19.14 (ex-WINE)
x86 msvc v19.20 VS16.0
x86 msvc v19.21 VS16.1
x86 msvc v19.22 VS16.2
x86 msvc v19.23 VS16.3
x86 msvc v19.24 VS16.4
x86 msvc v19.25 VS16.5
x86 msvc v19.27 VS16.7
x86 msvc v19.28 VS16.8
x86 msvc v19.28 VS16.9
x86 msvc v19.29 VS16.10
x86 msvc v19.29 VS16.11
x86 msvc v19.30 VS17.0
x86 msvc v19.31 VS17.1
x86 msvc v19.32 VS17.2
x86 msvc v19.33 VS17.3
x86 msvc v19.34 VS17.4
x86 msvc v19.35 VS17.5
x86 msvc v19.36 VS17.6
x86 msvc v19.37 VS17.7
x86 msvc v19.38 VS17.8
x86 msvc v19.39 VS17.9
x86 msvc v19.40 VS17.10
x86 msvc v19.41 VS17.11
x86 msvc v19.42 VS17.12
x86 msvc v19.43 VS17.13
x86 msvc v19.latest
x86 nvc++ 22.11
x86 nvc++ 22.7
x86 nvc++ 22.9
x86 nvc++ 23.1
x86 nvc++ 23.11
x86 nvc++ 23.3
x86 nvc++ 23.5
x86 nvc++ 23.7
x86 nvc++ 23.9
x86 nvc++ 24.1
x86 nvc++ 24.11
x86 nvc++ 24.3
x86 nvc++ 24.5
x86 nvc++ 24.7
x86 nvc++ 24.9
x86 nvc++ 25.1
x86 nvc++ 25.3
x86 nvc++ 25.5
x86 nvc++ 25.7
x86-64 Zapcc 190308
x86-64 clang (-fimplicit-constexpr)
x86-64 clang (Chris Bazley N3089)
x86-64 clang (EricWF contracts)
x86-64 clang (amd-staging)
x86-64 clang (assertions trunk)
x86-64 clang (clangir)
x86-64 clang (experimental -Wlifetime)
x86-64 clang (experimental P1061)
x86-64 clang (experimental P1144)
x86-64 clang (experimental P1221)
x86-64 clang (experimental P2998)
x86-64 clang (experimental P3068)
x86-64 clang (experimental P3309)
x86-64 clang (experimental P3367)
x86-64 clang (experimental P3372)
x86-64 clang (experimental P3385)
x86-64 clang (experimental P3776)
x86-64 clang (experimental metaprogramming - P2632)
x86-64 clang (old concepts branch)
x86-64 clang (p1974)
x86-64 clang (pattern matching - P2688)
x86-64 clang (reflection - C++26)
x86-64 clang (reflection - TS)
x86-64 clang (resugar)
x86-64 clang (string interpolation - P3412)
x86-64 clang (thephd.dev)
x86-64 clang (trunk)
x86-64 clang (variadic friends - P2893)
x86-64 clang (widberg)
x86-64 clang 10.0.0
x86-64 clang 10.0.0 (assertions)
x86-64 clang 10.0.1
x86-64 clang 11.0.0
x86-64 clang 11.0.0 (assertions)
x86-64 clang 11.0.1
x86-64 clang 12.0.0
x86-64 clang 12.0.0 (assertions)
x86-64 clang 12.0.1
x86-64 clang 13.0.0
x86-64 clang 13.0.0 (assertions)
x86-64 clang 13.0.1
x86-64 clang 14.0.0
x86-64 clang 14.0.0 (assertions)
x86-64 clang 15.0.0
x86-64 clang 15.0.0 (assertions)
x86-64 clang 16.0.0
x86-64 clang 16.0.0 (assertions)
x86-64 clang 17.0.1
x86-64 clang 17.0.1 (assertions)
x86-64 clang 18.1.0
x86-64 clang 18.1.0 (assertions)
x86-64 clang 19.1.0
x86-64 clang 19.1.0 (assertions)
x86-64 clang 2.6.0 (assertions)
x86-64 clang 2.7.0 (assertions)
x86-64 clang 2.8.0 (assertions)
x86-64 clang 2.9.0 (assertions)
x86-64 clang 20.1.0
x86-64 clang 20.1.0 (assertions)
x86-64 clang 21.1.0
x86-64 clang 21.1.0 (assertions)
x86-64 clang 3.0.0
x86-64 clang 3.0.0 (assertions)
x86-64 clang 3.1
x86-64 clang 3.1 (assertions)
x86-64 clang 3.2
x86-64 clang 3.2 (assertions)
x86-64 clang 3.3
x86-64 clang 3.3 (assertions)
x86-64 clang 3.4 (assertions)
x86-64 clang 3.4.1
x86-64 clang 3.5
x86-64 clang 3.5 (assertions)
x86-64 clang 3.5.1
x86-64 clang 3.5.2
x86-64 clang 3.6
x86-64 clang 3.6 (assertions)
x86-64 clang 3.7
x86-64 clang 3.7 (assertions)
x86-64 clang 3.7.1
x86-64 clang 3.8
x86-64 clang 3.8 (assertions)
x86-64 clang 3.8.1
x86-64 clang 3.9.0
x86-64 clang 3.9.0 (assertions)
x86-64 clang 3.9.1
x86-64 clang 4.0.0
x86-64 clang 4.0.0 (assertions)
x86-64 clang 4.0.1
x86-64 clang 5.0.0
x86-64 clang 5.0.0 (assertions)
x86-64 clang 5.0.1
x86-64 clang 5.0.2
x86-64 clang 6.0.0
x86-64 clang 6.0.0 (assertions)
x86-64 clang 6.0.1
x86-64 clang 7.0.0
x86-64 clang 7.0.0 (assertions)
x86-64 clang 7.0.1
x86-64 clang 7.1.0
x86-64 clang 8.0.0
x86-64 clang 8.0.0 (assertions)
x86-64 clang 8.0.1
x86-64 clang 9.0.0
x86-64 clang 9.0.0 (assertions)
x86-64 clang 9.0.1
x86-64 clang rocm-4.5.2
x86-64 clang rocm-5.0.2
x86-64 clang rocm-5.1.3
x86-64 clang rocm-5.2.3
x86-64 clang rocm-5.3.3
x86-64 clang rocm-5.7.0
x86-64 clang rocm-6.0.2
x86-64 clang rocm-6.1.2
x86-64 clang rocm-6.2.4
x86-64 clang rocm-6.3.3
x86-64 clang rocm-6.4.0
x86-64 gcc (P2034 lambdas)
x86-64 gcc (contract labels)
x86-64 gcc (contracts natural syntax)
x86-64 gcc (contracts)
x86-64 gcc (coroutines)
x86-64 gcc (modules)
x86-64 gcc (trunk)
x86-64 gcc 10.1
x86-64 gcc 10.2
x86-64 gcc 10.3
x86-64 gcc 10.3 (assertions)
x86-64 gcc 10.4
x86-64 gcc 10.4 (assertions)
x86-64 gcc 10.5
x86-64 gcc 10.5 (assertions)
x86-64 gcc 11.1
x86-64 gcc 11.1 (assertions)
x86-64 gcc 11.2
x86-64 gcc 11.2 (assertions)
x86-64 gcc 11.3
x86-64 gcc 11.3 (assertions)
x86-64 gcc 11.4
x86-64 gcc 11.4 (assertions)
x86-64 gcc 12.1
x86-64 gcc 12.1 (assertions)
x86-64 gcc 12.2
x86-64 gcc 12.2 (assertions)
x86-64 gcc 12.3
x86-64 gcc 12.3 (assertions)
x86-64 gcc 12.4
x86-64 gcc 12.4 (assertions)
x86-64 gcc 12.5
x86-64 gcc 12.5 (assertions)
x86-64 gcc 13.1
x86-64 gcc 13.1 (assertions)
x86-64 gcc 13.2
x86-64 gcc 13.2 (assertions)
x86-64 gcc 13.3
x86-64 gcc 13.3 (assertions)
x86-64 gcc 13.4
x86-64 gcc 13.4 (assertions)
x86-64 gcc 14.1
x86-64 gcc 14.1 (assertions)
x86-64 gcc 14.2
x86-64 gcc 14.2 (assertions)
x86-64 gcc 14.3
x86-64 gcc 14.3 (assertions)
x86-64 gcc 15.1
x86-64 gcc 15.1 (assertions)
x86-64 gcc 15.2
x86-64 gcc 15.2 (assertions)
x86-64 gcc 3.4.6
x86-64 gcc 4.0.4
x86-64 gcc 4.1.2
x86-64 gcc 4.4.7
x86-64 gcc 4.5.3
x86-64 gcc 4.6.4
x86-64 gcc 4.7.1
x86-64 gcc 4.7.2
x86-64 gcc 4.7.3
x86-64 gcc 4.7.4
x86-64 gcc 4.8.1
x86-64 gcc 4.8.2
x86-64 gcc 4.8.3
x86-64 gcc 4.8.4
x86-64 gcc 4.8.5
x86-64 gcc 4.9.0
x86-64 gcc 4.9.1
x86-64 gcc 4.9.2
x86-64 gcc 4.9.3
x86-64 gcc 4.9.4
x86-64 gcc 5.1
x86-64 gcc 5.2
x86-64 gcc 5.3
x86-64 gcc 5.4
x86-64 gcc 5.5
x86-64 gcc 6.1
x86-64 gcc 6.2
x86-64 gcc 6.3
x86-64 gcc 6.4
x86-64 gcc 6.5
x86-64 gcc 7.1
x86-64 gcc 7.2
x86-64 gcc 7.3
x86-64 gcc 7.4
x86-64 gcc 7.5
x86-64 gcc 8.1
x86-64 gcc 8.2
x86-64 gcc 8.3
x86-64 gcc 8.4
x86-64 gcc 8.5
x86-64 gcc 9.1
x86-64 gcc 9.2
x86-64 gcc 9.3
x86-64 gcc 9.4
x86-64 gcc 9.5
x86-64 icc 13.0.1
x86-64 icc 16.0.3
x86-64 icc 17.0.0
x86-64 icc 18.0.0
x86-64 icc 19.0.0
x86-64 icc 19.0.1
x86-64 icc 2021.1.2
x86-64 icc 2021.10.0
x86-64 icc 2021.2.0
x86-64 icc 2021.3.0
x86-64 icc 2021.4.0
x86-64 icc 2021.5.0
x86-64 icc 2021.6.0
x86-64 icc 2021.7.0
x86-64 icc 2021.7.1
x86-64 icc 2021.8.0
x86-64 icc 2021.9.0
x86-64 icx 2021.1.2
x86-64 icx 2021.2.0
x86-64 icx 2021.3.0
x86-64 icx 2021.4.0
x86-64 icx 2022.0.0
x86-64 icx 2022.1.0
x86-64 icx 2022.2.0
x86-64 icx 2022.2.1
x86-64 icx 2023.0.0
x86-64 icx 2023.1.0
x86-64 icx 2023.2.1
x86-64 icx 2024.0.0
x86-64 icx 2024.1.0
x86-64 icx 2024.2.0
x86-64 icx 2024.2.1
x86-64 icx 2025.0.0
x86-64 icx 2025.0.1
x86-64 icx 2025.0.3
x86-64 icx 2025.0.4
x86-64 icx 2025.1.0
x86-64 icx 2025.1.1
x86-64 icx 2025.2.0
x86-64 icx 2025.2.1
x86-64 icx 2025.2.1
z180-clang 15.0.0
z180-clang 15.0.7
z80-clang 15.0.0
z80-clang 15.0.7
zig c++ 0.10.0
zig c++ 0.11.0
zig c++ 0.12.0
zig c++ 0.12.1
zig c++ 0.13.0
zig c++ 0.14.0
zig c++ 0.14.1
zig c++ 0.15.1
zig c++ 0.6.0
zig c++ 0.7.0
zig c++ 0.7.1
zig c++ 0.8.0
zig c++ 0.9.0
zig c++ trunk
Options
Source code
/* * FastCollisionDetectionLib.h * * Created on: Mar 11, 2022 * Author: tugrul */ /* * Generator.h * * Created on: Mar 8, 2022 * Author: tugrul */ #ifndef GENERATOR_H_ #define GENERATOR_H_ #include<memory> namespace oofrng { template<int LANES=64> class Generator { public: Generator() { alignedSeedBuf = std::make_shared<AlignedSeedBuffer>(); } const uint32_t generate1() { return rnd(alignedSeedBuf->seed); } const uint32_t generate1(const uint32_t limit) { return rnd(alignedSeedBuf->seed,limit); } const float generate1Float() { return ((float)rnd(alignedSeedBuf->seed))*alignedSeedBuf->multiplier; } const float generate1Float(const float limit) { return ((float)rnd(alignedSeedBuf->seed))*alignedSeedBuf->multiplier*limit; } // fills array of length n with values between 0 and max(2^32-1) void generate(uint32_t * const __restrict__ out, const size_t n) { const size_t nL = n-n%LANES; for(size_t i=0;i<nL;i+=LANES) { rndL(alignedSeedBuf->ptrL,out+i); } for(size_t i=nL;i<n;i++) { out[i]=rnd(alignedSeedBuf->seed); } } // fills array of length n with values in range [0,limit) void generate(uint32_t * const __restrict__ out, const size_t n, const uint32_t limit) { const size_t nL = n-n%LANES; for(size_t i=0;i<nL;i+=LANES) { rndL(alignedSeedBuf->ptrL,out+i,limit); } for(size_t i=nL;i<n;i++) { out[i]=rnd(alignedSeedBuf->seed,limit); } } // generate [0,1) void generate(float * const __restrict__ out, const size_t n) { const size_t nL = n-n%LANES; for(size_t i=0;i<nL;i+=LANES) { rndL(alignedSeedBuf->ptrL,out+i); } for(size_t i=nL;i<n;i++) { out[i]=rnd(alignedSeedBuf->seed); } } // generate [0,limit) void generate(float * const __restrict__ out, const size_t n, const float limit) { const size_t nL = n-n%LANES; for(size_t i=0;i<nL;i+=LANES) { rndL(alignedSeedBuf->ptrL,out+i,limit); } for(size_t i=nL;i<n;i++) { out[i]=rnd(alignedSeedBuf->seed,limit); } } private: static uint32_t* computeAlignment(uint32_t* ptr) { uint32_t* ptrLTmp = ptr; while(((size_t)ptrLTmp)%4096 != 0) { ptrLTmp++; } return ptrLTmp; } class AlignedSeedBuffer { public: AlignedSeedBuffer():ptrL(computeAlignment(seedL)),uint32_tmax(((uint32_t)0)-1),multiplier(1.0/uint32_tmax) { for(size_t i=0;i<LANES;i++) { ptrL[i]=i+1; } seed=LANES+1; } uint32_t seedL[LANES+4096]; uint32_t seed; uint32_t* const __restrict__ ptrL; const uint32_t uint32_tmax; const float multiplier; }; std::shared_ptr<AlignedSeedBuffer> alignedSeedBuf; // generate random number in range [0,max) const uint32_t rnd(uint32_t& seed) { // Thomas Wang's invention seed = (seed ^ 61) ^ (seed >> 16); seed *= 9; seed = seed ^ (seed >> 4); seed *= 0x27d4eb2d; seed = seed ^ (seed >> 15); return seed; } // generate random number in range [0,limit) const uint32_t rnd(uint32_t& seed, const uint32_t limit) { // Thomas Wang's invention seed = (seed ^ 61) ^ (seed >> 16); seed *= 9; seed = seed ^ (seed >> 4); seed *= 0x27d4eb2d; seed = seed ^ (seed >> 15); return seed%limit; } // generate [0,max) inline void rndL(uint32_t * const __restrict__ seed, uint32_t * const __restrict__ out) { for(int i=0;i<LANES;i+=2) { const uint32_t sd = seed[i]; const uint32_t sd_ = seed[i+1]; const uint32_t sd2 = (sd ^ 61) ^ (sd >> 16); const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); const uint32_t sd3 = sd2*9; const uint32_t sd3_ = sd2_*9; const uint32_t sd4 = sd3 ^ (sd3 >> 4); const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); const uint32_t sd5 = sd4*0x27d4eb2d; const uint32_t sd5_ = sd4_*0x27d4eb2d; const uint32_t sd6 = sd5 ^ (sd5 >> 15); const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); out[i]=sd6; out[i+1]=sd6_; seed[i]=sd6; seed[i+1]=sd6_; } } // generate [0,limit) inline void rndL(uint32_t * const __restrict__ seed, uint32_t * const __restrict__ out, const uint32_t limit) { for(int i=0;i<LANES;i+=2) { const uint32_t sd = seed[i]; const uint32_t sd_ = seed[i+1]; const uint32_t sd2 = (sd ^ 61) ^ (sd >> 16); const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); const uint32_t sd3 = sd2*9; const uint32_t sd3_ = sd2_*9; const uint32_t sd4 = sd3 ^ (sd3 >> 4); const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); const uint32_t sd5 = sd4*0x27d4eb2d; const uint32_t sd5_ = sd4_*0x27d4eb2d; const uint32_t sd6 = sd5 ^ (sd5 >> 15); const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); out[i]=sd6%limit; out[i+1]=sd6_%limit; seed[i]=sd6; seed[i+1]=sd6_; } } // generate [0,1) inline void rndL(uint32_t * const __restrict__ seed, float * const __restrict__ out) { const float mult = alignedSeedBuf->multiplier; for(int i=0;i<LANES;i+=2) { const uint32_t sd = seed[i]; const uint32_t sd_ = seed[i+1]; const uint32_t sd2 = (sd ^ 61) ^ (sd >> 16); const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); const uint32_t sd3 = sd2*9; const uint32_t sd3_ = sd2_*9; const uint32_t sd4 = sd3 ^ (sd3 >> 4); const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); const uint32_t sd5 = sd4*0x27d4eb2d; const uint32_t sd5_ = sd4_*0x27d4eb2d; const uint32_t sd6 = sd5 ^ (sd5 >> 15); const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); const float sd7 = sd6*mult; const float sd7_ = sd6_*mult; out[i]=sd7; out[i+1]=sd7_; seed[i]=sd6; seed[i+1]=sd6_; } } // generate [0,limit) inline void rndL(uint32_t * const __restrict__ seed, float * const __restrict__ out, const float limit) { const float mult = alignedSeedBuf->multiplier*limit; for(int i=0;i<LANES;i+=2) { const uint32_t sd = seed[i]; const uint32_t sd_ = seed[i+1]; const uint32_t sd2 = (sd ^ 61) ^ (sd >> 16); const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); const uint32_t sd3 = sd2*9; const uint32_t sd3_ = sd2_*9; const uint32_t sd4 = sd3 ^ (sd3 >> 4); const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); const uint32_t sd5 = sd4*0x27d4eb2d; const uint32_t sd5_ = sd4_*0x27d4eb2d; const uint32_t sd6 = sd5 ^ (sd5 >> 15); const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); const float sd7 = sd6*mult; const float sd7_ = sd6_*mult; out[i]=sd7; out[i+1]=sd7_; seed[i]=sd6; seed[i+1]=sd6_; } } }; } #endif /* GENERATOR_H_ */ /* * FastCollisionDetectionLib.h * * Created on: Mar 11, 2022 * Author: tugrul */ #ifndef FASTCOLLISIONDETECTIONLIB_H_ #define FASTCOLLISIONDETECTIONLIB_H_ #include<algorithm> #include<vector> #include<map> #include<unordered_map> #include<chrono> #include<memory> #include<math.h> #include<queue> #include<stack> #include<thread> #include<mutex> #include<set> #include<functional> #include<condition_variable> #include<unordered_set> #include<cmath> #include<iostream> namespace FastColDetLib { inline const int intersectDim(const float minx, const float maxx, const float minx2, const float maxx2) noexcept { return !((maxx < minx2) || (maxx2 < minx)); } /* * interface to build various objects that can collide each other * */ template<typename CoordType> class IParticle { public: virtual const CoordType getMaxX()const =0; virtual const CoordType getMaxY()const =0; virtual const CoordType getMaxZ()const =0; virtual const CoordType getMinX()const =0; virtual const CoordType getMinY()const =0; virtual const CoordType getMinZ()const =0; virtual const int getId()const =0; const bool intersectX(IParticle<CoordType>* p) { return !((getMaxX() < p->getMinX()) || (p->getMaxX() < getMinX())); } const bool intersectY(IParticle<CoordType>* p) { return !((getMaxY() < p->getMinY()) || (p->getMaxY() < getMinY())); } const bool intersectZ(IParticle<CoordType>* p) { return !((getMaxZ() < p->getMinZ()) || (p->getMaxZ() < getMinZ())); } virtual ~IParticle(){}; }; class Bench { public: Bench(size_t * targetPtr) { target=targetPtr; t1 = std::chrono::duration_cast< std::chrono::nanoseconds >(std::chrono::high_resolution_clock::now().time_since_epoch()); } ~Bench() { t2 = std::chrono::duration_cast< std::chrono::nanoseconds >(std::chrono::high_resolution_clock::now().time_since_epoch()); *target= t2.count() - t1.count(); } private: size_t * target; std::chrono::nanoseconds t1,t2; }; // keeps record of unique values inserted // works for positive integers (-1 reserved for first comparisons) template<typename SignedIntegralType, int n> struct FastUnique { public: FastUnique() { it=0; for(int i=0;i<n;i++) dict[i]=-1; } inline void reset() { it=0; for(int i=0;i<n;i++) dict[i]=-1; } inline void insert(const SignedIntegralType val) { const bool result = testImpl(val); dict[it]=(result?val:dict[it]); it+=(result?1:0); } inline const SignedIntegralType get(const int index) const noexcept { return dict[index]; } inline const bool test(const SignedIntegralType val) noexcept { return testImpl(val); } inline const void iterateSet(const SignedIntegralType val) noexcept { dict[it++]=val; } const int size() { return it; } SignedIntegralType * begin() { return dict; } SignedIntegralType * end() { return dict + it; } private: alignas(32) SignedIntegralType dict[n]; alignas(32) SignedIntegralType c[n]; int it; inline bool testImpl(const int val) noexcept { for(int i=0;i<n;i++) c[i]=(dict[i]==val); SignedIntegralType s = 0; for(int i=0;i<n;i++) s+=c[i]; return s==0; } }; template<typename DataType> class Memory { public: Memory() { memory=std::make_shared<std::vector<DataType>>(); allocPtr=std::make_shared<int>(); *allocPtr = 0; allocPtrPtr=allocPtr.get(); memory->resize(1024); ptr=memory->data(); } inline DataType * getPtr(const int index) const noexcept { return ptr+index; } inline DataType& getRef(const int index) const noexcept { return ((DataType* __restrict__ const)ptr)[index]; } inline const DataType get(const int index) const noexcept { return ((DataType* __restrict__ const)ptr)[index]; } inline void set(const int index, const DataType data) const noexcept { ((DataType* __restrict__ const)ptr)[index]=data; } inline void readFrom(Memory<DataType>& mem, const int index, const int indexThis, const int n) { std::copy(mem.ptr+index,mem.ptr+index+n,ptr+indexThis); } inline void writeTo(std::vector<DataType>& vec) { std::copy(ptr,ptr+*allocPtrPtr,vec.data()); } inline const int allocate(const int size) { const int result = *allocPtrPtr; while(size + *allocPtrPtr >= memory->size()) { memory->resize(memory->size()*2); } *allocPtrPtr += size; ptr=memory->data(); return result; } inline const int capacity() { return memory->size(); } inline const int size() { return *allocPtrPtr; } inline void reset() { *allocPtrPtr = 0; } private: DataType* ptr; std::shared_ptr<int> allocPtr; int* allocPtrPtr; std::shared_ptr<std::vector<DataType>> memory; }; constexpr int testParticleLimit = 128; // maximum particle AABB overlapping allowed on same cell constexpr int testUniqueLimit = 32; // maximum unique numbers for accumulation (equal to or less than testParticleLimit) struct MemoryPool { void clear() { nodeCollisionMask.reset(); childNodeCount.reset(); index.reset(); indexParticle.reset(); orderParticle.reset(); minX.reset(); maxX.reset(); minY.reset(); maxY.reset(); minZ.reset(); maxZ.reset(); nodeMinX.reset(); nodeMinY.reset(); nodeMinZ.reset(); nodeInvWidth.reset(); nodeInvHeight.reset(); nodeInvDepth.reset(); leafOffset.reset(); } // node-particle collision Memory<uint64_t> nodeCollisionMask; Memory<char> childNodeCount; Memory<int> index; Memory<int> indexParticle; Memory<int> orderParticle; Memory<float> nodeMinX; Memory<float> nodeMinY; Memory<float> nodeMinZ; Memory<float> nodeInvWidth; Memory<float> nodeInvHeight; Memory<float> nodeInvDepth; Memory<float> minX; Memory<float> maxX; Memory<float> minY; Memory<float> maxY; Memory<float> minZ; Memory<float> maxZ; Memory<int> idTmp[64]; Memory<int> orderTmp[64]; Memory<std::pair<int,int>> allPairsColl; Memory<FastUnique<int32_t, testUniqueLimit>> allPairsCollmapping; Memory<int> leafOffset; }; struct AdaptiveGridV2Fields { AdaptiveGridV2Fields(MemoryPool mPool, const float minx, const float miny, const float minz, const float maxx, const float maxy, const float maxz):mem(mPool), minCornerX(minx),minCornerY(miny),minCornerZ(minz),maxCornerX(maxx),maxCornerY(maxy),maxCornerZ(maxz), cellWidth ((maxx-minx)*0.25f), cellHeight ((maxy-miny)*0.25f), cellDepth ((maxz-minz)*0.25f), cellWidthInv (1.0f/((maxx-minx)*0.25f)), cellHeightInv(1.0f/((maxy-miny)*0.25f)), cellDepthInv (1.0f/((maxz-minz)*0.25f)) { } MemoryPool mem; const float minCornerX; const float minCornerY; const float minCornerZ; const float maxCornerX; const float maxCornerY; const float maxCornerZ; const float cellWidth; const float cellHeight; const float cellDepth; const float cellWidthInv; const float cellHeightInv; const float cellDepthInv; }; class AdaptiveGridV2 { private: // stores a bit in a byte at a position inline void storeBit(uint64_t & data, const uint64_t value, const int pos) noexcept { data = (value << pos) | (data & ~(((uint64_t)1) << pos)); } void comp4vs4( const int * const __restrict__ partId1, const int * const __restrict__ partId2, const float * const __restrict__ minx1, const float * const __restrict__ minx2, const float * const __restrict__ miny1, const float * const __restrict__ miny2, const float * const __restrict__ minz1, const float * const __restrict__ minz2, const float * const __restrict__ maxx1, const float * const __restrict__ maxx2, const float * const __restrict__ maxy1, const float * const __restrict__ maxy2, const float * const __restrict__ maxz1, const float * const __restrict__ maxz2, int * const __restrict__ out ) { alignas(32) int result[16]={ // 0v0 0v1 0v2 0v3 // 1v0 1v1 1v2 1v3 // 2v0 2v1 2v2 2v3 // 3v0 3v1 3v2 3v3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; for(int i=0;i<16;i++) result[i] = partId1[i&3]<partId2[i/4]; for(int i=0;i<16;i++) result[i] = result[i] && intersectDim(minx1[i&3], maxx1[i&3], minx2[i/4], maxx2[i/4]) && intersectDim(miny1[i&3], maxy1[i&3], miny2[i/4], maxy2[i/4]) && intersectDim(minz1[i&3], maxz1[i&3], minz2[i/4], maxz2[i/4]); for(int i=0;i<16;i++) out[i]=result[i]; }; public: AdaptiveGridV2(MemoryPool mem, const float minx, const float miny, const float minz, const float maxx, const float maxy, const float maxz) { fields = std::make_shared<AdaptiveGridV2Fields>(mem,minx,miny,minz,maxx,maxy,maxz); } void clear() { fields->mem.clear(); // set current (root) node's particle start index to 0 const int indexParticleStart = fields->mem.index.allocate(1); fields->mem.index.set(indexParticleStart,0); // set current (root) node's number of particles to 0 const int indexNumParticles = fields->mem.index.allocate(1); fields->mem.index.set(indexNumParticles,0); // set current (root) node's child node start const int indexChildNodeStart = fields->mem.index.allocate(1); fields->mem.index.set(indexChildNodeStart,3); // set AABB of current (root) node // X const int indexBoundMinXFloat = fields->mem.nodeMinX.allocate(1); fields->mem.nodeMinX.set(indexBoundMinXFloat,fields->minCornerX); // Y const int indexBoundMinYFloat = fields->mem.nodeMinY.allocate(1); fields->mem.nodeMinY.set(indexBoundMinYFloat,fields->minCornerY); // Z const int indexBoundMinZFloat = fields->mem.nodeMinZ.allocate(1); fields->mem.nodeMinZ.set(indexBoundMinZFloat,fields->minCornerZ); // cell inverse width const int indexWidthFloat = fields->mem.nodeInvWidth.allocate(1); fields->mem.nodeInvWidth.set(indexWidthFloat,fields->cellWidthInv); // cell inverse height const int indexHeightFloat = fields->mem.nodeInvHeight.allocate(1); fields->mem.nodeInvHeight.set(indexHeightFloat,fields->cellHeightInv); // cell inverse depth const int indexDepthFloat = fields->mem.nodeInvDepth.allocate(1); fields->mem.nodeInvDepth.set(indexDepthFloat,fields->cellDepthInv); fields->mem.childNodeCount.set(fields->mem.childNodeCount.allocate(1),0); fields->mem.nodeCollisionMask.set(fields->mem.nodeCollisionMask.allocate(1),0); } template<typename Derived> inline void addParticles(const int numParticlesToAdd, Derived * const __restrict__ particles) { const int pId = fields->mem.indexParticle.allocate(numParticlesToAdd); const int oId = fields->mem.orderParticle.allocate(numParticlesToAdd); const int maxXId = fields->mem.maxX.allocate(numParticlesToAdd); const int maxYId = fields->mem.maxY.allocate(numParticlesToAdd); const int maxZId = fields->mem.maxZ.allocate(numParticlesToAdd); const int minXId = fields->mem.minX.allocate(numParticlesToAdd); const int minYId = fields->mem.minY.allocate(numParticlesToAdd); const int minZId = fields->mem.minZ.allocate(numParticlesToAdd); fields->mem.index.set(1,fields->mem.index.get(1)+numParticlesToAdd); for(int i=0;i<numParticlesToAdd;i++) { const IParticle<float> * const curPtr = static_cast<const IParticle<float>* const>(particles+i); fields->mem.indexParticle.set(pId+i,curPtr->getId()); fields->mem.orderParticle.set(oId+i,oId+i); fields->mem.maxX.set(maxXId+i,curPtr->getMaxX()); fields->mem.maxY.set(maxYId+i,curPtr->getMaxY()); fields->mem.maxZ.set(maxZId+i,curPtr->getMaxZ()); fields->mem.minX.set(minXId+i,curPtr->getMinX()); fields->mem.minY.set(minYId+i,curPtr->getMinY()); fields->mem.minZ.set(minZId+i,curPtr->getMinZ()); } } struct NodeTask { NodeTask(const int n1=0):nodePointer(n1){ } const int nodePointer; }; struct LeafTask { LeafTask(const int n1=0):particlePointer(n1){ } int particlePointer; }; // returns id values of particles std::vector<int> findCollisions(const float minx, const float miny, const float minz, const float maxx, const float maxy, const float maxz) { FastUnique<int32_t, testUniqueLimit> fastSet; std::vector<int> result; std::stack<NodeTask> nodesToCompute; std::vector<LeafTask> particlesToCompute; // push root node to work queue nodesToCompute.push(NodeTask(0)); // traverse all colliding sparse cells while(!nodesToCompute.empty() /* stack>=0 */) { NodeTask task = nodesToCompute.top(); nodesToCompute.pop(); const int pointer = fields->mem.index.get(task.nodePointer+2); const int npdiv3 = task.nodePointer/3; const int numChildNodes = fields->mem.childNodeCount.get(npdiv3); // if this is not a leaf node, traverse all child nodes (they are sparse, so may be less than 8(8bit mask) or 64(64 bit mask)) if(pointer<0) { // get current node's information const float minCornerX = fields->mem.nodeMinX.get(npdiv3); const float minCornerY = fields->mem.nodeMinY.get(npdiv3); const float minCornerZ = fields->mem.nodeMinZ.get(npdiv3); const float cellWidthInv = fields->mem.nodeInvWidth.get(npdiv3); const float cellHeightInv = fields->mem.nodeInvHeight.get(npdiv3); const float cellDepthInv = fields->mem.nodeInvDepth.get(npdiv3); const int indexStartX = std::floor((minx - minCornerX)*cellWidthInv); const int indexEndX = std::floor((maxx - minCornerX)*cellWidthInv); const int indexStartY = std::floor((miny - minCornerY)*cellHeightInv); const int indexEndY = std::floor((maxy - minCornerY)*cellHeightInv); const int indexStartZ = std::floor((minz - minCornerZ)*cellDepthInv); const int indexEndZ = std::floor((maxz - minCornerZ)*cellDepthInv); // prepare cell indicator mask (1 bit = has object, 0 bit = empty)) uint64_t maskCellsFilled=0; for(int k=indexStartZ; k<=indexEndZ; k++) { if(k<0 || k>=4) continue; for(int j=indexStartY; j<=indexEndY; j++) { if(j<0 || j>=4) continue; for(int i=indexStartX; i<=indexEndX; i++) { if(i<0 || i>=4) continue; storeBit(maskCellsFilled,1,i+j*4+k*16); } } } const int nodeOffset = -pointer-1; for(int i=0;i<numChildNodes;i++) { // if there is possible collision (accelerated by bit mask for collisions) uint64_t cellMask = fields->mem.nodeCollisionMask.get((nodeOffset+i*3)/3); if(maskCellsFilled & cellMask) { nodesToCompute.push(NodeTask(nodeOffset+i*3)); } } } else { // this is leaf node const int ptr = fields->mem.index.get(task.nodePointer); const int n = fields->mem.index.get(task.nodePointer+1); for(int i=0;i<n;i++) { const int index = ptr+i; { particlesToCompute.push_back(LeafTask(index)); } } } } const int sz = particlesToCompute.size(); //f(particlesToCompute.data(),sz); for(int i=0;i<sz;i++) { const int index = particlesToCompute[i].particlePointer; const int orderId = fields->mem.orderParticle.get(index); const int partId = fields->mem.indexParticle.get(orderId); if(fastSet.test(partId)) { const float minX = fields->mem.minX.get(orderId); const float maxX = fields->mem.maxX.get(orderId); if(intersectDim(minx, maxx, minX, maxX)) { const float minY = fields->mem.minY.get(orderId); const float maxY = fields->mem.maxY.get(orderId); if(intersectDim(miny, maxy, minY, maxY)) { const float minZ = fields->mem.minZ.get(orderId); const float maxZ = fields->mem.maxZ.get(orderId); if(intersectDim(minz, maxz, minZ, maxZ)) { fastSet.iterateSet(partId); } } } } } const int fsz = fastSet.size(); for(int i=0;i<fsz;i++) { result.push_back(fastSet.get(i)); } return result; } std::vector<std::pair<int,int>> findCollisionsAll() { const int resetN = fields->mem.indexParticle.size(); fields->mem.allPairsCollmapping.allocate(resetN); for(int i=0;i<resetN;i++) { fields->mem.allPairsCollmapping.getRef(i).reset(); } fields->mem.allPairsCollmapping.reset(); fields->mem.allPairsColl.reset(); std::vector<std::pair<int,int>> result; const int numLeaf = fields->mem.leafOffset.size(); for(int leaf=0;leaf<numLeaf;leaf++) { { const int leafOfs = fields->mem.leafOffset.get(leaf); const int ptr = fields->mem.index.get(leafOfs); const int n = fields->mem.index.get(leafOfs+1); alignas(32) int index[testParticleLimit]; alignas(32) int orderId[testParticleLimit]; alignas(32) int partId[testParticleLimit]; alignas(32) float minx[testParticleLimit]; alignas(32) float miny[testParticleLimit]; alignas(32) float minz[testParticleLimit]; alignas(32) float maxx[testParticleLimit]; alignas(32) float maxy[testParticleLimit]; alignas(32) float maxz[testParticleLimit]; constexpr int simd = 4; constexpr int simd1 = simd-1; const int n8 = n-(n&simd1); for(int i=0;i<n8;i+=simd) { for(int j=0;j<simd;j++) index[i+j] = ptr + i + j; for(int j=0;j<simd;j++) orderId[i+j] = fields->mem.orderParticle.get(index[i+j]); for(int j=0;j<simd;j++) partId[i+j] = fields->mem.indexParticle.get(orderId[i+j]); for(int j=0;j<simd;j++) minx[i+j] = fields->mem.minX.get(orderId[i+j]); for(int j=0;j<simd;j++) miny[i+j] = fields->mem.minY.get(orderId[i+j]); for(int j=0;j<simd;j++) minz[i+j] = fields->mem.minZ.get(orderId[i+j]); for(int j=0;j<simd;j++) maxx[i+j] = fields->mem.maxX.get(orderId[i+j]); for(int j=0;j<simd;j++) maxy[i+j] = fields->mem.maxY.get(orderId[i+j]); for(int j=0;j<simd;j++) maxz[i+j] = fields->mem.maxZ.get(orderId[i+j]); } for(int i=n8;i<n;i++) { index[i] = ptr + i; orderId[i] = fields->mem.orderParticle.get(index[i]); partId[i] = fields->mem.indexParticle.get(orderId[i]); minx[i] = fields->mem.minX.get(orderId[i]); miny[i] = fields->mem.minY.get(orderId[i]); minz[i] = fields->mem.minZ.get(orderId[i]); maxx[i] = fields->mem.maxX.get(orderId[i]); maxy[i] = fields->mem.maxY.get(orderId[i]); maxz[i] = fields->mem.maxZ.get(orderId[i]); } for(int i=n;i<testParticleLimit;i++) { index[i] = -1; orderId[i] = -1; partId[i] = -1; minx[i] = 1000000000000000000.0f; miny[i] = 1000000000000000000.0f; minz[i] = 1000000000000000000.0f; maxx[i] = 1000000000000000000.0f; maxy[i] = 1000000000000000000.0f; maxz[i] = 1000000000000000000.0f; } // SIMD computation (tiled computing) { alignas(32) int out[16]={ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }; for(int i=0;i<testParticleLimit;i+=simd) { if(i>=n) break; FastUnique<int32_t, testUniqueLimit> * map[simd] = { partId[i]>=0?fields->mem.allPairsCollmapping.getPtr(partId[i]):nullptr, partId[i+1]>=0?fields->mem.allPairsCollmapping.getPtr(partId[i+1]):nullptr, partId[i+2]>=0?fields->mem.allPairsCollmapping.getPtr(partId[i+2]):nullptr, partId[i+3]>=0?fields->mem.allPairsCollmapping.getPtr(partId[i+3]):nullptr }; for(int j=i;j<testParticleLimit;j+=simd) { if(j>=n) break; // 0v0, 0v1, 0v2, 0v3, // 1v0, 1v1, 1v2, 1v3, // 2v0, 2v1, 2v2, 2v3, // 3v0, 3v1, 3v2, 3v3, comp4vs4( partId+i, partId+j, minx+i, minx+j, miny+i, miny+j, minz+i, minz+j, maxx+i, maxx+j, maxy+i, maxy+j, maxz+i, maxz+j, out ); for(int k=0;k<16;k++) { const int k3 = k&3; const int id2 = j+(k>>2); if(out[k]) { if(map[k3]) map[k3]->insert(partId[id2]); } } } } } } } for(int i=0;i<resetN;i++) { FastUnique<int32_t, testUniqueLimit>& map = fields->mem.allPairsCollmapping.getRef(i); const int ms = map.size(); const int allocIdx = fields->mem.allPairsColl.allocate(ms); for(int j=0;j<ms;j++) { fields->mem.allPairsColl.set(allocIdx+j,std::pair<int,int>(fields->mem.indexParticle.get(i),fields->mem.indexParticle.get(map.get(j)))); } } result.resize(fields->mem.allPairsColl.size()); fields->mem.allPairsColl.writeTo(result); return result; } void buildTree() { int particleStart = fields->mem.index.get(0); int numParticle = fields->mem.index.get(1); int nodeOffset = 0; float minCornerX = fields->mem.nodeMinX.get(0); float minCornerY = fields->mem.nodeMinY.get(0); float minCornerZ = fields->mem.nodeMinZ.get(0); float cellWidthInv = fields->mem.nodeInvWidth.get(0); float cellHeightInv = fields->mem.nodeInvHeight.get(0); float cellDepthInv = fields->mem.nodeInvDepth.get(0); float cellWidth = 1.0f/cellWidthInv; float cellHeight = 1.0f/cellHeightInv; float cellDepth = 1.0f/cellDepthInv; int ctr=0; int maxNodeOffset = 3; while(nodeOffset <= maxNodeOffset) { ctr++; int ctrTmp[64]={0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0}; // if child node pointer not set up if(fields->mem.index.get(nodeOffset+2)<nodeOffset && fields->mem.index.get(fields->mem.index.get(nodeOffset+2)+2)>=0) { fields->mem.index.set(fields->mem.index.get(nodeOffset+2)+2,-(nodeOffset+1)); } int childNodeCount = 0; if(numParticle > testParticleLimit) { { for(int zz = 0; zz<4; zz++) for(int yy = 0; yy<4; yy++) for(int xx = 0; xx<4; xx++) { // allocate node const int index0 = xx+yy*4+zz*16; fields->mem.orderTmp[index0].reset(); fields->mem.orderTmp[index0].allocate(numParticle); } } { for(int ii=0;ii<numParticle;ii++) { const int orderParticle = fields->mem.orderParticle.get(particleStart+ii); const float& minx = fields->mem.minX.getRef(orderParticle); const float& miny = fields->mem.minY.getRef(orderParticle); const float& minz = fields->mem.minZ.getRef(orderParticle); const float& maxx = fields->mem.maxX.getRef(orderParticle); const float& maxy = fields->mem.maxY.getRef(orderParticle); const float& maxz = fields->mem.maxZ.getRef(orderParticle); const int indexStartX = std::floor((minx - minCornerX)*cellWidthInv); const int indexEndX = std::floor((maxx - minCornerX)*cellWidthInv); const int indexStartY = std::floor((miny - minCornerY)*cellHeightInv); const int indexEndY = std::floor((maxy - minCornerY)*cellHeightInv); const int indexStartZ = std::floor((minz -minCornerZ)*cellDepthInv); const int indexEndZ = std::floor((maxz - minCornerZ)*cellDepthInv); // prepare cell indicator mask (1 bit = has object, 0 bit = empty)) for(int k=indexStartZ; k<=indexEndZ; k++) { if(k<0 || k>=4) continue; for(int j=indexStartY; j<=indexEndY; j++) { if(j<0 || j>=4) continue; for(int i=indexStartX; i<=indexEndX; i++) { if(i<0 || i>=4) continue; const int index0 = i+j*4+k*16; fields->mem.orderTmp[index0].set(ctrTmp[index0],orderParticle); ctrTmp[index0]++; } } } } } // add all particles in order (from first child node to last child node) childNodeCount=0; for(int zz = 0; zz<4; zz++) for(int yy = 0; yy<4; yy++) for(int xx = 0; xx<4; xx++) { const int index0 = xx+yy*4+zz*16; const int sz = ctrTmp[index0]; if(sz>0) { childNodeCount++; const int nodeIndexOfs = fields->mem.index.allocate(3); const int particleStartCur = nodeIndexOfs; const int numParticleCur = nodeIndexOfs+1; const int childNodeStartCur = nodeIndexOfs+2; const int tmpIndex = fields->mem.childNodeCount.allocate(1); const int nodeBoundMinXFloat = fields->mem.nodeMinX.allocate(1); const int nodeBoundMinYFloat = fields->mem.nodeMinY.allocate(1); const int nodeBoundMinZFloat = fields->mem.nodeMinZ.allocate(1); const int nodeInvWidthFloat = fields->mem.nodeInvWidth.allocate(1); const int nodeInvHeightFloat = fields->mem.nodeInvHeight.allocate(1); const int nodeInvDepthFloat = fields->mem.nodeInvDepth.allocate(1); fields->mem.nodeMinX.set(nodeBoundMinXFloat,minCornerX+xx*cellWidth); fields->mem.nodeMinY.set(nodeBoundMinYFloat,minCornerY+yy*cellHeight); fields->mem.nodeMinZ.set(nodeBoundMinZFloat,minCornerZ+zz*cellDepth); fields->mem.nodeInvWidth.set(nodeInvWidthFloat,cellWidthInv*4.0f); fields->mem.nodeInvHeight.set(nodeInvHeightFloat,cellHeightInv*4.0f); fields->mem.nodeInvDepth.set(nodeInvDepthFloat,cellDepthInv*4.0f); const int nodeMaskIndex = fields->mem.nodeCollisionMask.allocate(1); uint64_t nodeMask = 0; storeBit(nodeMask,1,index0); fields->mem.nodeCollisionMask.set(nodeMaskIndex,nodeMask); //const int allocOffset = fields->mem.indexParticle.allocate(sz); const int allocOffset = fields->mem.orderParticle.allocate(sz); //fields->mem.indexParticle.readFrom(fields->mem.idTmp[index0],0,allocOffset,sz); fields->mem.orderParticle.readFrom(fields->mem.orderTmp[index0],0,allocOffset,sz); fields->mem.index.set(particleStartCur,allocOffset); fields->mem.index.set(numParticleCur,sz); fields->mem.index.set(childNodeStartCur,nodeOffset); maxNodeOffset=particleStartCur; } } fields->mem.childNodeCount.set(nodeOffset/3,childNodeCount); } else { fields->mem.childNodeCount.set(nodeOffset/3,0); const int idx = fields->mem.leafOffset.allocate(1); fields->mem.leafOffset.set(idx,nodeOffset); } nodeOffset += 3; numParticle=0; if(nodeOffset <= maxNodeOffset) { particleStart = fields->mem.index.get(nodeOffset); numParticle = fields->mem.index.get(nodeOffset+1); minCornerX = fields->mem.nodeMinX.get(nodeOffset/3); minCornerY = fields->mem.nodeMinY.get(nodeOffset/3); minCornerZ = fields->mem.nodeMinZ.get(nodeOffset/3); cellWidthInv = fields->mem.nodeInvWidth.get(nodeOffset/3); cellHeightInv = fields->mem.nodeInvHeight.get(nodeOffset/3); cellDepthInv = fields->mem.nodeInvDepth.get(nodeOffset/3); cellWidth = 1.0f/cellWidthInv; cellHeight = 1.0f/cellHeightInv; cellDepth = 1.0f/cellDepthInv; } } } private: std::shared_ptr<AdaptiveGridV2Fields> fields; }; template<typename CoordType> class CollisionPair { public: CollisionPair(IParticle<CoordType>* p1Prm=nullptr, IParticle<CoordType>* p2Prm=nullptr) { p1=p1Prm; p2=p2Prm; } IParticle<CoordType>* getParticle1() const { return p1; } IParticle<CoordType>* getParticle2() const { return p2; } private: IParticle<CoordType> * p1; IParticle<CoordType> * p2; }; template<typename CoordType> class AdaptiveGrid; using GridDataType = char; struct MutexWithoutFalseSharing { std::mutex mut; char padding[(64-sizeof(std::mutex))>0?(64-sizeof(std::mutex)):64]; }; // Fixed grid of cells (adaptive if a cell overflows) template<typename CoordType> class FixedGridFields { public: FixedGridFields(const int w, const int h, const int d, const int s, const CoordType minXp, const CoordType minYp, const CoordType minZp, const CoordType maxXp, const CoordType maxYp, const CoordType maxZp): width(w),height(h),depth(d),widthDiv1(CoordType(1)/w),heightDiv1(CoordType(1)/h),depthDiv1(CoordType(1)/d),storage(s), minX(minXp),minY(minYp),minZ(minZp),maxX(maxXp),maxY(maxYp),maxZ(maxZp) { } ~FixedGridFields() { } inline const int getWidth () const noexcept { return width;}; inline const int getHeight () const noexcept { return height;}; inline const int getDepth () const noexcept { return depth;}; inline const CoordType getWidthDiv1 () const noexcept { return widthDiv1;}; inline const CoordType getHeightDiv1 () const noexcept { return heightDiv1;}; inline const CoordType getDepthDiv1 () const noexcept { return depthDiv1;}; inline const int getStorage () const noexcept { return storage;}; std::vector<IParticle<CoordType>*> particles; std::vector<uint64_t> particlesCollisionMask; std::map<IParticle<CoordType>*,std::map<IParticle<CoordType>*,bool>> coll; std::map<IParticle<CoordType>*,std::map<IParticle<CoordType>*,bool>> mapping; std::vector<CollisionPair<CoordType>> result; const int width; const int height; const int depth; const CoordType widthDiv1; const CoordType heightDiv1; const CoordType depthDiv1; const int storage; const CoordType minX; const CoordType minY; const CoordType minZ; const CoordType maxX; const CoordType maxY; const CoordType maxZ; }; template<typename CoordType> struct Cmd { AdaptiveGrid<CoordType>* grid; std::mutex* mut; std::map<IParticle<CoordType>*,std::map<IParticle<CoordType>*,bool>>* mapping; bool* completed; }; template<typename T> class SyncQueue { public: SyncQueue(){} void push(T t) { std::unique_lock<std::mutex> lc(m); q.push(t); c.notify_one(); } void push2(T t) { std::unique_lock<std::mutex> lc(m); q.push(t); c.notify_all(); } T pop() { std::unique_lock<std::mutex> lc(m); while(q.empty()) { c.wait(lc); } T result = q.front(); q.pop(); return result; } private: std::queue<T> q; std::mutex m; std::condition_variable c; }; template<typename CoordType> class ThreadPoolFields { public: ThreadPoolFields() {ctr=0; } int ctr; std::vector<std::thread> worker; MutexWithoutFalseSharing mut[7]; std::vector<int> msg; std::vector<std::shared_ptr<SyncQueue<Cmd<CoordType>>>> q; ~ThreadPoolFields() { for(unsigned int i=0;i<worker.size();i++) { std::lock_guard<std::mutex> lg(mut[i].mut); msg[i]=0; Cmd<CoordType> cmd; cmd.grid=nullptr; q[i]->push2(cmd); } for(unsigned int i=0;i<worker.size();i++) { worker[i].join(); } } }; template<typename CoordType> class ThreadPool { public: ThreadPool() { fields=std::make_shared<ThreadPoolFields<CoordType>>(); for(int i=0;i<7;i++) { fields->q.push_back(std::make_shared<SyncQueue<Cmd<CoordType>>>()); fields->msg.push_back(1); } auto ptr = fields.get(); for(int i=0;i<7;i++) { fields->worker.push_back(std::thread( [&,i,ptr]() { auto fields = ptr; bool work = true; while(work) { { { std::lock_guard<std::mutex> lg(fields->mut[i].mut); work=(fields->msg[i]>0); } Cmd<CoordType> cmd = fields->q[i]->pop(); if(cmd.grid==nullptr) break; auto collisions = cmd.grid->getCollisions(); { std::lock_guard<std::mutex> lg(*cmd.mut); for(auto& c:collisions) { (*cmd.mapping)[c.getParticle1()][c.getParticle2()]=true; } *cmd.completed=true; } } } } )); } } void compute(Cmd<CoordType> cmd) { fields->q[fields->ctr++%7]->push(cmd); } private: std::shared_ptr<ThreadPoolFields<CoordType>> fields; }; template<typename CoordType> class AdaptiveGrid { protected: AdaptiveGrid(ThreadPool<CoordType> thr, int depthPrm, const CoordType minX, const CoordType minY, const CoordType minZ, const CoordType maxX, const CoordType maxY, const CoordType maxZ):thrPool(thr) { isLeaf = std::make_shared<bool>(); *isLeaf=true; depth = std::make_shared<int>(); *depth=depthPrm; if(*depth<10) fields = std::make_shared<FixedGridFields<CoordType>>(4,4,4,300,minX,minY,minZ,maxX,maxY,maxZ); else fields = std::make_shared<FixedGridFields<CoordType>>(4,4,4,400,minX,minY,minZ,maxX,maxY,maxZ); subGrid = std::make_shared<std::vector<AdaptiveGrid<CoordType>>>(); } // loads a bit from a 8-byte integer at a position inline uint64_t loadBitSizeT(const uint64_t & data, const int pos) noexcept { return (data>>pos)&1; } // stores a bit in a 8-byte integer at a position inline void storeBitSizeT(uint64_t & data, const uint64_t value, const int pos) noexcept { data = (value << pos) | (data & ~(((uint64_t)1) << pos)); } public: AdaptiveGrid(ThreadPool<CoordType> thr, const CoordType minX, const CoordType minY, const CoordType minZ, const CoordType maxX, const CoordType maxY, const CoordType maxZ):thrPool(thr) { isLeaf=std::make_shared<bool>(); *isLeaf = true; depth = std::make_shared<int>(); *depth=0; fields = std::make_shared<FixedGridFields<CoordType>>(4,4,4,100,minX,minY,minZ,maxX,maxY,maxZ); subGrid = std::make_shared<std::vector<AdaptiveGrid<CoordType>>>(); } AdaptiveGrid() { AdaptiveGrid(ThreadPool<CoordType>(),0,0,0,1,1,1); } void clear() { *isLeaf=true; subGrid->clear(); fields->particles.clear(); fields->particlesCollisionMask.clear(); } template<typename Derived> void add(Derived * particlesPrm, int n) { for(int i=0;i<n;i++) add(particlesPrm + i); } // add static particle object pointers to compute all-vs-all comparisons in an optimized way // the generated internal data is also used for static vs dynamic collision checking template<typename Derived> void add(Derived * particlesPrm) { const int w = fields->getWidth(); const int h = fields->getHeight(); const int d = fields->getDepth(); // grid const CoordType xDim = fields->maxX - fields->minX; const CoordType yDim = fields->maxY - fields->minY; const CoordType zDim = fields->maxZ - fields->minZ; // cell const CoordType stepX = xDim/w; const CoordType stepY = yDim/h; const CoordType stepZ = zDim/d; const int sto = fields->getStorage(); const int nPar = fields->particles.size(); // if current grid leaf is full, convert it to node with 4x4x4 leaves if(*isLeaf && (nPar == sto)) { *isLeaf = false; // create leaf nodes (4x4x4=64) subGrid->reserve(64); for(int zz = 0; zz<d; zz++) for(int yy = 0; yy<h; yy++) for(int xx = 0; xx<w; xx++) { AdaptiveGrid<CoordType> newGrid(thrPool,*depth+1,fields->minX+stepX*xx,fields->minY+stepY*yy,fields->minZ+stepZ*zz, fields->minX+(stepX)*(xx+1),fields->minY+(stepY)*(yy+1),fields->minZ+(stepZ)*(zz+1)); subGrid->push_back(newGrid); } for(int ii=0;ii<nPar;ii++) { // AABB box of particle const CoordType minx = fields->particles[ii]->getMinX(); const CoordType miny = fields->particles[ii]->getMinY(); const CoordType minz = fields->particles[ii]->getMinZ(); const CoordType maxx = fields->particles[ii]->getMaxX(); const CoordType maxy = fields->particles[ii]->getMaxY(); const CoordType maxz = fields->particles[ii]->getMaxZ(); const int cellIndexX = std::floor((minx - fields->minX)/stepX); const int cellIndexY = std::floor((miny - fields->minY)/stepY); const int cellIndexZ = std::floor((minz - fields->minZ)/stepZ); const int cellIndexX2 = std::floor((maxx - fields->minX)/stepX); const int cellIndexY2 = std::floor((maxy - fields->minY)/stepY); const int cellIndexZ2 = std::floor((maxz - fields->minZ)/stepZ); for(int zz = cellIndexZ; zz<=cellIndexZ2; zz++) for(int yy = cellIndexY; yy<=cellIndexY2; yy++) for(int xx = cellIndexX; xx<=cellIndexX2; xx++) { if(xx<0 || yy<0 || zz<0 || xx>=w || yy>=h || zz>=d) continue; // overlaps with subgrid, add to it (*subGrid)[xx+yy*4+zz*4*4].add(fields->particles[ii]); } } // clear unused particles fields->particles.clear(); fields->particlesCollisionMask.clear(); } { // AABB box of particle const CoordType minx = (particlesPrm)->getMinX(); const CoordType miny = (particlesPrm)->getMinY(); const CoordType minz = (particlesPrm)->getMinZ(); const CoordType maxx = (particlesPrm)->getMaxX(); const CoordType maxy = (particlesPrm)->getMaxY(); const CoordType maxz = (particlesPrm)->getMaxZ(); const int cellIndexX = std::floor((minx - fields->minX)/stepX); const int cellIndexY = std::floor((miny - fields->minY)/stepY); const int cellIndexZ = std::floor((minz - fields->minZ)/stepZ); const int cellIndexX2 = std::floor((maxx - fields->minX)/stepX); const int cellIndexY2 = std::floor((maxy - fields->minY)/stepY); const int cellIndexZ2 = std::floor((maxz - fields->minZ)/stepZ); uint64_t maskCellsFilled; // "gather" operations on neighbor cells should be cache-friendly for(int zz = cellIndexZ; zz<=cellIndexZ2; zz++) for(int yy = cellIndexY; yy<=cellIndexY2; yy++) for(int xx = cellIndexX; xx<=cellIndexX2; xx++) { if(xx<0 || yy<0 || zz<0 || xx>=w || yy>=h || zz>=d) continue; storeBitSizeT(maskCellsFilled,1,xx+yy*4+zz*4*4); if(!*isLeaf) { (*subGrid)[xx+yy*4+zz*4*4].add(particlesPrm); } } if(maskCellsFilled) { if(*isLeaf) { fields->particlesCollisionMask.push_back(maskCellsFilled); fields->particles.push_back(particlesPrm); } } } } inline const bool intersectDim(const CoordType minx, const CoordType maxx, const CoordType minx2, const CoordType maxx2) const noexcept { return !((maxx < minx2) || (maxx2 < minx)); } // compute collision between given particle and the already-prepared static object grid (after add(..) and getCollisions(..)) // also returns self-collisions if same particle was added as static particle before (by add(..)) // thread-safe std::vector<IParticle<CoordType>*> getDynamicCollisionListFor(IParticle<CoordType>* particle) { std::unordered_map<IParticle<CoordType>*,bool> result; const int n2 = fields->particles.size(); result.reserve(n2); // AABB box of particle const CoordType minx = particle->getMinX(); const CoordType miny = particle->getMinY(); const CoordType minz = particle->getMinZ(); const CoordType maxx = particle->getMaxX(); const CoordType maxy = particle->getMaxY(); const CoordType maxz = particle->getMaxZ(); const CoordType xDim = fields->maxX - fields->minX; const CoordType yDim = fields->maxY - fields->minY; const CoordType zDim = fields->maxZ - fields->minZ; const int w = fields->getWidth(); const int h = fields->getHeight(); const int d = fields->getDepth(); const CoordType stepX = xDim/w; const CoordType stepY = yDim/h; const CoordType stepZ = zDim/d; const int cellIndexX = std::floor((minx - fields->minX)/stepX); const int cellIndexY = std::floor((miny - fields->minY)/stepY); const int cellIndexZ = std::floor((minz - fields->minZ)/stepZ); const int cellIndexX2 = std::floor((maxx - fields->minX)/stepX); const int cellIndexY2 = std::floor((maxy - fields->minY)/stepY); const int cellIndexZ2 = std::floor((maxz - fields->minZ)/stepZ); uint64_t collisionMask=0; for(int zz = cellIndexZ; zz<=cellIndexZ2; zz++) for(int yy = cellIndexY; yy<=cellIndexY2; yy++) for(int xx = cellIndexX; xx<=cellIndexX2; xx++) { if(xx<0 || yy<0 || zz<0 || xx>=w || yy>=h || zz>=d) continue; // if selected cell is a cell // (if parent is leaf, then it is a cell) if(*isLeaf) { storeBitSizeT(collisionMask,1,xx+yy*4+zz*4*4); } else // if this is a grid { auto subResult = (*subGrid)[xx+yy*4+zz*4*4].getDynamicCollisionListFor(particle); for(auto& subr:subResult) { result.emplace(subr,true); } } } // if this is a leaf node for(int j=0;j<n2;j++) { if(result.find(fields->particles[j])==result.end()) if(fields->particlesCollisionMask[j] & collisionMask) { const CoordType minx2 = fields->particles[j]->getMinX(); const CoordType maxx2 = fields->particles[j]->getMaxX(); if(intersectDim(minx, maxx, minx2, maxx2)) { const CoordType miny2 = fields->particles[j]->getMinY(); const CoordType maxy2 = fields->particles[j]->getMaxY(); if(intersectDim(miny, maxy, miny2, maxy2)) { const CoordType minz2 = fields->particles[j]->getMinZ(); const CoordType maxz2 = fields->particles[j]->getMaxZ(); if(intersectDim(minz, maxz, minz2, maxz2)) { result.emplace(fields->particles[j],true); } } } } } std::vector<IParticle<CoordType>*> resultVec; for(auto& res:result) resultVec.push_back(res.first); return resultVec; } // returns collision pairs between static objects (and prepares internal data for future dynamic object collision checking), ordered std::vector<CollisionPair<CoordType>> getCollisions() { fields->mapping.clear(); fields->result.clear(); const int w = fields->getWidth(); const int h = fields->getHeight(); const int d = fields->getDepth(); // check neighbor cells for a collision of another AABB particle std::mutex mut; bool completed[64]; if((!*isLeaf) && *depth>0) { for(int i=0;i<64;i++) completed[i]=false; } int completedCtr = 0; // "gather" operations on neighbor cells should be cache-friendly if(!*isLeaf) { for(int zz = 0; zz<d; zz++) for(int yy = 0; yy<h; yy++) for(int xx = 0; xx<w; xx++) { // if at specific layer, enable threads if( ( *(subGrid->data()[xx+yy*4+zz*4*4].isLeaf) ) && *depth>0) { Cmd<CoordType> cmd; cmd.completed=&completed[completedCtr++]; cmd.mapping=&fields->mapping; cmd.mut=&mut; cmd.grid=subGrid->data()+(xx+yy*4+zz*4*4); thrPool.compute(cmd); } else { auto collisions = (subGrid->data()+(xx+yy*4+zz*4*4))->getCollisions(); { for(auto& c:collisions) { fields->mapping[c.getParticle1()][c.getParticle2()]=true; } } } } } if(*isLeaf) { std::map<IParticle<CoordType>*,std::map<IParticle<CoordType>*,bool>> localMap; const int nMask = fields->particles.size(); std::vector<uint64_t> fastTest; for(int j=0;j<nMask-1;j++) { const CoordType minx = fields->particles[j]->getMinX(); const CoordType maxx = fields->particles[j]->getMaxX(); const CoordType miny = fields->particles[j]->getMinY(); const CoordType maxy = fields->particles[j]->getMaxY(); const CoordType minz = fields->particles[j]->getMinZ(); const CoordType maxz = fields->particles[j]->getMaxZ(); for(int i=j+1;i<nMask;i++) { // if both AABBs in same cell (64bit collision mask = 4x4x4 on/off mapping) if(fields->particlesCollisionMask[j] & fields->particlesCollisionMask[i]) { if(fields->particles[j]->getId()<fields->particles[i]->getId()) { const CoordType minx2 = fields->particles[i]->getMinX(); const CoordType maxx2 = fields->particles[i]->getMaxX(); if(intersectDim(minx, maxx, minx2, maxx2)) { const CoordType miny2 = fields->particles[i]->getMinY(); const CoordType maxy2 = fields->particles[i]->getMaxY(); if(intersectDim(miny, maxy, miny2, maxy2)) { const CoordType minz2 = fields->particles[i]->getMinZ(); const CoordType maxz2 = fields->particles[i]->getMaxZ(); if(intersectDim(minz, maxz, minz2, maxz2)) { localMap[fields->particles[j]][fields->particles[i]]=true; } } } } } } } { std::lock_guard<std::mutex> lg(mut); for(auto& lm:localMap) { for(auto& lm2:lm.second) fields->mapping[lm.first][lm2.first]=true; } } } // if at specific layer, wait for threads if((!*isLeaf) && *depth>0) { bool comp = false; while(!comp) { comp=true; { std::lock_guard<std::mutex> lg(mut); for(int cmd=0;cmd<completedCtr;cmd++) { comp&=completed[cmd]; } } std::this_thread::yield(); } } for(auto& m:fields->mapping) { for(auto& m2:m.second) { fields->result.push_back(CollisionPair<CoordType>(m.first,m2.first)); } } return fields->result; } private: std::shared_ptr<FixedGridFields<CoordType>> fields; std::shared_ptr<std::vector<AdaptiveGrid<CoordType>>> subGrid; ThreadPool<CoordType> thrPool; std::shared_ptr<int> depth; std::shared_ptr<bool> isLeaf; }; // axis-aligned bounding-box collision detection template<typename CoordType> class BruteForce { public: BruteForce() { } template<typename Derived> void add(Derived * particlesPrm, int numParticlesToAdd) { for(int i=0;i<numParticlesToAdd;i++) particles.push_back(static_cast<IParticle<CoordType>*>(particlesPrm+i)); } std::vector<std::pair<int,int>> getCollisionsSIMD() { std::vector<std::pair<int,int>> result; std::vector<CoordType> minx,miny,minz,maxx,maxy,maxz; std::vector<int> id; const int sz = particles.size(); for(int i=0;i<sz;i++) { IParticle<CoordType> * ptr = particles[i]; minx.push_back(ptr->getMinX()); miny.push_back(ptr->getMinY()); minz.push_back(ptr->getMinZ()); maxx.push_back(ptr->getMaxX()); maxy.push_back(ptr->getMaxY()); maxz.push_back(ptr->getMaxZ()); id.push_back(ptr->getId()); } const int sz4 = sz - (sz&3); for(int i=0;i<sz4;i+=4) { for(int j=i;j<sz4;j+=4) { int out[16]; comp4vs4( id.data()+i, id.data()+j, minx.data()+i, minx.data()+j, miny.data()+i, miny.data()+j, minz.data()+i, minz.data()+j, maxx.data()+i, maxx.data()+j, maxy.data()+i, maxy.data()+j, maxz.data()+i, maxz.data()+j, out ); for(int k=0;k<16;k++) { if(out[k]) { result.push_back(std::pair<int,int>(i+(k&3),j+k/4)); } } } } for(int i=0;i<sz;i++) { for(int j=sz4;j<sz;j++) { if(i!=j && particles[i]->getId() < particles[j]->getId()) { if(intersectDim(particles[i]->getMinX(),particles[i]->getMaxX(),particles[j]->getMinX(),particles[j]->getMaxX())) if(intersectDim(particles[i]->getMinY(),particles[i]->getMaxY(),particles[j]->getMinY(),particles[j]->getMaxY())) if(intersectDim(particles[i]->getMinZ(),particles[i]->getMaxZ(),particles[j]->getMinZ(),particles[j]->getMaxZ())) { result.push_back(std::pair<int,int>(i,j)); } } } } return result; } std::vector<CollisionPair<CoordType>> getCollisions() { std::vector<CollisionPair<CoordType>> result; idMap.clear(); collisionPairs.clear(); const int sz = particles.size(); for(int i=0;i<sz-1;i++) { idMap[particles[i]->getId()]=particles[i]; for(int j=i+1;j<sz;j++) { if( particles[i]->intersectX(particles[j]) && particles[i]->intersectY(particles[j]) && particles[i]->intersectZ(particles[j])) { collisionPairs.push_back(CollisionPair<CoordType>(particles[i],particles[j])); } } } std::sort(collisionPairs.begin(),collisionPairs.end(),[](CollisionPair<CoordType>& c1, CollisionPair<CoordType>& c2){ return c1.getParticle1()->getId()<c2.getParticle1()->getId(); }); result=collisionPairs; return result; } private: inline void comp4vs4( const int * const __restrict__ partId1, const int * const __restrict__ partId2, const float * const __restrict__ minx1, const float * const __restrict__ minx2, const float * const __restrict__ miny1, const float * const __restrict__ miny2, const float * const __restrict__ minz1, const float * const __restrict__ minz2, const float * const __restrict__ maxx1, const float * const __restrict__ maxx2, const float * const __restrict__ maxy1, const float * const __restrict__ maxy2, const float * const __restrict__ maxz1, const float * const __restrict__ maxz2, int * const __restrict__ out ) const noexcept { alignas(32) int result[16]={ // 0v0 0v1 0v2 0v3 // 1v0 1v1 1v2 1v3 // 2v0 2v1 2v2 2v3 // 3v0 3v1 3v2 3v3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; alignas(32) int tileId1[16]={ // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 partId1[0],partId1[1],partId1[2],partId1[3], partId1[0],partId1[1],partId1[2],partId1[3], partId1[0],partId1[1],partId1[2],partId1[3], partId1[0],partId1[1],partId1[2],partId1[3] }; alignas(32) int tileId2[16]={ // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 partId2[0],partId2[0],partId2[0],partId2[0], partId2[1],partId2[1],partId2[1],partId2[1], partId2[2],partId2[2],partId2[2],partId2[2], partId2[3],partId2[3],partId2[3],partId2[3] }; alignas(32) float tileMinX1[16]={ // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 minx1[0],minx1[1],minx1[2],minx1[3], minx1[0],minx1[1],minx1[2],minx1[3], minx1[0],minx1[1],minx1[2],minx1[3], minx1[0],minx1[1],minx1[2],minx1[3] }; alignas(32) float tileMinX2[16]={ // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 minx2[0],minx2[0],minx2[0],minx2[0], minx2[1],minx2[1],minx2[1],minx2[1], minx2[2],minx2[2],minx2[2],minx2[2], minx2[3],minx2[3],minx2[3],minx2[3] }; alignas(32) float tileMinY1[16]={ // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 miny1[0],miny1[1],miny1[2],miny1[3], miny1[0],miny1[1],miny1[2],miny1[3], miny1[0],miny1[1],miny1[2],miny1[3], miny1[0],miny1[1],miny1[2],miny1[3] }; alignas(32) float tileMinY2[16]={ // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 miny2[0],miny2[0],miny2[0],miny2[0], miny2[1],miny2[1],miny2[1],miny2[1], miny2[2],miny2[2],miny2[2],miny2[2], miny2[3],miny2[3],miny2[3],miny2[3] }; alignas(32) float tileMinZ1[16]={ // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 minz1[0],minz1[1],minz1[2],minz1[3], minz1[0],minz1[1],minz1[2],minz1[3], minz1[0],minz1[1],minz1[2],minz1[3], minz1[0],minz1[1],minz1[2],minz1[3] }; alignas(32) float tileMinZ2[16]={ // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 minz2[0],minz2[0],minz2[0],minz2[0], minz2[1],minz2[1],minz2[1],minz2[1], minz2[2],minz2[2],minz2[2],minz2[2], minz2[3],minz2[3],minz2[3],minz2[3] }; alignas(32) float tileMaxX1[16]={ // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 maxx1[0],maxx1[1],maxx1[2],maxx1[3], maxx1[0],maxx1[1],maxx1[2],maxx1[3], maxx1[0],maxx1[1],maxx1[2],maxx1[3], maxx1[0],maxx1[1],maxx1[2],maxx1[3] }; alignas(32) float tileMaxX2[16]={ // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 maxx2[0],maxx2[0],maxx2[0],maxx2[0], maxx2[1],maxx2[1],maxx2[1],maxx2[1], maxx2[2],maxx2[2],maxx2[2],maxx2[2], maxx2[3],maxx2[3],maxx2[3],maxx2[3] }; alignas(32) float tileMaxY1[16]={ // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 maxy1[0],maxy1[1],maxy1[2],maxy1[3], maxy1[0],maxy1[1],maxy1[2],maxy1[3], maxy1[0],maxy1[1],maxy1[2],maxy1[3], maxy1[0],maxy1[1],maxy1[2],maxy1[3] }; alignas(32) float tileMaxY2[16]={ // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 maxy2[0],maxy2[0],maxy2[0],maxy2[0], maxy2[1],maxy2[1],maxy2[1],maxy2[1], maxy2[2],maxy2[2],maxy2[2],maxy2[2], maxy2[3],maxy2[3],maxy2[3],maxy2[3] }; alignas(32) float tileMaxZ1[16]={ // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 maxz1[0],maxz1[1],maxz1[2],maxz1[3], maxz1[0],maxz1[1],maxz1[2],maxz1[3], maxz1[0],maxz1[1],maxz1[2],maxz1[3], maxz1[0],maxz1[1],maxz1[2],maxz1[3] }; alignas(32) float tileMaxZ2[16]={ // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 maxz2[0],maxz2[0],maxz2[0],maxz2[0], maxz2[1],maxz2[1],maxz2[1],maxz2[1], maxz2[2],maxz2[2],maxz2[2],maxz2[2], maxz2[3],maxz2[3],maxz2[3],maxz2[3] }; for(int i=0;i<16;i++) result[i] = (tileId1[i] < tileId2[i]); for(int i=0;i<16;i++) result[i] = result[i] && intersectDim(tileMinX1[i], tileMaxX1[i], tileMinX2[i], tileMaxX2[i]) && intersectDim(tileMinY1[i], tileMaxY1[i], tileMinY2[i], tileMaxY2[i]) && intersectDim(tileMinZ1[i], tileMaxZ1[i], tileMinZ2[i], tileMaxZ2[i]); for(int i=0;i<16;i++) out[i]=result[i]; }; std::vector<IParticle<CoordType>*> particles; std::vector<CollisionPair<CoordType>> collisionPairs; std::map<int,IParticle<CoordType>*> idMap; }; } #endif /* FASTCOLLISIONDETECTIONLIB_H_ */ #include<atomic> #include<iostream> template<typename CoordType> struct Vector3D { CoordType x,y,z; Vector3D<CoordType> crossProduct(Vector3D<CoordType> vec) { Vector3D<CoordType> res; res.x = y*vec.z - z*vec.y; res.y = z*vec.x - x*vec.z; res.z = x*vec.y - y*vec.x; return res; } Vector3D<CoordType> operator - (Vector3D<CoordType> vec) { Vector3D<CoordType> result; result.x = x-vec.x; result.y = y-vec.y; result.z = z-vec.z; return result; } Vector3D<CoordType> operator + (Vector3D<CoordType> vec) { Vector3D<CoordType> result; result.x = x+vec.x; result.y = y+vec.y; result.z = z+vec.z; return result; } Vector3D<CoordType> operator * (CoordType v) { Vector3D<CoordType> result; result.x = x*v; result.y = y*v; result.z = z*v; return result; } CoordType abs() { return std::sqrt(x*x+y*y+z*z); } }; template<typename CoordType> struct PointCloud { Vector3D<CoordType> point[125]; PointCloud(CoordType x, CoordType y, CoordType z) { for(int i=0;i<125;i++) { point[i].x=x+i%5-2.5f; point[i].y=y+(i/5)%5-2.5f; point[i].z=z+i/25-2.5f; } } }; template<typename CoordType> bool pointCloudIntersection(PointCloud<CoordType>& cl1, PointCloud<CoordType>& cl2) { for(Vector3D<CoordType>& p:cl1.point) { for(Vector3D<CoordType>& p2:cl2.point) { if((p-p2).abs()<1.0f) { return true; } } } return false; } template<typename CoordType> bool intersectDim(const CoordType minx, const CoordType maxx, const CoordType minx2, const CoordType maxx2) { return !((maxx < minx2) || (maxx2 < minx)); } template<typename CoordType> struct AABBofPointCloud: public FastColDetLib::IParticle<CoordType> { AABBofPointCloud(int idPrm, PointCloud<CoordType> * pCloudPrm) { id=idPrm; pCloud = pCloudPrm; xmin=pCloud->point[0].x; ymin=pCloud->point[0].y; zmin=pCloud->point[0].z; xmax=pCloud->point[0].x; ymax=pCloud->point[0].y; zmax=pCloud->point[0].z; for(int i=0;i<125;i++) { if(xmin>pCloud->point[i].x) xmin=pCloud->point[i].x; if(ymin>pCloud->point[i].y) ymin=pCloud->point[i].y; if(zmin>pCloud->point[i].z) zmin=pCloud->point[i].z; if(xmax<pCloud->point[i].x) xmax=pCloud->point[i].x; if(ymax<pCloud->point[i].y) ymax=pCloud->point[i].y; if(zmax<pCloud->point[i].z) zmax=pCloud->point[i].z; } } int id; PointCloud<CoordType>* pCloud; CoordType xmin; CoordType ymin; CoordType zmin; CoordType xmax; CoordType ymax; CoordType zmax; const CoordType getMaxX()const {return xmax;} const CoordType getMaxY()const {return ymax;} const CoordType getMaxZ()const {return zmax;} const CoordType getMinX()const {return xmin;} const CoordType getMinY()const {return ymin;} const CoordType getMinZ()const {return zmin;} const int getId()const {return id;} }; int main() { using cotype = float; PointCloud<cotype> ico1(0,0,0); // heating the CPU for benchmarking for(int i=0;i<10000;i++) { PointCloud<cotype> ico2(0,0.1f,i*0.1f); pointCloudIntersection(ico1,ico2); } const int N = 20004; std::vector<PointCloud<cotype>> objects; oofrng::Generator<64> gen; for(int i=0;i<N-3;i++) { objects.push_back(PointCloud<cotype>(gen.generate1Float()*150,gen.generate1Float()*150,gen.generate1Float()*150)); } // the teapot in stadium problem objects.push_back(PointCloud<cotype>(9000,9000,9000)); objects.push_back(PointCloud<cotype>(9001,9001,9001)); objects.push_back(PointCloud<cotype>(9002,9002,9002)); std::vector<AABBofPointCloud<cotype>> AABBs; for(int i=0;i<N;i++) { AABBs.push_back(AABBofPointCloud<cotype>(i,&objects[i])); } FastColDetLib::MemoryPool memPool; FastColDetLib::AdaptiveGridV2 grid2_0(memPool,0,0,0,10005,10005,10005); // benchmark begin for(int j=0;j<15;j++) { size_t nano; std::map<int,std::map<int,bool>> collisionMatrix; { std::atomic<int> ctr; ctr.store(0); { { FastColDetLib::Bench bench(&nano); { size_t t1,t2,t3; { FastColDetLib::Bench b(&t1); grid2_0.clear(); } { FastColDetLib::Bench b(&t2); grid2_0.addParticles(N,AABBs.data()); } { FastColDetLib::Bench b(&t3); grid2_0.buildTree(); } std::cout<<t1<<" "<<t2<<" "<<t3<<std::endl; auto vec = grid2_0.findCollisionsAll(); ctr += vec.size(); } } std::cout<<N<<" vs "<<N<<" AABB collision checking by adaptive grid= "<<nano<<" nanoseconds "<<std::endl; std::cout<<"total = "<<ctr.load()<<std::endl; } } } return 0; }
Become a Patron
Sponsor on GitHub
Donate via PayPal
Compiler Explorer Shop
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
Statistics
Changelog
Version tree