Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
Triton
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
c source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
6502 cc65 2.17
6502 cc65 2.18
6502 cc65 2.19
6502 cc65 trunk
ARM GCC 10.2.0 (linux)
ARM GCC 10.2.1 (none)
ARM GCC 10.3.0 (linux)
ARM GCC 10.3.1 (2021.07 none)
ARM GCC 10.3.1 (2021.10 none)
ARM GCC 10.5.0
ARM GCC 11.1.0 (linux)
ARM GCC 11.2.0 (linux)
ARM GCC 11.2.1 (none)
ARM GCC 11.3.0 (linux)
ARM GCC 11.4.0
ARM GCC 12.1.0 (linux)
ARM GCC 12.2.0 (linux)
ARM GCC 12.3.0
ARM GCC 12.4.0
ARM GCC 12.5.0
ARM GCC 13.1.0 (linux)
ARM GCC 13.2.0
ARM GCC 13.2.0 (unknown-eabi)
ARM GCC 13.3.0
ARM GCC 13.3.0 (unknown-eabi)
ARM GCC 13.4.0
ARM GCC 13.4.0 (unknown-eabi)
ARM GCC 14.1.0
ARM GCC 14.1.0 (unknown-eabi)
ARM GCC 14.2.0
ARM GCC 14.2.0 (unknown-eabi)
ARM GCC 14.3.0
ARM GCC 14.3.0 (unknown-eabi)
ARM GCC 15.1.0
ARM GCC 15.1.0 (unknown-eabi)
ARM GCC 15.2.0
ARM GCC 15.2.0 (unknown-eabi)
ARM GCC 4.5.4 (linux)
ARM GCC 4.6.4 (linux)
ARM GCC 5.4 (linux)
ARM GCC 5.4.1 (none)
ARM GCC 6.3.0 (linux)
ARM GCC 6.4.0 (linux)
ARM GCC 7.2.1 (none)
ARM GCC 7.3.0 (linux)
ARM GCC 7.5.0 (linux)
ARM GCC 8.2.0 (WinCE)
ARM GCC 8.2.0 (linux)
ARM GCC 8.3.1 (none)
ARM GCC 8.5.0 (linux)
ARM GCC 9.2.1 (none)
ARM GCC 9.3.0 (linux)
ARM GCC trunk (linux)
ARM msvc v19.0 (ex-WINE)
ARM msvc v19.10 (ex-WINE)
ARM msvc v19.14 (ex-WINE)
ARM64 GCC 10.2.0
ARM64 GCC 10.3.0
ARM64 GCC 10.4.0
ARM64 GCC 10.5.0
ARM64 GCC 11.1.0
ARM64 GCC 11.2.0
ARM64 GCC 11.3.0
ARM64 GCC 11.4.0
ARM64 GCC 12.1.0
ARM64 GCC 12.2.0
ARM64 GCC 12.3.0
ARM64 GCC 12.4.0
ARM64 GCC 12.5.0
ARM64 GCC 13.1.0
ARM64 GCC 13.2.0
ARM64 GCC 13.3.0
ARM64 GCC 13.4.0
ARM64 GCC 14.1.0
ARM64 GCC 14.2.0
ARM64 GCC 14.3.0
ARM64 GCC 15.1.0
ARM64 GCC 15.2.0
ARM64 GCC 4.9.4
ARM64 GCC 5.4
ARM64 GCC 5.5.0
ARM64 GCC 6.3
ARM64 GCC 6.4.0
ARM64 GCC 7.3.0
ARM64 GCC 7.5.0
ARM64 GCC 8.2.0
ARM64 GCC 8.5.0
ARM64 GCC 9.3.0
ARM64 GCC 9.4.0
ARM64 GCC 9.5.0
ARM64 GCC trunk
ARM64 Morello GCC 10.1.0 Alpha 1
ARM64 Morello GCC 10.1.2 Alpha 2
ARM64 msvc v19.14 (ex-WINE)
AVR gcc 10.3.0
AVR gcc 11.1.0
AVR gcc 12.1.0
AVR gcc 12.2.0
AVR gcc 12.3.0
AVR gcc 12.4.0
AVR gcc 12.5.0
AVR gcc 13.1.0
AVR gcc 13.2.0
AVR gcc 13.3.0
AVR gcc 13.4.0
AVR gcc 14.1.0
AVR gcc 14.2.0
AVR gcc 14.3.0
AVR gcc 15.1.0
AVR gcc 15.2.0
AVR gcc 4.5.4
AVR gcc 4.6.4
AVR gcc 5.4.0
AVR gcc 9.2.0
AVR gcc 9.3.0
Arduino Mega (1.8.9)
Arduino Uno (1.8.9)
BPF clang (trunk)
BPF clang 13.0.0
BPF clang 14.0.0
BPF clang 15.0.0
BPF clang 16.0.0
BPF clang 17.0.1
BPF clang 18.1.0
BPF clang 19.1.0
BPF clang 20.1.0
BPF clang 21.1.0
BPF gcc 13.1.0
BPF gcc 13.2.0
BPF gcc 13.3.0
BPF gcc 13.4.0
BPF gcc 14.1.0
BPF gcc 14.2.0
BPF gcc 14.3.0
BPF gcc 15.1.0
BPF gcc 15.2.0
BPF gcc trunk
C2Rust (master)
Chibicc 2020-12-07
FRC 2019
FRC 2020
FRC 2023
HPPA gcc 14.2.0
HPPA gcc 14.3.0
HPPA gcc 15.1.0
HPPA gcc 15.2.0
K1C gcc 7.4
K1C gcc 7.5
KVX ACB 4.1.0 (GCC 7.5.0)
KVX ACB 4.1.0-cd1 (GCC 7.5.0)
KVX ACB 4.10.0 (GCC 10.3.1)
KVX ACB 4.11.1 (GCC 10.3.1)
KVX ACB 4.12.0 (GCC 11.3.0)
KVX ACB 4.2.0 (GCC 7.5.0)
KVX ACB 4.3.0 (GCC 7.5.0)
KVX ACB 4.4.0 (GCC 7.5.0)
KVX ACB 4.6.0 (GCC 9.4.1)
KVX ACB 4.8.0 (GCC 9.4.1)
KVX ACB 4.9.0 (GCC 9.4.1)
KVX ACB 5.0.0 (GCC 12.2.1)
KVX ACB 5.2.0 (GCC 13.2.1)
LC3 (trunk)
M68K clang (trunk)
M68K gcc 13.1.0
M68K gcc 13.2.0
M68K gcc 13.3.0
M68K gcc 13.4.0
M68K gcc 14.1.0
M68K gcc 14.2.0
M68K gcc 14.3.0
M68K gcc 15.1.0
M68K gcc 15.2.0
MRISC32 gcc (trunk)
MSP430 gcc 12.1.0
MSP430 gcc 12.2.0
MSP430 gcc 12.3.0
MSP430 gcc 12.4.0
MSP430 gcc 12.5.0
MSP430 gcc 13.1.0
MSP430 gcc 13.2.0
MSP430 gcc 13.3.0
MSP430 gcc 13.4.0
MSP430 gcc 14.1.0
MSP430 gcc 14.2.0
MSP430 gcc 14.3.0
MSP430 gcc 15.1.0
MSP430 gcc 15.2.0
MSP430 gcc 4.5.3
MSP430 gcc 5.3.0
MSP430 gcc 6.2.1
MinGW clang 14.0.3
MinGW clang 14.0.6
MinGW clang 15.0.7
MinGW clang 16.0.0
MinGW clang 16.0.2
MinGW gcc 11.3.0
MinGW gcc 12.1.0
MinGW gcc 12.2.0
MinGW gcc 13.1.0
MinGW gcc 14.3.0
MinGW gcc 15.2.0
ORCA/C 2.1.0
ORCA/C 2.2.0
ORCA/C 2.2.1
POWER64 gcc 11.2.0
POWER64 gcc 12.1.0
POWER64 gcc 12.2.0
POWER64 gcc 12.3.0
POWER64 gcc 12.4.0
POWER64 gcc 12.5.0
POWER64 gcc 13.1.0
POWER64 gcc 13.2.0
POWER64 gcc 13.3.0
POWER64 gcc 13.4.0
POWER64 gcc 14.1.0
POWER64 gcc 14.2.0
POWER64 gcc 14.3.0
POWER64 gcc 15.1.0
POWER64 gcc 15.2.0
POWER64 gcc trunk
RISC-V (32-bits) gcc (trunk)
RISC-V (32-bits) gcc 10.2.0
RISC-V (32-bits) gcc 10.3.0
RISC-V (32-bits) gcc 11.2.0
RISC-V (32-bits) gcc 11.3.0
RISC-V (32-bits) gcc 11.4.0
RISC-V (32-bits) gcc 12.1.0
RISC-V (32-bits) gcc 12.2.0
RISC-V (32-bits) gcc 12.3.0
RISC-V (32-bits) gcc 12.4.0
RISC-V (32-bits) gcc 12.5.0
RISC-V (32-bits) gcc 13.1.0
RISC-V (32-bits) gcc 13.2.0
RISC-V (32-bits) gcc 13.3.0
RISC-V (32-bits) gcc 13.4.0
RISC-V (32-bits) gcc 14.1.0
RISC-V (32-bits) gcc 14.2.0
RISC-V (32-bits) gcc 14.3.0
RISC-V (32-bits) gcc 15.1.0
RISC-V (32-bits) gcc 15.2.0
RISC-V (32-bits) gcc 8.2.0
RISC-V (32-bits) gcc 8.5.0
RISC-V (32-bits) gcc 9.4.0
RISC-V (64-bits) gcc (trunk)
RISC-V (64-bits) gcc 10.2.0
RISC-V (64-bits) gcc 10.3.0
RISC-V (64-bits) gcc 11.2.0
RISC-V (64-bits) gcc 11.3.0
RISC-V (64-bits) gcc 11.4.0
RISC-V (64-bits) gcc 12.1.0
RISC-V (64-bits) gcc 12.2.0
RISC-V (64-bits) gcc 12.3.0
RISC-V (64-bits) gcc 12.4.0
RISC-V (64-bits) gcc 12.5.0
RISC-V (64-bits) gcc 13.1.0
RISC-V (64-bits) gcc 13.2.0
RISC-V (64-bits) gcc 13.3.0
RISC-V (64-bits) gcc 13.4.0
RISC-V (64-bits) gcc 14.1.0
RISC-V (64-bits) gcc 14.2.0
RISC-V (64-bits) gcc 14.3.0
RISC-V (64-bits) gcc 15.1.0
RISC-V (64-bits) gcc 15.2.0
RISC-V (64-bits) gcc 8.2.0
RISC-V (64-bits) gcc 8.5.0
RISC-V (64-bits) gcc 9.4.0
RISC-V rv32gc clang (trunk)
RISC-V rv32gc clang 10.0.0
RISC-V rv32gc clang 10.0.1
RISC-V rv32gc clang 11.0.0
RISC-V rv32gc clang 11.0.1
RISC-V rv32gc clang 12.0.0
RISC-V rv32gc clang 12.0.1
RISC-V rv32gc clang 13.0.0
RISC-V rv32gc clang 13.0.1
RISC-V rv32gc clang 14.0.0
RISC-V rv32gc clang 15.0.0
RISC-V rv32gc clang 16.0.0
RISC-V rv32gc clang 17.0.1
RISC-V rv32gc clang 18.1.0
RISC-V rv32gc clang 19.1.0
RISC-V rv32gc clang 20.1.0
RISC-V rv32gc clang 21.1.0
RISC-V rv32gc clang 9.0.0
RISC-V rv32gc clang 9.0.1
RISC-V rv64gc clang (trunk)
RISC-V rv64gc clang 10.0.0
RISC-V rv64gc clang 10.0.1
RISC-V rv64gc clang 11.0.0
RISC-V rv64gc clang 11.0.1
RISC-V rv64gc clang 12.0.0
RISC-V rv64gc clang 12.0.1
RISC-V rv64gc clang 13.0.0
RISC-V rv64gc clang 13.0.1
RISC-V rv64gc clang 14.0.0
RISC-V rv64gc clang 15.0.0
RISC-V rv64gc clang 16.0.0
RISC-V rv64gc clang 17.0.1
RISC-V rv64gc clang 18.1.0
RISC-V rv64gc clang 19.1.0
RISC-V rv64gc clang 20.1.0
RISC-V rv64gc clang 21.1.0
RISC-V rv64gc clang 9.0.0
RISC-V rv64gc clang 9.0.1
Raspbian Buster
Raspbian Stretch
SDCC 4.0.0
SDCC 4.1.0
SDCC 4.2.0
SDCC 4.3.0
SDCC 4.4.0
SDCC 4.5.0
SPARC LEON gcc 12.2.0
SPARC LEON gcc 12.3.0
SPARC LEON gcc 12.4.0
SPARC LEON gcc 12.5.0
SPARC LEON gcc 13.1.0
SPARC LEON gcc 13.2.0
SPARC LEON gcc 13.3.0
SPARC LEON gcc 13.4.0
SPARC LEON gcc 14.1.0
SPARC LEON gcc 14.2.0
SPARC LEON gcc 14.3.0
SPARC LEON gcc 15.1.0
SPARC LEON gcc 15.2.0
SPARC gcc 12.2.0
SPARC gcc 12.3.0
SPARC gcc 12.4.0
SPARC gcc 12.5.0
SPARC gcc 13.1.0
SPARC gcc 13.2.0
SPARC gcc 13.3.0
SPARC gcc 13.4.0
SPARC gcc 14.1.0
SPARC gcc 14.2.0
SPARC gcc 14.3.0
SPARC gcc 15.1.0
SPARC gcc 15.2.0
SPARC64 gcc 12.2.0
SPARC64 gcc 12.3.0
SPARC64 gcc 12.4.0
SPARC64 gcc 12.5.0
SPARC64 gcc 13.1.0
SPARC64 gcc 13.2.0
SPARC64 gcc 13.3.0
SPARC64 gcc 13.4.0
SPARC64 gcc 14.1.0
SPARC64 gcc 14.2.0
SPARC64 gcc 14.3.0
SPARC64 gcc 15.1.0
SPARC64 gcc 15.2.0
TCC (trunk)
TCC 0.9.27
TI C6x gcc 12.2.0
TI C6x gcc 12.3.0
TI C6x gcc 12.4.0
TI C6x gcc 12.5.0
TI C6x gcc 13.1.0
TI C6x gcc 13.2.0
TI C6x gcc 13.3.0
TI C6x gcc 13.4.0
TI C6x gcc 14.1.0
TI C6x gcc 14.2.0
TI C6x gcc 14.3.0
TI C6x gcc 15.1.0
TI C6x gcc 15.2.0
TI CL430 21.6.1
Tricore gcc 11.3.0 (EEESlab)
VAX gcc NetBSDELF 10.4.0
VAX gcc NetBSDELF 10.5.0 (Nov 15 03:50:22 2023)
VAX gcc NetBSDELF 12.4.0 (Apr 16 05:27 2025)
WebAssembly clang (trunk)
Xtensa ESP32 gcc 11.2.0 (2022r1)
Xtensa ESP32 gcc 12.2.0 (20230208)
Xtensa ESP32 gcc 14.2.0 (20241119)
Xtensa ESP32 gcc 8.2.0 (2019r2)
Xtensa ESP32 gcc 8.2.0 (2020r1)
Xtensa ESP32 gcc 8.2.0 (2020r2)
Xtensa ESP32 gcc 8.4.0 (2020r3)
Xtensa ESP32 gcc 8.4.0 (2021r1)
Xtensa ESP32 gcc 8.4.0 (2021r2)
Xtensa ESP32-S2 gcc 11.2.0 (2022r1)
Xtensa ESP32-S2 gcc 12.2.0 (20230208)
Xtensa ESP32-S2 gcc 14.2.0 (20241119)
Xtensa ESP32-S2 gcc 8.2.0 (2019r2)
Xtensa ESP32-S2 gcc 8.2.0 (2020r1)
Xtensa ESP32-S2 gcc 8.2.0 (2020r2)
Xtensa ESP32-S2 gcc 8.4.0 (2020r3)
Xtensa ESP32-S2 gcc 8.4.0 (2021r1)
Xtensa ESP32-S2 gcc 8.4.0 (2021r2)
Xtensa ESP32-S3 gcc 11.2.0 (2022r1)
Xtensa ESP32-S3 gcc 12.2.0 (20230208)
Xtensa ESP32-S3 gcc 14.2.0 (20241119)
Xtensa ESP32-S3 gcc 8.4.0 (2020r3)
Xtensa ESP32-S3 gcc 8.4.0 (2021r1)
Xtensa ESP32-S3 gcc 8.4.0 (2021r2)
arm64 msvc v19.20 VS16.0
arm64 msvc v19.21 VS16.1
arm64 msvc v19.22 VS16.2
arm64 msvc v19.23 VS16.3
arm64 msvc v19.24 VS16.4
arm64 msvc v19.25 VS16.5
arm64 msvc v19.27 VS16.7
arm64 msvc v19.28 VS16.8
arm64 msvc v19.28 VS16.9
arm64 msvc v19.29 VS16.10
arm64 msvc v19.29 VS16.11
arm64 msvc v19.30 VS17.0
arm64 msvc v19.31 VS17.1
arm64 msvc v19.32 VS17.2
arm64 msvc v19.33 VS17.3
arm64 msvc v19.34 VS17.4
arm64 msvc v19.35 VS17.5
arm64 msvc v19.36 VS17.6
arm64 msvc v19.37 VS17.7
arm64 msvc v19.38 VS17.8
arm64 msvc v19.39 VS17.9
arm64 msvc v19.40 VS17.10
arm64 msvc v19.41 VS17.11
arm64 msvc v19.42 VS17.12
arm64 msvc v19.43 VS17.13
arm64 msvc v19.latest
armv7-a clang (trunk)
armv7-a clang 10.0.0
armv7-a clang 10.0.1
armv7-a clang 11.0.0
armv7-a clang 11.0.1
armv7-a clang 12.0.0
armv7-a clang 12.0.1
armv7-a clang 13.0.0
armv7-a clang 13.0.1
armv7-a clang 14.0.0
armv7-a clang 15.0.0
armv7-a clang 16.0.0
armv7-a clang 17.0.1
armv7-a clang 18.1.0
armv7-a clang 19.1.0
armv7-a clang 20.1.0
armv7-a clang 21.1.0
armv7-a clang 9.0.0
armv7-a clang 9.0.1
armv8-a clang (all architectural features, trunk)
armv8-a clang (trunk)
armv8-a clang 10.0.0
armv8-a clang 10.0.1
armv8-a clang 11.0.0
armv8-a clang 11.0.1
armv8-a clang 12.0.0
armv8-a clang 12.0.1
armv8-a clang 13.0.0
armv8-a clang 13.0.1
armv8-a clang 14.0.0
armv8-a clang 15.0.0
armv8-a clang 16.0.0
armv8-a clang 17.0.1
armv8-a clang 18.1.0
armv8-a clang 19.1.0
armv8-a clang 20.1.0
armv8-a clang 21.1.0
armv8-a clang 9.0.0
armv8-a clang 9.0.1
clang 12 for DPU (rel 2023.2.0)
cproc-master
ez80-clang 15.0.0
ez80-clang 15.0.7
llvm-mos commander X16
llvm-mos commodore 64
llvm-mos mega65
llvm-mos nes-cnrom
llvm-mos nes-mmc1
llvm-mos nes-mmc3
llvm-mos nes-nrom
llvm-mos osi-c1p
loongarch64 gcc 12.2.0
loongarch64 gcc 12.3.0
loongarch64 gcc 12.4.0
loongarch64 gcc 12.5.0
loongarch64 gcc 13.1.0
loongarch64 gcc 13.2.0
loongarch64 gcc 13.3.0
loongarch64 gcc 13.4.0
loongarch64 gcc 14.1.0
loongarch64 gcc 14.2.0
loongarch64 gcc 14.3.0
loongarch64 gcc 15.1.0
loongarch64 gcc 15.2.0
mips (el) gcc 12.1.0
mips (el) gcc 12.2.0
mips (el) gcc 12.3.0
mips (el) gcc 12.4.0
mips (el) gcc 12.5.0
mips (el) gcc 13.1.0
mips (el) gcc 13.2.0
mips (el) gcc 13.3.0
mips (el) gcc 13.4.0
mips (el) gcc 14.1.0
mips (el) gcc 14.2.0
mips (el) gcc 14.3.0
mips (el) gcc 15.1.0
mips (el) gcc 15.2.0
mips (el) gcc 4.9.4
mips (el) gcc 5.4
mips (el) gcc 5.5.0
mips (el) gcc 9.5.0
mips clang 13.0.0
mips clang 14.0.0
mips clang 15.0.0
mips clang 16.0.0
mips clang 17.0.1
mips clang 18.1.0
mips clang 19.1.0
mips clang 20.1.0
mips clang 21.1.0
mips gcc 11.2.0
mips gcc 12.1.0
mips gcc 12.2.0
mips gcc 12.3.0
mips gcc 12.4.0
mips gcc 12.5.0
mips gcc 13.1.0
mips gcc 13.2.0
mips gcc 13.3.0
mips gcc 13.4.0
mips gcc 14.1.0
mips gcc 14.2.0
mips gcc 14.3.0
mips gcc 15.1.0
mips gcc 15.2.0
mips gcc 4.9.4
mips gcc 5.4
mips gcc 5.5.0
mips gcc 9.3.0 (codescape)
mips gcc 9.5.0
mips64 (el) gcc 12.1.0
mips64 (el) gcc 12.2.0
mips64 (el) gcc 12.3.0
mips64 (el) gcc 12.4.0
mips64 (el) gcc 12.5.0
mips64 (el) gcc 13.1.0
mips64 (el) gcc 13.2.0
mips64 (el) gcc 13.3.0
mips64 (el) gcc 13.4.0
mips64 (el) gcc 14.1.0
mips64 (el) gcc 14.2.0
mips64 (el) gcc 14.3.0
mips64 (el) gcc 15.1.0
mips64 (el) gcc 15.2.0
mips64 (el) gcc 4.9.4
mips64 (el) gcc 5.4.0
mips64 (el) gcc 5.5.0
mips64 (el) gcc 9.5.0
mips64 clang 13.0.0
mips64 clang 14.0.0
mips64 clang 15.0.0
mips64 clang 16.0.0
mips64 clang 17.0.1
mips64 clang 18.1.0
mips64 clang 19.1.0
mips64 clang 20.1.0
mips64 clang 21.1.0
mips64 gcc 11.2.0
mips64 gcc 12.1.0
mips64 gcc 12.2.0
mips64 gcc 12.3.0
mips64 gcc 12.4.0
mips64 gcc 12.5.0
mips64 gcc 13.1.0
mips64 gcc 13.2.0
mips64 gcc 13.3.0
mips64 gcc 13.4.0
mips64 gcc 14.1.0
mips64 gcc 14.2.0
mips64 gcc 14.3.0
mips64 gcc 15.1.0
mips64 gcc 15.2.0
mips64 gcc 4.9.4
mips64 gcc 5.4
mips64 gcc 5.5.0
mips64 gcc 9.5.0
mips64el clang 13.0.0
mips64el clang 14.0.0
mips64el clang 15.0.0
mips64el clang 16.0.0
mips64el clang 17.0.1
mips64el clang 18.1.0
mips64el clang 19.1.0
mips64el clang 20.1.0
mips64el clang 21.1.0
mipsel clang 13.0.0
mipsel clang 14.0.0
mipsel clang 15.0.0
mipsel clang 16.0.0
mipsel clang 17.0.1
mipsel clang 18.1.0
mipsel clang 19.1.0
mipsel clang 20.1.0
mipsel clang 21.1.0
movfuscator (trunk)
nanoMIPS gcc 6.3.0
power gcc 11.2.0
power gcc 12.1.0
power gcc 12.2.0
power gcc 12.3.0
power gcc 12.4.0
power gcc 12.5.0
power gcc 13.1.0
power gcc 13.2.0
power gcc 13.3.0
power gcc 13.4.0
power gcc 14.1.0
power gcc 14.2.0
power gcc 14.3.0
power gcc 15.1.0
power gcc 15.2.0
power gcc 4.8.5
power64 AT12.0 (gcc8)
power64 AT13.0 (gcc9)
power64le AT12.0 (gcc8)
power64le AT13.0 (gcc9)
power64le clang (trunk)
power64le gcc 11.2.0
power64le gcc 12.1.0
power64le gcc 12.2.0
power64le gcc 12.3.0
power64le gcc 12.4.0
power64le gcc 12.5.0
power64le gcc 13.1.0
power64le gcc 13.2.0
power64le gcc 13.3.0
power64le gcc 13.4.0
power64le gcc 14.1.0
power64le gcc 14.2.0
power64le gcc 14.3.0
power64le gcc 15.1.0
power64le gcc 15.2.0
power64le gcc 6.3.0
power64le gcc trunk
powerpc64 clang (trunk)
ppci 0.5.5
s390x gcc 11.2.0
s390x gcc 12.1.0
s390x gcc 12.2.0
s390x gcc 12.3.0
s390x gcc 12.4.0
s390x gcc 12.5.0
s390x gcc 13.1.0
s390x gcc 13.2.0
s390x gcc 13.3.0
s390x gcc 13.4.0
s390x gcc 14.1.0
s390x gcc 14.2.0
s390x gcc 14.3.0
s390x gcc 15.1.0
s390x gcc 15.2.0
sh gcc 12.2.0
sh gcc 12.3.0
sh gcc 12.4.0
sh gcc 12.5.0
sh gcc 13.1.0
sh gcc 13.2.0
sh gcc 13.3.0
sh gcc 13.4.0
sh gcc 14.1.0
sh gcc 14.2.0
sh gcc 14.3.0
sh gcc 15.1.0
sh gcc 15.2.0
sh gcc 4.9.4
sh gcc 9.5.0
vast (trunk)
x64 msvc v19.0 (ex-WINE)
x64 msvc v19.10 (ex-WINE)
x64 msvc v19.14 (ex-WINE)
x64 msvc v19.20 VS16.0
x64 msvc v19.21 VS16.1
x64 msvc v19.22 VS16.2
x64 msvc v19.23 VS16.3
x64 msvc v19.24 VS16.4
x64 msvc v19.25 VS16.5
x64 msvc v19.27 VS16.7
x64 msvc v19.28 VS16.8
x64 msvc v19.28 VS16.9
x64 msvc v19.29 VS16.10
x64 msvc v19.29 VS16.11
x64 msvc v19.30 VS17.0
x64 msvc v19.31 VS17.1
x64 msvc v19.32 VS17.2
x64 msvc v19.33 VS17.3
x64 msvc v19.34 VS17.4
x64 msvc v19.35 VS17.5
x64 msvc v19.36 VS17.6
x64 msvc v19.37 VS17.7
x64 msvc v19.38 VS17.8
x64 msvc v19.39 VS17.9
x64 msvc v19.40 VS17.10
x64 msvc v19.41 VS17.11
x64 msvc v19.42 VS17.12
x64 msvc v19.43 VS17.13
x64 msvc v19.latest
x86 CompCert 3.10
x86 CompCert 3.11
x86 CompCert 3.12
x86 CompCert 3.9
x86 gcc 1.27
x86 msvc v19.0 (ex-WINE)
x86 msvc v19.10 (ex-WINE)
x86 msvc v19.14 (ex-WINE)
x86 msvc v19.20 VS16.0
x86 msvc v19.21 VS16.1
x86 msvc v19.22 VS16.2
x86 msvc v19.23 VS16.3
x86 msvc v19.24 VS16.4
x86 msvc v19.25 VS16.5
x86 msvc v19.27 VS16.7
x86 msvc v19.28 VS16.8
x86 msvc v19.28 VS16.9
x86 msvc v19.29 VS16.10
x86 msvc v19.29 VS16.11
x86 msvc v19.30 VS17.0
x86 msvc v19.31 VS17.1
x86 msvc v19.32 VS17.2
x86 msvc v19.33 VS17.3
x86 msvc v19.34 VS17.4
x86 msvc v19.35 VS17.5
x86 msvc v19.36 VS17.6
x86 msvc v19.37 VS17.7
x86 msvc v19.38 VS17.8
x86 msvc v19.39 VS17.9
x86 msvc v19.40 VS17.10
x86 msvc v19.41 VS17.11
x86 msvc v19.42 VS17.12
x86 msvc v19.43 VS17.13
x86 msvc v19.latest
x86 nvc 24.11
x86 nvc 24.9
x86 nvc 25.1
x86 nvc 25.3
x86 nvc 25.5
x86 nvc 25.7
x86 tendra (trunk)
x86-64 clang (assertions trunk)
x86-64 clang (thephd.dev)
x86-64 clang (trunk)
x86-64 clang (widberg)
x86-64 clang 10.0.0
x86-64 clang 10.0.1
x86-64 clang 11.0.0
x86-64 clang 11.0.1
x86-64 clang 12.0.0
x86-64 clang 12.0.1
x86-64 clang 13.0.0
x86-64 clang 13.0.1
x86-64 clang 14.0.0
x86-64 clang 15.0.0
x86-64 clang 16.0.0
x86-64 clang 17.0.1
x86-64 clang 18.1.0
x86-64 clang 19.1.0
x86-64 clang 20.1.0
x86-64 clang 21.1.0
x86-64 clang 3.0.0
x86-64 clang 3.1
x86-64 clang 3.2
x86-64 clang 3.3
x86-64 clang 3.4.1
x86-64 clang 3.5
x86-64 clang 3.5.1
x86-64 clang 3.5.2
x86-64 clang 3.6
x86-64 clang 3.7
x86-64 clang 3.7.1
x86-64 clang 3.8
x86-64 clang 3.8.1
x86-64 clang 3.9.0
x86-64 clang 3.9.1
x86-64 clang 4.0.0
x86-64 clang 4.0.1
x86-64 clang 5.0.0
x86-64 clang 5.0.1
x86-64 clang 5.0.2
x86-64 clang 6.0.0
x86-64 clang 6.0.1
x86-64 clang 7.0.0
x86-64 clang 7.0.1
x86-64 clang 7.1.0
x86-64 clang 8.0.0
x86-64 clang 8.0.1
x86-64 clang 9.0.0
x86-64 clang 9.0.1
x86-64 gcc (trunk)
x86-64 gcc 10.1
x86-64 gcc 10.2
x86-64 gcc 10.3
x86-64 gcc 10.3 (assertions)
x86-64 gcc 10.4
x86-64 gcc 10.4 (assertions)
x86-64 gcc 10.5
x86-64 gcc 10.5 (assertions)
x86-64 gcc 11.1
x86-64 gcc 11.1 (assertions)
x86-64 gcc 11.2
x86-64 gcc 11.2 (assertions)
x86-64 gcc 11.3
x86-64 gcc 11.3 (assertions)
x86-64 gcc 11.4
x86-64 gcc 11.4 (assertions)
x86-64 gcc 12.1
x86-64 gcc 12.1 (assertions)
x86-64 gcc 12.2
x86-64 gcc 12.2 (assertions)
x86-64 gcc 12.3
x86-64 gcc 12.3 (assertions)
x86-64 gcc 12.4
x86-64 gcc 12.4 (assertions)
x86-64 gcc 12.5
x86-64 gcc 12.5 (assertions)
x86-64 gcc 13.1
x86-64 gcc 13.1 (assertions)
x86-64 gcc 13.2
x86-64 gcc 13.2 (assertions)
x86-64 gcc 13.3
x86-64 gcc 13.3 (assertions)
x86-64 gcc 13.4
x86-64 gcc 13.4 (assertions)
x86-64 gcc 14.1
x86-64 gcc 14.1 (assertions)
x86-64 gcc 14.2
x86-64 gcc 14.2 (assertions)
x86-64 gcc 14.3
x86-64 gcc 14.3 (assertions)
x86-64 gcc 15.1
x86-64 gcc 15.1 (assertions)
x86-64 gcc 15.2
x86-64 gcc 15.2 (assertions)
x86-64 gcc 3.4.6
x86-64 gcc 4.0.4
x86-64 gcc 4.1.2
x86-64 gcc 4.4.7
x86-64 gcc 4.5.3
x86-64 gcc 4.6.4
x86-64 gcc 4.7.1
x86-64 gcc 4.7.2
x86-64 gcc 4.7.3
x86-64 gcc 4.7.4
x86-64 gcc 4.8.1
x86-64 gcc 4.8.2
x86-64 gcc 4.8.3
x86-64 gcc 4.8.4
x86-64 gcc 4.8.5
x86-64 gcc 4.9.0
x86-64 gcc 4.9.1
x86-64 gcc 4.9.2
x86-64 gcc 4.9.3
x86-64 gcc 4.9.4
x86-64 gcc 5.1
x86-64 gcc 5.2
x86-64 gcc 5.3
x86-64 gcc 5.4
x86-64 gcc 6.1
x86-64 gcc 6.2
x86-64 gcc 6.3
x86-64 gcc 6.5
x86-64 gcc 7.1
x86-64 gcc 7.2
x86-64 gcc 7.3
x86-64 gcc 7.4
x86-64 gcc 7.5
x86-64 gcc 8.1
x86-64 gcc 8.2
x86-64 gcc 8.3
x86-64 gcc 8.4
x86-64 gcc 8.5
x86-64 gcc 9.1
x86-64 gcc 9.2
x86-64 gcc 9.3
x86-64 gcc 9.4
x86-64 gcc 9.5
x86-64 icc 13.0.1
x86-64 icc 16.0.3
x86-64 icc 17.0.0
x86-64 icc 18.0.0
x86-64 icc 19.0.0
x86-64 icc 19.0.1
x86-64 icc 2021.1.2
x86-64 icc 2021.10.0
x86-64 icc 2021.2.0
x86-64 icc 2021.3.0
x86-64 icc 2021.4.0
x86-64 icc 2021.5.0
x86-64 icc 2021.6.0
x86-64 icc 2021.7.0
x86-64 icc 2021.7.1
x86-64 icc 2021.8.0
x86-64 icc 2021.9.0
x86-64 icx (latest)
x86-64 icx 2021.1.2
x86-64 icx 2021.2.0
x86-64 icx 2021.3.0
x86-64 icx 2021.4.0
x86-64 icx 2022.0.0
x86-64 icx 2022.1.0
x86-64 icx 2022.2.0
x86-64 icx 2022.2.1
x86-64 icx 2023.0.0
x86-64 icx 2023.1.0
x86-64 icx 2024.0.0
x86_64 CompCert 3.10
x86_64 CompCert 3.11
x86_64 CompCert 3.12
x86_64 CompCert 3.9
z180-clang 15.0.0
z180-clang 15.0.7
z80-clang 15.0.0
z80-clang 15.0.7
z88dk 2.2
zig cc 0.10.0
zig cc 0.11.0
zig cc 0.12.0
zig cc 0.12.1
zig cc 0.13.0
zig cc 0.14.0
zig cc 0.14.1
zig cc 0.15.1
zig cc 0.6.0
zig cc 0.7.0
zig cc 0.7.1
zig cc 0.8.0
zig cc 0.9.0
zig cc trunk
Options
Source code
// Source: https://github.com/rui314/chibicc #define _POSIX_C_SOURCE 200809L #include <assert.h> #include <ctype.h> #include <errno.h> #include <glob.h> #include <libgen.h> #include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <stdnoreturn.h> #include <string.h> #include <strings.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/wait.h> #include <time.h> #include <unistd.h> #define MAX(x, y) ((x) < (y) ? (y) : (x)) #define MIN(x, y) ((x) < (y) ? (x) : (y)) #ifndef __GNUC__ # define __attribute__(x) #endif typedef struct Type Type; typedef struct Node Node; typedef struct Member Member; typedef struct Relocation Relocation; typedef struct Hideset Hideset; // // strings.c // typedef struct { char **data; int capacity; int len; } StringArray; void strarray_push(StringArray *arr, char *s); char *format(char *fmt, ...) __attribute__((format(printf, 1, 2))); // // tokenize.c // // Token typedef enum { TK_IDENT, // Identifiers TK_PUNCT, // Punctuators TK_KEYWORD, // Keywords TK_STR, // String literals TK_NUM, // Numeric literals TK_PP_NUM, // Preprocessing numbers TK_EOF, // End-of-file markers } TokenKind; typedef struct { char *name; int file_no; char *contents; // For #line directive char *display_name; int line_delta; } File; // Token type typedef struct Token Token; struct Token { TokenKind kind; // Token kind Token *next; // Next token int64_t val; // If kind is TK_NUM, its value long double fval; // If kind is TK_NUM, its value char *loc; // Token location int len; // Token length Type *ty; // Used if TK_NUM or TK_STR char *str; // String literal contents including terminating '\0' File *file; // Source location char *filename; // Filename int line_no; // Line number int line_delta; // Line number bool at_bol; // True if this token is at beginning of line bool has_space; // True if this token follows a space character Hideset *hideset; // For macro expansion Token *origin; // If this is expanded from a macro, the original token }; noreturn void error(char *fmt, ...) __attribute__((format(printf, 1, 2))); noreturn void error_at(char *loc, char *fmt, ...) __attribute__((format(printf, 2, 3))); noreturn void error_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3))); void warn_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3))); bool equal(Token *tok, char *op); Token *skip(Token *tok, char *op); bool consume(Token **rest, Token *tok, char *str); void convert_pp_tokens(Token *tok); File **get_input_files(void); File *new_file(char *name, int file_no, char *contents); Token *tokenize_string_literal(Token *tok, Type *basety); Token *tokenize(File *file); Token *tokenize_file(char *filename); #define unreachable() \ error("internal error at %s:%d", __FILE__, __LINE__) // // preprocess.c // char *search_include_paths(char *filename); void init_macros(void); void define_macro(char *name, char *buf); void undef_macro(char *name); Token *preprocess(Token *tok); // // parse.c // // Variable or function typedef struct Obj Obj; struct Obj { Obj *next; char *name; // Variable name Type *ty; // Type Token *tok; // representative token bool is_local; // local or global/function int align; // alignment // Local variable int offset; // Global variable or function bool is_function; bool is_definition; bool is_static; // Global variable bool is_tentative; bool is_tls; char *init_data; Relocation *rel; // Function bool is_inline; Obj *params; Node *body; Obj *locals; Obj *va_area; Obj *alloca_bottom; int stack_size; // Static inline function bool is_live; bool is_root; StringArray refs; }; // Global variable can be initialized either by a constant expression // or a pointer to another global variable. This struct represents the // latter. typedef struct Relocation Relocation; struct Relocation { Relocation *next; int offset; char **label; long addend; }; // AST node typedef enum { ND_NULL_EXPR, // Do nothing ND_ADD, // + ND_SUB, // - ND_MUL, // * ND_DIV, // / ND_NEG, // unary - ND_MOD, // % ND_BITAND, // & ND_BITOR, // | ND_BITXOR, // ^ ND_SHL, // << ND_SHR, // >> ND_EQ, // == ND_NE, // != ND_LT, // < ND_LE, // <= ND_ASSIGN, // = ND_COND, // ?: ND_COMMA, // , ND_MEMBER, // . (struct member access) ND_ADDR, // unary & ND_DEREF, // unary * ND_NOT, // ! ND_BITNOT, // ~ ND_LOGAND, // && ND_LOGOR, // || ND_RETURN, // "return" ND_IF, // "if" ND_FOR, // "for" or "while" ND_DO, // "do" ND_SWITCH, // "switch" ND_CASE, // "case" ND_BLOCK, // { ... } ND_GOTO, // "goto" ND_GOTO_EXPR, // "goto" labels-as-values ND_LABEL, // Labeled statement ND_LABEL_VAL, // [GNU] Labels-as-values ND_FUNCALL, // Function call ND_EXPR_STMT, // Expression statement ND_STMT_EXPR, // Statement expression ND_VAR, // Variable ND_VLA_PTR, // VLA designator ND_NUM, // Integer ND_CAST, // Type cast ND_MEMZERO, // Zero-clear a stack variable ND_ASM, // "asm" ND_CAS, // Atomic compare-and-swap ND_EXCH, // Atomic exchange } NodeKind; // AST node type struct Node { NodeKind kind; // Node kind Node *next; // Next node Type *ty; // Type, e.g. int or pointer to int Token *tok; // Representative token Node *lhs; // Left-hand side Node *rhs; // Right-hand side // "if" or "for" statement Node *cond; Node *then; Node *els; Node *init; Node *inc; // "break" and "continue" labels char *brk_label; char *cont_label; // Block or statement expression Node *body; // Struct member access Member *member; // Function call Type *func_ty; Node *args; bool pass_by_stack; Obj *ret_buffer; // Goto or labeled statement, or labels-as-values char *label; char *unique_label; Node *goto_next; // Switch Node *case_next; Node *default_case; // Case long begin; long end; // "asm" string literal char *asm_str; // Atomic compare-and-swap Node *cas_addr; Node *cas_old; Node *cas_new; // Atomic op= operators Obj *atomic_addr; Node *atomic_expr; // Variable Obj *var; // Numeric literal int64_t val; long double fval; }; Node *new_cast(Node *expr, Type *ty); int64_t const_expr(Token **rest, Token *tok); Obj *parse(Token *tok); // // type.c // typedef enum { TY_VOID, TY_BOOL, TY_CHAR, TY_SHORT, TY_INT, TY_LONG, TY_FLOAT, TY_DOUBLE, TY_LDOUBLE, TY_ENUM, TY_PTR, TY_FUNC, TY_ARRAY, TY_VLA, // variable-length array TY_STRUCT, TY_UNION, } TypeKind; struct Type { TypeKind kind; int size; // sizeof() value int align; // alignment bool is_unsigned; // unsigned or signed bool is_atomic; // true if _Atomic Type *origin; // for type compatibility check // Pointer-to or array-of type. We intentionally use the same member // to represent pointer/array duality in C. // // In many contexts in which a pointer is expected, we examine this // member instead of "kind" member to determine whether a type is a // pointer or not. That means in many contexts "array of T" is // naturally handled as if it were "pointer to T", as required by // the C spec. Type *base; // Declaration Token *name; Token *name_pos; // Array int array_len; // Variable-length array Node *vla_len; // # of elements Obj *vla_size; // sizeof() value // Struct Member *members; bool is_flexible; bool is_packed; // Function type Type *return_ty; Type *params; bool is_variadic; Type *next; }; // Struct member struct Member { Member *next; Type *ty; Token *tok; // for error message Token *name; int idx; int align; int offset; // Bitfield bool is_bitfield; int bit_offset; int bit_width; }; extern Type *ty_void; extern Type *ty_bool; extern Type *ty_char; extern Type *ty_short; extern Type *ty_int; extern Type *ty_long; extern Type *ty_uchar; extern Type *ty_ushort; extern Type *ty_uint; extern Type *ty_ulong; extern Type *ty_float; extern Type *ty_double; extern Type *ty_ldouble; bool is_integer(Type *ty); bool is_flonum(Type *ty); bool is_numeric(Type *ty); bool is_compatible(Type *t1, Type *t2); Type *copy_type(Type *ty); Type *pointer_to(Type *base); Type *func_type(Type *return_ty); Type *array_of(Type *base, int size); Type *vla_of(Type *base, Node *expr); Type *enum_type(void); Type *struct_type(void); void add_type(Node *node); // // codegen.c // void codegen(Obj *prog, FILE *out); int align_to(int n, int align); // // unicode.c // int encode_utf8(char *buf, uint32_t c); uint32_t decode_utf8(char **new_pos, char *p); bool is_ident1(uint32_t c); bool is_ident2(uint32_t c); int display_width(char *p, int len); // // hashmap.c // typedef struct { char *key; int keylen; void *val; } HashEntry; typedef struct { HashEntry *buckets; int capacity; int used; } HashMap; void *hashmap_get(HashMap *map, char *key); void *hashmap_get2(HashMap *map, char *key, int keylen); void hashmap_put(HashMap *map, char *key, void *val); void hashmap_put2(HashMap *map, char *key, int keylen, void *val); void hashmap_delete(HashMap *map, char *key); void hashmap_delete2(HashMap *map, char *key, int keylen); void hashmap_test(void); // // main.c // bool file_exists(char *path); extern StringArray include_paths; extern bool opt_fpic; extern bool opt_fcommon; extern char *base_file; // BEG: codegen #define GP_MAX 6 #define FP_MAX 8 static FILE *output_file; static int depth; static char *argreg8[] = {"%dil", "%sil", "%dl", "%cl", "%r8b", "%r9b"}; static char *argreg16[] = {"%di", "%si", "%dx", "%cx", "%r8w", "%r9w"}; static char *argreg32[] = {"%edi", "%esi", "%edx", "%ecx", "%r8d", "%r9d"}; static char *argreg64[] = {"%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"}; static Obj *current_fn; static void gen_expr(Node *node); static void gen_stmt(Node *node); __attribute__((format(printf, 1, 2))) static void println(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(output_file, fmt, ap); va_end(ap); fprintf(output_file, "\n"); } static int count(void) { static int i = 1; return i++; } static void push(void) { println(" push %%rax"); depth++; } static void pop(char *arg) { println(" pop %s", arg); depth--; } static void pushf(void) { println(" sub $8, %%rsp"); println(" movsd %%xmm0, (%%rsp)"); depth++; } static void popf(int reg) { println(" movsd (%%rsp), %%xmm%d", reg); println(" add $8, %%rsp"); depth--; } // Round up `n` to the nearest multiple of `align`. For instance, // align_to(5, 8) returns 8 and align_to(11, 8) returns 16. int align_to(int n, int align) { return (n + align - 1) / align * align; } static char *reg_dx(int sz) { switch (sz) { case 1: return "%dl"; case 2: return "%dx"; case 4: return "%edx"; case 8: return "%rdx"; } unreachable(); } static char *reg_ax(int sz) { switch (sz) { case 1: return "%al"; case 2: return "%ax"; case 4: return "%eax"; case 8: return "%rax"; } unreachable(); } // Compute the absolute address of a given node. // It's an error if a given node does not reside in memory. static void gen_addr(Node *node) { switch (node->kind) { case ND_VAR: // Variable-length array, which is always local. if (node->var->ty->kind == TY_VLA) { println(" mov %d(%%rbp), %%rax", node->var->offset); return; } // Local variable if (node->var->is_local) { println(" lea %d(%%rbp), %%rax", node->var->offset); return; } if (opt_fpic) { // Thread-local variable if (node->var->is_tls) { println(" data16 lea %s@tlsgd(%%rip), %%rdi", node->var->name); println(" .value 0x6666"); println(" rex64"); println(" call __tls_get_addr@PLT"); return; } // Function or global variable println(" mov %s@GOTPCREL(%%rip), %%rax", node->var->name); return; } // Thread-local variable if (node->var->is_tls) { println(" mov %%fs:0, %%rax"); println(" add $%s@tpoff, %%rax", node->var->name); return; } // Here, we generate an absolute address of a function or a global // variable. Even though they exist at a certain address at runtime, // their addresses are not known at link-time for the following // two reasons. // // - Address randomization: Executables are loaded to memory as a // whole but it is not known what address they are loaded to. // Therefore, at link-time, relative address in the same // exectuable (i.e. the distance between two functions in the // same executable) is known, but the absolute address is not // known. // // - Dynamic linking: Dynamic shared objects (DSOs) or .so files // are loaded to memory alongside an executable at runtime and // linked by the runtime loader in memory. We know nothing // about addresses of global stuff that may be defined by DSOs // until the runtime relocation is complete. // // In order to deal with the former case, we use RIP-relative // addressing, denoted by `(%rip)`. For the latter, we obtain an // address of a stuff that may be in a shared object file from the // Global Offset Table using `@GOTPCREL(%rip)` notation. // Function if (node->ty->kind == TY_FUNC) { if (node->var->is_definition) println(" lea %s(%%rip), %%rax", node->var->name); else println(" mov %s@GOTPCREL(%%rip), %%rax", node->var->name); return; } // Global variable println(" lea %s(%%rip), %%rax", node->var->name); return; case ND_DEREF: gen_expr(node->lhs); return; case ND_COMMA: gen_expr(node->lhs); gen_addr(node->rhs); return; case ND_MEMBER: gen_addr(node->lhs); println(" add $%d, %%rax", node->member->offset); return; case ND_FUNCALL: if (node->ret_buffer) { gen_expr(node); return; } break; case ND_ASSIGN: case ND_COND: if (node->ty->kind == TY_STRUCT || node->ty->kind == TY_UNION) { gen_expr(node); return; } break; case ND_VLA_PTR: println(" lea %d(%%rbp), %%rax", node->var->offset); return; } error_tok(node->tok, "not an lvalue"); } // Load a value from where %rax is pointing to. static void load(Type *ty) { switch (ty->kind) { case TY_ARRAY: case TY_STRUCT: case TY_UNION: case TY_FUNC: case TY_VLA: // If it is an array, do not attempt to load a value to the // register because in general we can't load an entire array to a // register. As a result, the result of an evaluation of an array // becomes not the array itself but the address of the array. // This is where "array is automatically converted to a pointer to // the first element of the array in C" occurs. return; case TY_FLOAT: println(" movss (%%rax), %%xmm0"); return; case TY_DOUBLE: println(" movsd (%%rax), %%xmm0"); return; case TY_LDOUBLE: println(" fldt (%%rax)"); return; } char *insn = ty->is_unsigned ? "movz" : "movs"; // When we load a char or a short value to a register, we always // extend them to the size of int, so we can assume the lower half of // a register always contains a valid value. The upper half of a // register for char, short and int may contain garbage. When we load // a long value to a register, it simply occupies the entire register. if (ty->size == 1) println(" %sbl (%%rax), %%eax", insn); else if (ty->size == 2) println(" %swl (%%rax), %%eax", insn); else if (ty->size == 4) println(" movsxd (%%rax), %%rax"); else println(" mov (%%rax), %%rax"); } // Store %rax to an address that the stack top is pointing to. static void store(Type *ty) { pop("%rdi"); switch (ty->kind) { case TY_STRUCT: case TY_UNION: for (int i = 0; i < ty->size; i++) { println(" mov %d(%%rax), %%r8b", i); println(" mov %%r8b, %d(%%rdi)", i); } return; case TY_FLOAT: println(" movss %%xmm0, (%%rdi)"); return; case TY_DOUBLE: println(" movsd %%xmm0, (%%rdi)"); return; case TY_LDOUBLE: println(" fstpt (%%rdi)"); return; } if (ty->size == 1) println(" mov %%al, (%%rdi)"); else if (ty->size == 2) println(" mov %%ax, (%%rdi)"); else if (ty->size == 4) println(" mov %%eax, (%%rdi)"); else println(" mov %%rax, (%%rdi)"); } static void cmp_zero(Type *ty) { switch (ty->kind) { case TY_FLOAT: println(" xorps %%xmm1, %%xmm1"); println(" ucomiss %%xmm1, %%xmm0"); return; case TY_DOUBLE: println(" xorpd %%xmm1, %%xmm1"); println(" ucomisd %%xmm1, %%xmm0"); return; case TY_LDOUBLE: println(" fldz"); println(" fucomip"); println(" fstp %%st(0)"); return; } if (is_integer(ty) && ty->size <= 4) println(" cmp $0, %%eax"); else println(" cmp $0, %%rax"); } enum { I8, I16, I32, I64, U8, U16, U32, U64, F32, F64, F80 }; static int getTypeId(Type *ty) { switch (ty->kind) { case TY_CHAR: return ty->is_unsigned ? U8 : I8; case TY_SHORT: return ty->is_unsigned ? U16 : I16; case TY_INT: return ty->is_unsigned ? U32 : I32; case TY_LONG: return ty->is_unsigned ? U64 : I64; case TY_FLOAT: return F32; case TY_DOUBLE: return F64; case TY_LDOUBLE: return F80; } return U64; } // The table for type casts static char i32i8[] = "movsbl %al, %eax"; static char i32u8[] = "movzbl %al, %eax"; static char i32i16[] = "movswl %ax, %eax"; static char i32u16[] = "movzwl %ax, %eax"; static char i32f32[] = "cvtsi2ssl %eax, %xmm0"; static char i32i64[] = "movsxd %eax, %rax"; static char i32f64[] = "cvtsi2sdl %eax, %xmm0"; static char i32f80[] = "mov %eax, -4(%rsp); fildl -4(%rsp)"; static char u32f32[] = "mov %eax, %eax; cvtsi2ssq %rax, %xmm0"; static char u32i64[] = "mov %eax, %eax"; static char u32f64[] = "mov %eax, %eax; cvtsi2sdq %rax, %xmm0"; static char u32f80[] = "mov %eax, %eax; mov %rax, -8(%rsp); fildll -8(%rsp)"; static char i64f32[] = "cvtsi2ssq %rax, %xmm0"; static char i64f64[] = "cvtsi2sdq %rax, %xmm0"; static char i64f80[] = "movq %rax, -8(%rsp); fildll -8(%rsp)"; static char u64f32[] = "cvtsi2ssq %rax, %xmm0"; static char u64f64[] = "test %rax,%rax; js 1f; pxor %xmm0,%xmm0; cvtsi2sd %rax,%xmm0; jmp 2f; " "1: mov %rax,%rdi; and $1,%eax; pxor %xmm0,%xmm0; shr %rdi; " "or %rax,%rdi; cvtsi2sd %rdi,%xmm0; addsd %xmm0,%xmm0; 2:"; static char u64f80[] = "mov %rax, -8(%rsp); fildq -8(%rsp); test %rax, %rax; jns 1f;" "mov $1602224128, %eax; mov %eax, -4(%rsp); fadds -4(%rsp); 1:"; static char f32i8[] = "cvttss2sil %xmm0, %eax; movsbl %al, %eax"; static char f32u8[] = "cvttss2sil %xmm0, %eax; movzbl %al, %eax"; static char f32i16[] = "cvttss2sil %xmm0, %eax; movswl %ax, %eax"; static char f32u16[] = "cvttss2sil %xmm0, %eax; movzwl %ax, %eax"; static char f32i32[] = "cvttss2sil %xmm0, %eax"; static char f32u32[] = "cvttss2siq %xmm0, %rax"; static char f32i64[] = "cvttss2siq %xmm0, %rax"; static char f32u64[] = "cvttss2siq %xmm0, %rax"; static char f32f64[] = "cvtss2sd %xmm0, %xmm0"; static char f32f80[] = "movss %xmm0, -4(%rsp); flds -4(%rsp)"; static char f64i8[] = "cvttsd2sil %xmm0, %eax; movsbl %al, %eax"; static char f64u8[] = "cvttsd2sil %xmm0, %eax; movzbl %al, %eax"; static char f64i16[] = "cvttsd2sil %xmm0, %eax; movswl %ax, %eax"; static char f64u16[] = "cvttsd2sil %xmm0, %eax; movzwl %ax, %eax"; static char f64i32[] = "cvttsd2sil %xmm0, %eax"; static char f64u32[] = "cvttsd2siq %xmm0, %rax"; static char f64i64[] = "cvttsd2siq %xmm0, %rax"; static char f64u64[] = "cvttsd2siq %xmm0, %rax"; static char f64f32[] = "cvtsd2ss %xmm0, %xmm0"; static char f64f80[] = "movsd %xmm0, -8(%rsp); fldl -8(%rsp)"; #define FROM_F80_1 \ "fnstcw -10(%rsp); movzwl -10(%rsp), %eax; or $12, %ah; " \ "mov %ax, -12(%rsp); fldcw -12(%rsp); " #define FROM_F80_2 " -24(%rsp); fldcw -10(%rsp); " static char f80i8[] = FROM_F80_1 "fistps" FROM_F80_2 "movsbl -24(%rsp), %eax"; static char f80u8[] = FROM_F80_1 "fistps" FROM_F80_2 "movzbl -24(%rsp), %eax"; static char f80i16[] = FROM_F80_1 "fistps" FROM_F80_2 "movzbl -24(%rsp), %eax"; static char f80u16[] = FROM_F80_1 "fistpl" FROM_F80_2 "movswl -24(%rsp), %eax"; static char f80i32[] = FROM_F80_1 "fistpl" FROM_F80_2 "mov -24(%rsp), %eax"; static char f80u32[] = FROM_F80_1 "fistpl" FROM_F80_2 "mov -24(%rsp), %eax"; static char f80i64[] = FROM_F80_1 "fistpq" FROM_F80_2 "mov -24(%rsp), %rax"; static char f80u64[] = FROM_F80_1 "fistpq" FROM_F80_2 "mov -24(%rsp), %rax"; static char f80f32[] = "fstps -8(%rsp); movss -8(%rsp), %xmm0"; static char f80f64[] = "fstpl -8(%rsp); movsd -8(%rsp), %xmm0"; static char *cast_table[][11] = { // i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 f80 {NULL, NULL, NULL, i32i64, i32u8, i32u16, NULL, i32i64, i32f32, i32f64, i32f80}, // i8 {i32i8, NULL, NULL, i32i64, i32u8, i32u16, NULL, i32i64, i32f32, i32f64, i32f80}, // i16 {i32i8, i32i16, NULL, i32i64, i32u8, i32u16, NULL, i32i64, i32f32, i32f64, i32f80}, // i32 {i32i8, i32i16, NULL, NULL, i32u8, i32u16, NULL, NULL, i64f32, i64f64, i64f80}, // i64 {i32i8, NULL, NULL, i32i64, NULL, NULL, NULL, i32i64, i32f32, i32f64, i32f80}, // u8 {i32i8, i32i16, NULL, i32i64, i32u8, NULL, NULL, i32i64, i32f32, i32f64, i32f80}, // u16 {i32i8, i32i16, NULL, u32i64, i32u8, i32u16, NULL, u32i64, u32f32, u32f64, u32f80}, // u32 {i32i8, i32i16, NULL, NULL, i32u8, i32u16, NULL, NULL, u64f32, u64f64, u64f80}, // u64 {f32i8, f32i16, f32i32, f32i64, f32u8, f32u16, f32u32, f32u64, NULL, f32f64, f32f80}, // f32 {f64i8, f64i16, f64i32, f64i64, f64u8, f64u16, f64u32, f64u64, f64f32, NULL, f64f80}, // f64 {f80i8, f80i16, f80i32, f80i64, f80u8, f80u16, f80u32, f80u64, f80f32, f80f64, NULL}, // f80 }; static void castT(Type *from, Type *to) { if (to->kind == TY_VOID) return; if (to->kind == TY_BOOL) { cmp_zero(from); println(" setne %%al"); println(" movzx %%al, %%eax"); return; } int t1 = getTypeId(from); int t2 = getTypeId(to); if (cast_table[t1][t2]) println(" %s", cast_table[t1][t2]); } // Structs or unions equal or smaller than 16 bytes are passed // using up to two registers. // // If the first 8 bytes contains only floating-point type members, // they are passed in an XMM register. Otherwise, they are passed // in a general-purpose register. // // If a struct/union is larger than 8 bytes, the same rule is // applied to the the next 8 byte chunk. // // This function returns true if `ty` has only floating-point // members in its byte range [lo, hi). static bool has_flonum(Type *ty, int lo, int hi, int offset) { if (ty->kind == TY_STRUCT || ty->kind == TY_UNION) { for (Member *mem = ty->members; mem; mem = mem->next) if (!has_flonum(mem->ty, lo, hi, offset + mem->offset)) return false; return true; } if (ty->kind == TY_ARRAY) { for (int i = 0; i < ty->array_len; i++) if (!has_flonum(ty->base, lo, hi, offset + ty->base->size * i)) return false; return true; } return offset < lo || hi <= offset || ty->kind == TY_FLOAT || ty->kind == TY_DOUBLE; } static bool has_flonum1(Type *ty) { return has_flonum(ty, 0, 8, 0); } static bool has_flonum2(Type *ty) { return has_flonum(ty, 8, 16, 0); } static void push_struct(Type *ty) { int sz = align_to(ty->size, 8); println(" sub $%d, %%rsp", sz); depth += sz / 8; for (int i = 0; i < ty->size; i++) { println(" mov %d(%%rax), %%r10b", i); println(" mov %%r10b, %d(%%rsp)", i); } } static void push_args2(Node *args, bool first_pass) { if (!args) return; push_args2(args->next, first_pass); if ((first_pass && !args->pass_by_stack) || (!first_pass && args->pass_by_stack)) return; gen_expr(args); switch (args->ty->kind) { case TY_STRUCT: case TY_UNION: push_struct(args->ty); break; case TY_FLOAT: case TY_DOUBLE: pushf(); break; case TY_LDOUBLE: println(" sub $16, %%rsp"); println(" fstpt (%%rsp)"); depth += 2; break; default: push(); } } // Load function call arguments. Arguments are already evaluated and // stored to the stack as local variables. What we need to do in this // function is to load them to registers or push them to the stack as // specified by the x86-64 psABI. Here is what the spec says: // // - Up to 6 arguments of integral type are passed using RDI, RSI, // RDX, RCX, R8 and R9. // // - Up to 8 arguments of floating-point type are passed using XMM0 to // XMM7. // // - If all registers of an appropriate type are already used, push an // argument to the stack in the right-to-left order. // // - Each argument passed on the stack takes 8 bytes, and the end of // the argument area must be aligned to a 16 byte boundary. // // - If a function is variadic, set the number of floating-point type // arguments to RAX. static int push_args(Node *node) { int stack = 0, gp = 0, fp = 0; // If the return type is a large struct/union, the caller passes // a pointer to a buffer as if it were the first argument. if (node->ret_buffer && node->ty->size > 16) gp++; // Load as many arguments to the registers as possible. for (Node *arg = node->args; arg; arg = arg->next) { Type *ty = arg->ty; switch (ty->kind) { case TY_STRUCT: case TY_UNION: if (ty->size > 16) { arg->pass_by_stack = true; stack += align_to(ty->size, 8) / 8; } else { bool fp1 = has_flonum1(ty); bool fp2 = has_flonum2(ty); if (fp + fp1 + fp2 < FP_MAX && gp + !fp1 + !fp2 < GP_MAX) { fp = fp + fp1 + fp2; gp = gp + !fp1 + !fp2; } else { arg->pass_by_stack = true; stack += align_to(ty->size, 8) / 8; } } break; case TY_FLOAT: case TY_DOUBLE: if (fp++ >= FP_MAX) { arg->pass_by_stack = true; stack++; } break; case TY_LDOUBLE: arg->pass_by_stack = true; stack += 2; break; default: if (gp++ >= GP_MAX) { arg->pass_by_stack = true; stack++; } } } if ((depth + stack) % 2 == 1) { println(" sub $8, %%rsp"); depth++; stack++; } push_args2(node->args, true); push_args2(node->args, false); // If the return type is a large struct/union, the caller passes // a pointer to a buffer as if it were the first argument. if (node->ret_buffer && node->ty->size > 16) { println(" lea %d(%%rbp), %%rax", node->ret_buffer->offset); push(); } return stack; } static void copy_ret_buffer(Obj *var) { Type *ty = var->ty; int gp = 0, fp = 0; if (has_flonum1(ty)) { assert(ty->size == 4 || 8 <= ty->size); if (ty->size == 4) println(" movss %%xmm0, %d(%%rbp)", var->offset); else println(" movsd %%xmm0, %d(%%rbp)", var->offset); fp++; } else { for (int i = 0; i < MIN(8, ty->size); i++) { println(" mov %%al, %d(%%rbp)", var->offset + i); println(" shr $8, %%rax"); } gp++; } if (ty->size > 8) { if (has_flonum2(ty)) { assert(ty->size == 12 || ty->size == 16); if (ty->size == 12) println(" movss %%xmm%d, %d(%%rbp)", fp, var->offset + 8); else println(" movsd %%xmm%d, %d(%%rbp)", fp, var->offset + 8); } else { char *reg1 = (gp == 0) ? "%al" : "%dl"; char *reg2 = (gp == 0) ? "%rax" : "%rdx"; for (int i = 8; i < MIN(16, ty->size); i++) { println(" mov %s, %d(%%rbp)", reg1, var->offset + i); println(" shr $8, %s", reg2); } } } } static void copy_struct_reg(void) { Type *ty = current_fn->ty->return_ty; int gp = 0, fp = 0; println(" mov %%rax, %%rdi"); if (has_flonum(ty, 0, 8, 0)) { assert(ty->size == 4 || 8 <= ty->size); if (ty->size == 4) println(" movss (%%rdi), %%xmm0"); else println(" movsd (%%rdi), %%xmm0"); fp++; } else { println(" mov $0, %%rax"); for (int i = MIN(8, ty->size) - 1; i >= 0; i--) { println(" shl $8, %%rax"); println(" mov %d(%%rdi), %%al", i); } gp++; } if (ty->size > 8) { if (has_flonum(ty, 8, 16, 0)) { assert(ty->size == 12 || ty->size == 16); if (ty->size == 4) println(" movss 8(%%rdi), %%xmm%d", fp); else println(" movsd 8(%%rdi), %%xmm%d", fp); } else { char *reg1 = (gp == 0) ? "%al" : "%dl"; char *reg2 = (gp == 0) ? "%rax" : "%rdx"; println(" mov $0, %s", reg2); for (int i = MIN(16, ty->size) - 1; i >= 8; i--) { println(" shl $8, %s", reg2); println(" mov %d(%%rdi), %s", i, reg1); } } } } static void copy_struct_mem(void) { Type *ty = current_fn->ty->return_ty; Obj *var = current_fn->params; println(" mov %d(%%rbp), %%rdi", var->offset); for (int i = 0; i < ty->size; i++) { println(" mov %d(%%rax), %%dl", i); println(" mov %%dl, %d(%%rdi)", i); } } static void builtin_allocaf(void) { // Align size to 16 bytes. println(" add $15, %%rdi"); println(" and $0xfffffff0, %%edi"); // Shift the temporary area by %rdi. println(" mov %d(%%rbp), %%rcx", current_fn->alloca_bottom->offset); println(" sub %%rsp, %%rcx"); println(" mov %%rsp, %%rax"); println(" sub %%rdi, %%rsp"); println(" mov %%rsp, %%rdx"); println("1:"); println(" cmp $0, %%rcx"); println(" je 2f"); println(" mov (%%rax), %%r8b"); println(" mov %%r8b, (%%rdx)"); println(" inc %%rdx"); println(" inc %%rax"); println(" dec %%rcx"); println(" jmp 1b"); println("2:"); // Move alloca_bottom pointer. println(" mov %d(%%rbp), %%rax", current_fn->alloca_bottom->offset); println(" sub %%rdi, %%rax"); println(" mov %%rax, %d(%%rbp)", current_fn->alloca_bottom->offset); } // Generate code for a given node. static void gen_expr(Node *node) { println(" .loc %d %d", node->tok->file->file_no, node->tok->line_no); switch (node->kind) { case ND_NULL_EXPR: return; case ND_NUM: { switch (node->ty->kind) { case TY_FLOAT: { union { float f32; uint32_t u32; } u = { node->fval }; println(" mov $%u, %%eax # float %Lf", u.u32, node->fval); println(" movq %%rax, %%xmm0"); return; } case TY_DOUBLE: { union { double f64; uint64_t u64; } u = { node->fval }; println(" mov $%lu, %%rax # double %Lf", u.u64, node->fval); println(" movq %%rax, %%xmm0"); return; } case TY_LDOUBLE: { union { long double f80; uint64_t u64[2]; } u; memset(&u, 0, sizeof(u)); u.f80 = node->fval; println(" mov $%lu, %%rax # long double %Lf", u.u64[0], node->fval); println(" mov %%rax, -16(%%rsp)"); println(" mov $%lu, %%rax", u.u64[1]); println(" mov %%rax, -8(%%rsp)"); println(" fldt -16(%%rsp)"); return; } } println(" mov $%ld, %%rax", node->val); return; } case ND_NEG: gen_expr(node->lhs); switch (node->ty->kind) { case TY_FLOAT: println(" mov $1, %%rax"); println(" shl $31, %%rax"); println(" movq %%rax, %%xmm1"); println(" xorps %%xmm1, %%xmm0"); return; case TY_DOUBLE: println(" mov $1, %%rax"); println(" shl $63, %%rax"); println(" movq %%rax, %%xmm1"); println(" xorpd %%xmm1, %%xmm0"); return; case TY_LDOUBLE: println(" fchs"); return; } println(" neg %%rax"); return; case ND_VAR: gen_addr(node); load(node->ty); return; case ND_MEMBER: { gen_addr(node); load(node->ty); Member *mem = node->member; if (mem->is_bitfield) { println(" shl $%d, %%rax", 64 - mem->bit_width - mem->bit_offset); if (mem->ty->is_unsigned) println(" shr $%d, %%rax", 64 - mem->bit_width); else println(" sar $%d, %%rax", 64 - mem->bit_width); } return; } case ND_DEREF: gen_expr(node->lhs); load(node->ty); return; case ND_ADDR: gen_addr(node->lhs); return; case ND_ASSIGN: gen_addr(node->lhs); push(); gen_expr(node->rhs); if (node->lhs->kind == ND_MEMBER && node->lhs->member->is_bitfield) { println(" mov %%rax, %%r8"); // If the lhs is a bitfield, we need to read the current value // from memory and merge it with a new value. Member *mem = node->lhs->member; println(" mov %%rax, %%rdi"); println(" and $%ld, %%rdi", (1L << mem->bit_width) - 1); println(" shl $%d, %%rdi", mem->bit_offset); println(" mov (%%rsp), %%rax"); load(mem->ty); long mask = ((1L << mem->bit_width) - 1) << mem->bit_offset; println(" mov $%ld, %%r9", ~mask); println(" and %%r9, %%rax"); println(" or %%rdi, %%rax"); store(node->ty); println(" mov %%r8, %%rax"); return; } store(node->ty); return; case ND_STMT_EXPR: for (Node *n = node->body; n; n = n->next) gen_stmt(n); return; case ND_COMMA: gen_expr(node->lhs); gen_expr(node->rhs); return; case ND_CAST: gen_expr(node->lhs); castT(node->lhs->ty, node->ty); return; case ND_MEMZERO: // `rep stosb` is equivalent to `memset(%rdi, %al, %rcx)`. println(" mov $%d, %%rcx", node->var->ty->size); println(" lea %d(%%rbp), %%rdi", node->var->offset); println(" mov $0, %%al"); println(" rep stosb"); return; case ND_COND: { int c = count(); gen_expr(node->cond); cmp_zero(node->cond->ty); println(" je .L.else.%d", c); gen_expr(node->then); println(" jmp .L.end.%d", c); println(".L.else.%d:", c); gen_expr(node->els); println(".L.end.%d:", c); return; } case ND_NOT: gen_expr(node->lhs); cmp_zero(node->lhs->ty); println(" sete %%al"); println(" movzx %%al, %%rax"); return; case ND_BITNOT: gen_expr(node->lhs); println(" not %%rax"); return; case ND_LOGAND: { int c = count(); gen_expr(node->lhs); cmp_zero(node->lhs->ty); println(" je .L.false.%d", c); gen_expr(node->rhs); cmp_zero(node->rhs->ty); println(" je .L.false.%d", c); println(" mov $1, %%rax"); println(" jmp .L.end.%d", c); println(".L.false.%d:", c); println(" mov $0, %%rax"); println(".L.end.%d:", c); return; } case ND_LOGOR: { int c = count(); gen_expr(node->lhs); cmp_zero(node->lhs->ty); println(" jne .L.true.%d", c); gen_expr(node->rhs); cmp_zero(node->rhs->ty); println(" jne .L.true.%d", c); println(" mov $0, %%rax"); println(" jmp .L.end.%d", c); println(".L.true.%d:", c); println(" mov $1, %%rax"); println(".L.end.%d:", c); return; } case ND_FUNCALL: { if (node->lhs->kind == ND_VAR && !strcmp(node->lhs->var->name, "alloca")) { gen_expr(node->args); println(" mov %%rax, %%rdi"); builtin_allocaf(); return; } int stack_args = push_args(node); gen_expr(node->lhs); int gp = 0, fp = 0; // If the return type is a large struct/union, the caller passes // a pointer to a buffer as if it were the first argument. if (node->ret_buffer && node->ty->size > 16) pop(argreg64[gp++]); for (Node *arg = node->args; arg; arg = arg->next) { Type *ty = arg->ty; switch (ty->kind) { case TY_STRUCT: case TY_UNION: if (ty->size > 16) continue; bool fp1 = has_flonum1(ty); bool fp2 = has_flonum2(ty); if (fp + fp1 + fp2 < FP_MAX && gp + !fp1 + !fp2 < GP_MAX) { if (fp1) popf(fp++); else pop(argreg64[gp++]); if (ty->size > 8) { if (fp2) popf(fp++); else pop(argreg64[gp++]); } } break; case TY_FLOAT: case TY_DOUBLE: if (fp < FP_MAX) popf(fp++); break; case TY_LDOUBLE: break; default: if (gp < GP_MAX) pop(argreg64[gp++]); } } println(" mov %%rax, %%r10"); println(" mov $%d, %%rax", fp); println(" call *%%r10"); println(" add $%d, %%rsp", stack_args * 8); depth -= stack_args; // It looks like the most significant 48 or 56 bits in RAX may // contain garbage if a function return type is short or bool/char, // respectively. We clear the upper bits here. switch (node->ty->kind) { case TY_BOOL: println(" movzx %%al, %%eax"); return; case TY_CHAR: if (node->ty->is_unsigned) println(" movzbl %%al, %%eax"); else println(" movsbl %%al, %%eax"); return; case TY_SHORT: if (node->ty->is_unsigned) println(" movzwl %%ax, %%eax"); else println(" movswl %%ax, %%eax"); return; } // If the return type is a small struct, a value is returned // using up to two registers. if (node->ret_buffer && node->ty->size <= 16) { copy_ret_buffer(node->ret_buffer); println(" lea %d(%%rbp), %%rax", node->ret_buffer->offset); } return; } case ND_LABEL_VAL: println(" lea %s(%%rip), %%rax", node->unique_label); return; case ND_CAS: { gen_expr(node->cas_addr); push(); gen_expr(node->cas_new); push(); gen_expr(node->cas_old); println(" mov %%rax, %%r8"); load(node->cas_old->ty->base); pop("%rdx"); // new pop("%rdi"); // addr int sz = node->cas_addr->ty->base->size; println(" lock cmpxchg %s, (%%rdi)", reg_dx(sz)); println(" sete %%cl"); println(" je 1f"); println(" mov %s, (%%r8)", reg_ax(sz)); println("1:"); println(" movzbl %%cl, %%eax"); return; } case ND_EXCH: { gen_expr(node->lhs); push(); gen_expr(node->rhs); pop("%rdi"); int sz = node->lhs->ty->base->size; println(" xchg %s, (%%rdi)", reg_ax(sz)); return; } } switch (node->lhs->ty->kind) { case TY_FLOAT: case TY_DOUBLE: { gen_expr(node->rhs); pushf(); gen_expr(node->lhs); popf(1); char *sz = (node->lhs->ty->kind == TY_FLOAT) ? "ss" : "sd"; switch (node->kind) { case ND_ADD: println(" add%s %%xmm1, %%xmm0", sz); return; case ND_SUB: println(" sub%s %%xmm1, %%xmm0", sz); return; case ND_MUL: println(" mul%s %%xmm1, %%xmm0", sz); return; case ND_DIV: println(" div%s %%xmm1, %%xmm0", sz); return; case ND_EQ: case ND_NE: case ND_LT: case ND_LE: println(" ucomi%s %%xmm0, %%xmm1", sz); if (node->kind == ND_EQ) { println(" sete %%al"); println(" setnp %%dl"); println(" and %%dl, %%al"); } else if (node->kind == ND_NE) { println(" setne %%al"); println(" setp %%dl"); println(" or %%dl, %%al"); } else if (node->kind == ND_LT) { println(" seta %%al"); } else { println(" setae %%al"); } println(" and $1, %%al"); println(" movzb %%al, %%rax"); return; } error_tok(node->tok, "invalid expression"); } case TY_LDOUBLE: { gen_expr(node->lhs); gen_expr(node->rhs); switch (node->kind) { case ND_ADD: println(" faddp"); return; case ND_SUB: println(" fsubrp"); return; case ND_MUL: println(" fmulp"); return; case ND_DIV: println(" fdivrp"); return; case ND_EQ: case ND_NE: case ND_LT: case ND_LE: println(" fcomip"); println(" fstp %%st(0)"); if (node->kind == ND_EQ) println(" sete %%al"); else if (node->kind == ND_NE) println(" setne %%al"); else if (node->kind == ND_LT) println(" seta %%al"); else println(" setae %%al"); println(" movzb %%al, %%rax"); return; } error_tok(node->tok, "invalid expression"); } } gen_expr(node->rhs); push(); gen_expr(node->lhs); pop("%rdi"); char *ax, *di, *dx; if (node->lhs->ty->kind == TY_LONG || node->lhs->ty->base) { ax = "%rax"; di = "%rdi"; dx = "%rdx"; } else { ax = "%eax"; di = "%edi"; dx = "%edx"; } switch (node->kind) { case ND_ADD: println(" add %s, %s", di, ax); return; case ND_SUB: println(" sub %s, %s", di, ax); return; case ND_MUL: println(" imul %s, %s", di, ax); return; case ND_DIV: case ND_MOD: if (node->ty->is_unsigned) { println(" mov $0, %s", dx); println(" div %s", di); } else { if (node->lhs->ty->size == 8) println(" cqo"); else println(" cdq"); println(" idiv %s", di); } if (node->kind == ND_MOD) println(" mov %%rdx, %%rax"); return; case ND_BITAND: println(" and %s, %s", di, ax); return; case ND_BITOR: println(" or %s, %s", di, ax); return; case ND_BITXOR: println(" xor %s, %s", di, ax); return; case ND_EQ: case ND_NE: case ND_LT: case ND_LE: println(" cmp %s, %s", di, ax); if (node->kind == ND_EQ) { println(" sete %%al"); } else if (node->kind == ND_NE) { println(" setne %%al"); } else if (node->kind == ND_LT) { if (node->lhs->ty->is_unsigned) println(" setb %%al"); else println(" setl %%al"); } else if (node->kind == ND_LE) { if (node->lhs->ty->is_unsigned) println(" setbe %%al"); else println(" setle %%al"); } println(" movzb %%al, %%rax"); return; case ND_SHL: println(" mov %%rdi, %%rcx"); println(" shl %%cl, %s", ax); return; case ND_SHR: println(" mov %%rdi, %%rcx"); if (node->lhs->ty->is_unsigned) println(" shr %%cl, %s", ax); else println(" sar %%cl, %s", ax); return; } error_tok(node->tok, "invalid expression"); } static void gen_stmt(Node *node) { println(" .loc %d %d", node->tok->file->file_no, node->tok->line_no); switch (node->kind) { case ND_IF: { int c = count(); gen_expr(node->cond); cmp_zero(node->cond->ty); println(" je .L.else.%d", c); gen_stmt(node->then); println(" jmp .L.end.%d", c); println(".L.else.%d:", c); if (node->els) gen_stmt(node->els); println(".L.end.%d:", c); return; } case ND_FOR: { int c = count(); if (node->init) gen_stmt(node->init); println(".L.begin.%d:", c); if (node->cond) { gen_expr(node->cond); cmp_zero(node->cond->ty); println(" je %s", node->brk_label); } gen_stmt(node->then); println("%s:", node->cont_label); if (node->inc) gen_expr(node->inc); println(" jmp .L.begin.%d", c); println("%s:", node->brk_label); return; } case ND_DO: { int c = count(); println(".L.begin.%d:", c); gen_stmt(node->then); println("%s:", node->cont_label); gen_expr(node->cond); cmp_zero(node->cond->ty); println(" jne .L.begin.%d", c); println("%s:", node->brk_label); return; } case ND_SWITCH: gen_expr(node->cond); for (Node *n = node->case_next; n; n = n->case_next) { char *ax = (node->cond->ty->size == 8) ? "%rax" : "%eax"; char *di = (node->cond->ty->size == 8) ? "%rdi" : "%edi"; if (n->begin == n->end) { println(" cmp $%ld, %s", n->begin, ax); println(" je %s", n->label); continue; } // [GNU] Case ranges println(" mov %s, %s", ax, di); println(" sub $%ld, %s", n->begin, di); println(" cmp $%ld, %s", n->end - n->begin, di); println(" jbe %s", n->label); } if (node->default_case) println(" jmp %s", node->default_case->label); println(" jmp %s", node->brk_label); gen_stmt(node->then); println("%s:", node->brk_label); return; case ND_CASE: println("%s:", node->label); gen_stmt(node->lhs); return; case ND_BLOCK: for (Node *n = node->body; n; n = n->next) gen_stmt(n); return; case ND_GOTO: println(" jmp %s", node->unique_label); return; case ND_GOTO_EXPR: gen_expr(node->lhs); println(" jmp *%%rax"); return; case ND_LABEL: println("%s:", node->unique_label); gen_stmt(node->lhs); return; case ND_RETURN: if (node->lhs) { gen_expr(node->lhs); Type *ty = node->lhs->ty; switch (ty->kind) { case TY_STRUCT: case TY_UNION: if (ty->size <= 16) copy_struct_reg(); else copy_struct_mem(); break; } } println(" jmp .L.return.%s", current_fn->name); return; case ND_EXPR_STMT: gen_expr(node->lhs); return; case ND_ASM: println(" %s", node->asm_str); return; } error_tok(node->tok, "invalid statement"); } // Assign offsets to local variables. static void assign_lvar_offsets(Obj *prog) { for (Obj *fn = prog; fn; fn = fn->next) { if (!fn->is_function) continue; // If a function has many parameters, some parameters are // inevitably passed by stack rather than by register. // The first passed-by-stack parameter resides at RBP+16. int top = 16; int bottom = 0; int gp = 0, fp = 0; // Assign offsets to pass-by-stack parameters. for (Obj *var = fn->params; var; var = var->next) { Type *ty = var->ty; switch (ty->kind) { case TY_STRUCT: case TY_UNION: if (ty->size <= 16) { bool fp1 = has_flonum(ty, 0, 8, 0); bool fp2 = has_flonum(ty, 8, 16, 8); if (fp + fp1 + fp2 < FP_MAX && gp + !fp1 + !fp2 < GP_MAX) { fp = fp + fp1 + fp2; gp = gp + !fp1 + !fp2; continue; } } break; case TY_FLOAT: case TY_DOUBLE: if (fp++ < FP_MAX) continue; break; case TY_LDOUBLE: break; default: if (gp++ < GP_MAX) continue; } top = align_to(top, 8); var->offset = top; top += var->ty->size; } // Assign offsets to pass-by-register parameters and local variables. for (Obj *var = fn->locals; var; var = var->next) { if (var->offset) continue; // AMD64 System V ABI has a special alignment rule for an array of // length at least 16 bytes. We need to align such array to at least // 16-byte boundaries. See p.14 of // https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-draft.pdf. int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16) ? MAX(16, var->align) : var->align; bottom += var->ty->size; bottom = align_to(bottom, align); var->offset = -bottom; } fn->stack_size = align_to(bottom, 16); } } static void emit_data(Obj *prog) { for (Obj *var = prog; var; var = var->next) { if (var->is_function || !var->is_definition) continue; if (var->is_static) println(" .local %s", var->name); else println(" .globl %s", var->name); int align = (var->ty->kind == TY_ARRAY && var->ty->size >= 16) ? MAX(16, var->align) : var->align; // Common symbol if (opt_fcommon && var->is_tentative) { println(" .comm %s, %d, %d", var->name, var->ty->size, align); continue; } // .data or .tdata if (var->init_data) { if (var->is_tls) println(" .section .tdata,\"awT\",@progbits"); else println(" .data"); println(" .type %s, @object", var->name); println(" .size %s, %d", var->name, var->ty->size); println(" .align %d", align); println("%s:", var->name); Relocation *rel = var->rel; int pos = 0; while (pos < var->ty->size) { if (rel && rel->offset == pos) { println(" .quad %s%+ld", *rel->label, rel->addend); rel = rel->next; pos += 8; } else { println(" .byte %d", var->init_data[pos++]); } } continue; } // .bss or .tbss if (var->is_tls) println(" .section .tbss,\"awT\",@nobits"); else println(" .bss"); println(" .align %d", align); println("%s:", var->name); println(" .zero %d", var->ty->size); } } static void store_fp(int r, int offset, int sz) { switch (sz) { case 4: println(" movss %%xmm%d, %d(%%rbp)", r, offset); return; case 8: println(" movsd %%xmm%d, %d(%%rbp)", r, offset); return; } unreachable(); } static void store_gp(int r, int offset, int sz) { switch (sz) { case 1: println(" mov %s, %d(%%rbp)", argreg8[r], offset); return; case 2: println(" mov %s, %d(%%rbp)", argreg16[r], offset); return; case 4: println(" mov %s, %d(%%rbp)", argreg32[r], offset); return; case 8: println(" mov %s, %d(%%rbp)", argreg64[r], offset); return; default: for (int i = 0; i < sz; i++) { println(" mov %s, %d(%%rbp)", argreg8[r], offset + i); println(" shr $8, %s", argreg64[r]); } return; } } static void emit_text(Obj *prog) { for (Obj *fn = prog; fn; fn = fn->next) { if (!fn->is_function || !fn->is_definition) continue; // No code is emitted for "static inline" functions // if no one is referencing them. if (!fn->is_live) continue; if (fn->is_static) println(" .local %s", fn->name); else println(" .globl %s", fn->name); println(" .text"); println(" .type %s, @function", fn->name); println("%s:", fn->name); current_fn = fn; // Prologue println(" push %%rbp"); println(" mov %%rsp, %%rbp"); println(" sub $%d, %%rsp", fn->stack_size); println(" mov %%rsp, %d(%%rbp)", fn->alloca_bottom->offset); // Save arg registers if function is variadic if (fn->va_area) { int gp = 0, fp = 0; for (Obj *var = fn->params; var; var = var->next) { if (is_flonum(var->ty)) fp++; else gp++; } int off = fn->va_area->offset; // va_elem println(" movl $%d, %d(%%rbp)", gp * 8, off); // gp_offset println(" movl $%d, %d(%%rbp)", fp * 8 + 48, off + 4); // fp_offset println(" movq %%rbp, %d(%%rbp)", off + 8); // overflow_arg_area println(" addq $16, %d(%%rbp)", off + 8); println(" movq %%rbp, %d(%%rbp)", off + 16); // reg_save_area println(" addq $%d, %d(%%rbp)", off + 24, off + 16); // __reg_save_area__ println(" movq %%rdi, %d(%%rbp)", off + 24); println(" movq %%rsi, %d(%%rbp)", off + 32); println(" movq %%rdx, %d(%%rbp)", off + 40); println(" movq %%rcx, %d(%%rbp)", off + 48); println(" movq %%r8, %d(%%rbp)", off + 56); println(" movq %%r9, %d(%%rbp)", off + 64); println(" movsd %%xmm0, %d(%%rbp)", off + 72); println(" movsd %%xmm1, %d(%%rbp)", off + 80); println(" movsd %%xmm2, %d(%%rbp)", off + 88); println(" movsd %%xmm3, %d(%%rbp)", off + 96); println(" movsd %%xmm4, %d(%%rbp)", off + 104); println(" movsd %%xmm5, %d(%%rbp)", off + 112); println(" movsd %%xmm6, %d(%%rbp)", off + 120); println(" movsd %%xmm7, %d(%%rbp)", off + 128); } // Save passed-by-register arguments to the stack int gp = 0, fp = 0; for (Obj *var = fn->params; var; var = var->next) { if (var->offset > 0) continue; Type *ty = var->ty; switch (ty->kind) { case TY_STRUCT: case TY_UNION: assert(ty->size <= 16); if (has_flonum(ty, 0, 8, 0)) store_fp(fp++, var->offset, MIN(8, ty->size)); else store_gp(gp++, var->offset, MIN(8, ty->size)); if (ty->size > 8) { if (has_flonum(ty, 8, 16, 0)) store_fp(fp++, var->offset + 8, ty->size - 8); else store_gp(gp++, var->offset + 8, ty->size - 8); } break; case TY_FLOAT: case TY_DOUBLE: store_fp(fp++, var->offset, ty->size); break; default: store_gp(gp++, var->offset, ty->size); } } // Emit code gen_stmt(fn->body); assert(depth == 0); // [https://www.sigbus.info/n1570#5.1.2.2.3p1] The C spec defines // a special rule for the main function. Reaching the end of the // main function is equivalent to returning 0, even though the // behavior is undefined for the other functions. if (strcmp(fn->name, "main") == 0) println(" mov $0, %%rax"); // Epilogue println(".L.return.%s:", fn->name); println(" mov %%rbp, %%rsp"); println(" pop %%rbp"); println(" ret"); } } void codegen(Obj *prog, FILE *out) { output_file = out; File **files = get_input_files(); for (int i = 0; files[i]; i++) println(" .file %d \"%s\"", files[i]->file_no, files[i]->name); assign_lvar_offsets(prog); emit_data(prog); emit_text(prog); } // This is an implementation of the open-addressing hash table. // BEG hashmap // Initial hash bucket size #define INIT_SIZE 16 // Rehash if the usage exceeds 70%. #define HIGH_WATERMARK 70 // We'll keep the usage below 50% after rehashing. #define LOW_WATERMARK 50 // Represents a deleted hash entry #define TOMBSTONE ((void *)-1) static uint64_t fnv_hash(char *s, int len) { uint64_t hash = 0xcbf29ce484222325; for (int i = 0; i < len; i++) { hash *= 0x100000001b3; hash ^= (unsigned char)s[i]; } return hash; } // Make room for new entires in a given hashmap by removing // tombstones and possibly extending the bucket size. static void rehash(HashMap *map) { // Compute the size of the new hashmap. int nkeys = 0; for (int i = 0; i < map->capacity; i++) if (map->buckets[i].key && map->buckets[i].key != TOMBSTONE) nkeys++; int cap = map->capacity; while ((nkeys * 100) / cap >= LOW_WATERMARK) cap = cap * 2; assert(cap > 0); // Create a new hashmap and copy all key-values. HashMap map2 = {}; map2.buckets = calloc(cap, sizeof(HashEntry)); map2.capacity = cap; for (int i = 0; i < map->capacity; i++) { HashEntry *ent = &map->buckets[i]; if (ent->key && ent->key != TOMBSTONE) hashmap_put2(&map2, ent->key, ent->keylen, ent->val); } assert(map2.used == nkeys); *map = map2; } static bool match(HashEntry *ent, char *key, int keylen) { return ent->key && ent->key != TOMBSTONE && ent->keylen == keylen && memcmp(ent->key, key, keylen) == 0; } static HashEntry *get_entry(HashMap *map, char *key, int keylen) { if (!map->buckets) return NULL; uint64_t hash = fnv_hash(key, keylen); for (int i = 0; i < map->capacity; i++) { HashEntry *ent = &map->buckets[(hash + i) % map->capacity]; if (match(ent, key, keylen)) return ent; if (ent->key == NULL) return NULL; } unreachable(); } static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) { if (!map->buckets) { map->buckets = calloc(INIT_SIZE, sizeof(HashEntry)); map->capacity = INIT_SIZE; } else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) { rehash(map); } uint64_t hash = fnv_hash(key, keylen); for (int i = 0; i < map->capacity; i++) { HashEntry *ent = &map->buckets[(hash + i) % map->capacity]; if (match(ent, key, keylen)) return ent; if (ent->key == TOMBSTONE) { ent->key = key; ent->keylen = keylen; return ent; } if (ent->key == NULL) { ent->key = key; ent->keylen = keylen; map->used++; return ent; } } unreachable(); } void *hashmap_get(HashMap *map, char *key) { return hashmap_get2(map, key, strlen(key)); } void *hashmap_get2(HashMap *map, char *key, int keylen) { HashEntry *ent = get_entry(map, key, keylen); return ent ? ent->val : NULL; } void hashmap_put(HashMap *map, char *key, void *val) { hashmap_put2(map, key, strlen(key), val); } void hashmap_put2(HashMap *map, char *key, int keylen, void *val) { HashEntry *ent = get_or_insert_entry(map, key, keylen); ent->val = val; } void hashmap_delete(HashMap *map, char *key) { hashmap_delete2(map, key, strlen(key)); } void hashmap_delete2(HashMap *map, char *key, int keylen) { HashEntry *ent = get_entry(map, key, keylen); if (ent) ent->key = TOMBSTONE; } void hashmap_test(void) { HashMap *map = calloc(1, sizeof(HashMap)); for (int i = 0; i < 5000; i++) hashmap_put(map, format("key %d", i), (void *)(size_t)i); for (int i = 1000; i < 2000; i++) hashmap_delete(map, format("key %d", i)); for (int i = 1500; i < 1600; i++) hashmap_put(map, format("key %d", i), (void *)(size_t)i); for (int i = 6000; i < 7000; i++) hashmap_put(map, format("key %d", i), (void *)(size_t)i); for (int i = 0; i < 1000; i++) assert((size_t)hashmap_get(map, format("key %d", i)) == i); for (int i = 1000; i < 1500; i++) assert(hashmap_get(map, "no such key") == NULL); for (int i = 1500; i < 1600; i++) assert((size_t)hashmap_get(map, format("key %d", i)) == i); for (int i = 1600; i < 2000; i++) assert(hashmap_get(map, "no such key") == NULL); for (int i = 2000; i < 5000; i++) assert((size_t)hashmap_get(map, format("key %d", i)) == i); for (int i = 5000; i < 6000; i++) assert(hashmap_get(map, "no such key") == NULL); for (int i = 6000; i < 7000; i++) hashmap_put(map, format("key %d", i), (void *)(size_t)i); assert(hashmap_get(map, "no such key") == NULL); printf("OK\n"); } // This file contains a recursive descent parser for C. // // Most functions in this file are named after the symbols they are // supposed to read from an input token list. For example, stmt() is // responsible for reading a statement from a token list. The function // then construct an AST node representing a statement. // // Each function conceptually returns two values, an AST node and // remaining part of the input tokens. Since C doesn't support // multiple return values, the remaining tokens are returned to the // caller via a pointer argument. // // Input tokens are represented by a linked list. Unlike many recursive // descent parsers, we don't have the notion of the "input token stream". // Most parsing functions don't change the global state of the parser. // So it is very easy to lookahead arbitrary number of tokens in this // parser. // BEG parse // Scope for local variables, global variables, typedefs // or enum constants typedef struct { Obj *var; Type *type_def; Type *enum_ty; int enum_val; } VarScope; // Represents a block scope. typedef struct Scope Scope; struct Scope { Scope *next; // C has two block scopes; one is for variables/typedefs and // the other is for struct/union/enum tags. HashMap vars; HashMap tags; }; // Variable attributes such as typedef or extern. typedef struct { bool is_typedef; bool is_static; bool is_extern; bool is_inline; bool is_tls; int align; } VarAttr; // This struct represents a variable initializer. Since initializers // can be nested (e.g. `int x[2][2] = {{1, 2}, {3, 4}}`), this struct // is a tree data structure. typedef struct Initializer Initializer; struct Initializer { Initializer *next; Type *ty; Token *tok; bool is_flexible; // If it's not an aggregate type and has an initializer, // `expr` has an initialization expression. Node *expr; // If it's an initializer for an aggregate type (e.g. array or struct), // `children` has initializers for its children. Initializer **children; // Only one member can be initialized for a union. // `mem` is used to clarify which member is initialized. Member *mem; }; // For local variable initializer. typedef struct InitDesg InitDesg; struct InitDesg { InitDesg *next; int idx; Member *member; Obj *var; }; // All local variable instances created during parsing are // accumulated to this list. static Obj *locals; // Likewise, global variables are accumulated to this list. static Obj *globals; static Scope *scope = &(Scope){}; // Points to the function object the parser is currently parsing. static Obj *current_fn; // Lists of all goto statements and labels in the curent function. static Node *gotos; static Node *labels; // Current "goto" and "continue" jump targets. static char *brk_label; static char *cont_label; // Points to a node representing a switch if we are parsing // a switch statement. Otherwise, NULL. static Node *current_switch; static Obj *builtin_alloca; static bool is_typename(Token *tok); static Type *declspec(Token **rest, Token *tok, VarAttr *attr); static Type *typename(Token **rest, Token *tok); static Type *enum_specifier(Token **rest, Token *tok); static Type *typeof_specifier(Token **rest, Token *tok); static Type *type_suffix(Token **rest, Token *tok, Type *ty); static Type *declarator(Token **rest, Token *tok, Type *ty); static Node *declaration(Token **rest, Token *tok, Type *basety, VarAttr *attr); static void array_initializer2(Token **rest, Token *tok, Initializer *init, int i); static void struct_initializer2(Token **rest, Token *tok, Initializer *init, Member *mem); static void initializer2(Token **rest, Token *tok, Initializer *init); static Initializer *initializer(Token **rest, Token *tok, Type *ty, Type **new_ty); static Node *lvar_initializer(Token **rest, Token *tok, Obj *var); static void gvar_initializer(Token **rest, Token *tok, Obj *var); static Node *compound_stmt(Token **rest, Token *tok); static Node *stmt(Token **rest, Token *tok); static Node *expr_stmt(Token **rest, Token *tok); static Node *expr(Token **rest, Token *tok); static int64_t eval(Node *node); static int64_t eval2(Node *node, char ***label); static int64_t eval_rval(Node *node, char ***label); static bool is_const_expr(Node *node); static Node *assign(Token **rest, Token *tok); static Node *logor(Token **rest, Token *tok); static double eval_double(Node *node); static Node *conditional(Token **rest, Token *tok); static Node *logand(Token **rest, Token *tok); static Node *bitor(Token **rest, Token *tok); static Node *bitxor(Token **rest, Token *tok); static Node *bitand(Token **rest, Token *tok); static Node *equality(Token **rest, Token *tok); static Node *relational(Token **rest, Token *tok); static Node *shift(Token **rest, Token *tok); static Node *add(Token **rest, Token *tok); static Node *new_add(Node *lhs, Node *rhs, Token *tok); static Node *new_sub(Node *lhs, Node *rhs, Token *tok); static Node *mul(Token **rest, Token *tok); static Node *cast(Token **rest, Token *tok); static Member *get_struct_member(Type *ty, Token *tok); static Type *struct_decl(Token **rest, Token *tok); static Type *union_decl(Token **rest, Token *tok); static Node *postfix(Token **rest, Token *tok); static Node *funcall(Token **rest, Token *tok, Node *node); static Node *unary(Token **rest, Token *tok); static Node *primary(Token **rest, Token *tok); static Token *parse_typedef(Token *tok, Type *basety); static bool is_function(Token *tok); static Token *function(Token *tok, Type *basety, VarAttr *attr); static Token *global_variable(Token *tok, Type *basety, VarAttr *attr); static int align_down(int n, int align) { return align_to(n - align + 1, align); } static void enter_scope(void) { Scope *sc = calloc(1, sizeof(Scope)); sc->next = scope; scope = sc; } static void leave_scope(void) { scope = scope->next; } // Find a variable by name. static VarScope *find_var(Token *tok) { for (Scope *sc = scope; sc; sc = sc->next) { VarScope *sc2 = hashmap_get2(&sc->vars, tok->loc, tok->len); if (sc2) return sc2; } return NULL; } static Type *find_tag(Token *tok) { for (Scope *sc = scope; sc; sc = sc->next) { Type *ty = hashmap_get2(&sc->tags, tok->loc, tok->len); if (ty) return ty; } return NULL; } static Node *new_node(NodeKind kind, Token *tok) { Node *node = calloc(1, sizeof(Node)); node->kind = kind; node->tok = tok; return node; } static Node *new_binary(NodeKind kind, Node *lhs, Node *rhs, Token *tok) { Node *node = new_node(kind, tok); node->lhs = lhs; node->rhs = rhs; return node; } static Node *new_unary(NodeKind kind, Node *expr, Token *tok) { Node *node = new_node(kind, tok); node->lhs = expr; return node; } static Node *new_num(int64_t val, Token *tok) { Node *node = new_node(ND_NUM, tok); node->val = val; return node; } static Node *new_long(int64_t val, Token *tok) { Node *node = new_node(ND_NUM, tok); node->val = val; node->ty = ty_long; return node; } static Node *new_ulong(long val, Token *tok) { Node *node = new_node(ND_NUM, tok); node->val = val; node->ty = ty_ulong; return node; } static Node *new_var_node(Obj *var, Token *tok) { Node *node = new_node(ND_VAR, tok); node->var = var; return node; } static Node *new_vla_ptr(Obj *var, Token *tok) { Node *node = new_node(ND_VLA_PTR, tok); node->var = var; return node; } Node *new_cast(Node *expr, Type *ty) { add_type(expr); Node *node = calloc(1, sizeof(Node)); node->kind = ND_CAST; node->tok = expr->tok; node->lhs = expr; node->ty = copy_type(ty); return node; } static VarScope *push_scope(char *name) { VarScope *sc = calloc(1, sizeof(VarScope)); hashmap_put(&scope->vars, name, sc); return sc; } static Initializer *new_initializer(Type *ty, bool is_flexible) { Initializer *init = calloc(1, sizeof(Initializer)); init->ty = ty; if (ty->kind == TY_ARRAY) { if (is_flexible && ty->size < 0) { init->is_flexible = true; return init; } init->children = calloc(ty->array_len, sizeof(Initializer *)); for (int i = 0; i < ty->array_len; i++) init->children[i] = new_initializer(ty->base, false); return init; } if (ty->kind == TY_STRUCT || ty->kind == TY_UNION) { // Count the number of struct members. int len = 0; for (Member *mem = ty->members; mem; mem = mem->next) len++; init->children = calloc(len, sizeof(Initializer *)); for (Member *mem = ty->members; mem; mem = mem->next) { if (is_flexible && ty->is_flexible && !mem->next) { Initializer *child = calloc(1, sizeof(Initializer)); child->ty = mem->ty; child->is_flexible = true; init->children[mem->idx] = child; } else { init->children[mem->idx] = new_initializer(mem->ty, false); } } return init; } return init; } static Obj *new_var(char *name, Type *ty) { Obj *var = calloc(1, sizeof(Obj)); var->name = name; var->ty = ty; var->align = ty->align; push_scope(name)->var = var; return var; } static Obj *new_lvar(char *name, Type *ty) { Obj *var = new_var(name, ty); var->is_local = true; var->next = locals; locals = var; return var; } static Obj *new_gvar(char *name, Type *ty) { Obj *var = new_var(name, ty); var->next = globals; var->is_static = true; var->is_definition = true; globals = var; return var; } static char *new_unique_name(void) { static int id = 0; return format(".L..%d", id++); } static Obj *new_anon_gvar(Type *ty) { return new_gvar(new_unique_name(), ty); } static Obj *new_string_literal(char *p, Type *ty) { Obj *var = new_anon_gvar(ty); var->init_data = p; return var; } static char *get_ident(Token *tok) { if (tok->kind != TK_IDENT) error_tok(tok, "expected an identifier"); return strndup(tok->loc, tok->len); } static Type *find_typedef(Token *tok) { if (tok->kind == TK_IDENT) { VarScope *sc = find_var(tok); if (sc) return sc->type_def; } return NULL; } static void push_tag_scope(Token *tok, Type *ty) { hashmap_put2(&scope->tags, tok->loc, tok->len, ty); } // declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long" // | "typedef" | "static" | "extern" | "inline" // | "_Thread_local" | "__thread" // | "signed" | "unsigned" // | struct-decl | union-decl | typedef-name // | enum-specifier | typeof-specifier // | "const" | "volatile" | "auto" | "register" | "restrict" // | "__restrict" | "__restrict__" | "_Noreturn")+ // // The order of typenames in a type-specifier doesn't matter. For // example, `int long static` means the same as `static long int`. // That can also be written as `static long` because you can omit // `int` if `long` or `short` are specified. However, something like // `char int` is not a valid type specifier. We have to accept only a // limited combinations of the typenames. // // In this function, we count the number of occurrences of each typename // while keeping the "current" type object that the typenames up // until that point represent. When we reach a non-typename token, // we returns the current type object. static Type *declspec(Token **rest, Token *tok, VarAttr *attr) { // We use a single integer as counters for all typenames. // For example, bits 0 and 1 represents how many times we saw the // keyword "void" so far. With this, we can use a switch statement // as you can see below. enum { VOID = 1 << 0, BOOL = 1 << 2, CHAR = 1 << 4, SHORT = 1 << 6, INT = 1 << 8, LONG = 1 << 10, FLOAT = 1 << 12, DOUBLE = 1 << 14, OTHER = 1 << 16, SIGNED = 1 << 17, UNSIGNED = 1 << 18, }; Type *ty = ty_int; int counter = 0; bool is_atomic = false; while (is_typename(tok)) { // Handle storage class specifiers. if (equal(tok, "typedef") || equal(tok, "static") || equal(tok, "extern") || equal(tok, "inline") || equal(tok, "_Thread_local") || equal(tok, "__thread")) { if (!attr) error_tok(tok, "storage class specifier is not allowed in this context"); if (equal(tok, "typedef")) attr->is_typedef = true; else if (equal(tok, "static")) attr->is_static = true; else if (equal(tok, "extern")) attr->is_extern = true; else if (equal(tok, "inline")) attr->is_inline = true; else attr->is_tls = true; if (attr->is_typedef && attr->is_static + attr->is_extern + attr->is_inline + attr->is_tls > 1) error_tok(tok, "typedef may not be used together with static," " extern, inline, __thread or _Thread_local"); tok = tok->next; continue; } // These keywords are recognized but ignored. if (consume(&tok, tok, "const") || consume(&tok, tok, "volatile") || consume(&tok, tok, "auto") || consume(&tok, tok, "register") || consume(&tok, tok, "restrict") || consume(&tok, tok, "__restrict") || consume(&tok, tok, "__restrict__") || consume(&tok, tok, "_Noreturn")) continue; if (equal(tok, "_Atomic")) { tok = tok->next; if (equal(tok , "(")) { ty = typename(&tok, tok->next); tok = skip(tok, ")"); } is_atomic = true; continue; } if (equal(tok, "_Alignas")) { if (!attr) error_tok(tok, "_Alignas is not allowed in this context"); tok = skip(tok->next, "("); if (is_typename(tok)) attr->align = typename(&tok, tok)->align; else attr->align = const_expr(&tok, tok); tok = skip(tok, ")"); continue; } // Handle user-defined types. Type *ty2 = find_typedef(tok); if (equal(tok, "struct") || equal(tok, "union") || equal(tok, "enum") || equal(tok, "typeof") || ty2) { if (counter) break; if (equal(tok, "struct")) { ty = struct_decl(&tok, tok->next); } else if (equal(tok, "union")) { ty = union_decl(&tok, tok->next); } else if (equal(tok, "enum")) { ty = enum_specifier(&tok, tok->next); } else if (equal(tok, "typeof")) { ty = typeof_specifier(&tok, tok->next); } else { ty = ty2; tok = tok->next; } counter += OTHER; continue; } // Handle built-in types. if (equal(tok, "void")) counter += VOID; else if (equal(tok, "_Bool")) counter += BOOL; else if (equal(tok, "char")) counter += CHAR; else if (equal(tok, "short")) counter += SHORT; else if (equal(tok, "int")) counter += INT; else if (equal(tok, "long")) counter += LONG; else if (equal(tok, "float")) counter += FLOAT; else if (equal(tok, "double")) counter += DOUBLE; else if (equal(tok, "signed")) counter |= SIGNED; else if (equal(tok, "unsigned")) counter |= UNSIGNED; else unreachable(); switch (counter) { case VOID: ty = ty_void; break; case BOOL: ty = ty_bool; break; case CHAR: case SIGNED + CHAR: ty = ty_char; break; case UNSIGNED + CHAR: ty = ty_uchar; break; case SHORT: case SHORT + INT: case SIGNED + SHORT: case SIGNED + SHORT + INT: ty = ty_short; break; case UNSIGNED + SHORT: case UNSIGNED + SHORT + INT: ty = ty_ushort; break; case INT: case SIGNED: case SIGNED + INT: ty = ty_int; break; case UNSIGNED: case UNSIGNED + INT: ty = ty_uint; break; case LONG: case LONG + INT: case LONG + LONG: case LONG + LONG + INT: case SIGNED + LONG: case SIGNED + LONG + INT: case SIGNED + LONG + LONG: case SIGNED + LONG + LONG + INT: ty = ty_long; break; case UNSIGNED + LONG: case UNSIGNED + LONG + INT: case UNSIGNED + LONG + LONG: case UNSIGNED + LONG + LONG + INT: ty = ty_ulong; break; case FLOAT: ty = ty_float; break; case DOUBLE: ty = ty_double; break; case LONG + DOUBLE: ty = ty_ldouble; break; default: error_tok(tok, "invalid type"); } tok = tok->next; } if (is_atomic) { ty = copy_type(ty); ty->is_atomic = true; } *rest = tok; return ty; } // func-params = ("void" | param ("," param)* ("," "...")?)? ")" // param = declspec declarator static Type *func_params(Token **rest, Token *tok, Type *ty) { if (equal(tok, "void") && equal(tok->next, ")")) { *rest = tok->next->next; return func_type(ty); } Type head = {}; Type *cur = &head; bool is_variadic = false; while (!equal(tok, ")")) { if (cur != &head) tok = skip(tok, ","); if (equal(tok, "...")) { is_variadic = true; tok = tok->next; skip(tok, ")"); break; } Type *ty2 = declspec(&tok, tok, NULL); ty2 = declarator(&tok, tok, ty2); Token *name = ty2->name; if (ty2->kind == TY_ARRAY) { // "array of T" is converted to "pointer to T" only in the parameter // context. For example, *argv[] is converted to **argv by this. ty2 = pointer_to(ty2->base); ty2->name = name; } else if (ty2->kind == TY_FUNC) { // Likewise, a function is converted to a pointer to a function // only in the parameter context. ty2 = pointer_to(ty2); ty2->name = name; } cur = cur->next = copy_type(ty2); } if (cur == &head) is_variadic = true; ty = func_type(ty); ty->params = head.next; ty->is_variadic = is_variadic; *rest = tok->next; return ty; } // array-dimensions = ("static" | "restrict")* const-expr? "]" type-suffix static Type *array_dimensions(Token **rest, Token *tok, Type *ty) { while (equal(tok, "static") || equal(tok, "restrict")) tok = tok->next; if (equal(tok, "]")) { ty = type_suffix(rest, tok->next, ty); return array_of(ty, -1); } Node *expr = conditional(&tok, tok); tok = skip(tok, "]"); ty = type_suffix(rest, tok, ty); if (ty->kind == TY_VLA || !is_const_expr(expr)) return vla_of(ty, expr); return array_of(ty, eval(expr)); } // type-suffix = "(" func-params // | "[" array-dimensions // | ε static Type *type_suffix(Token **rest, Token *tok, Type *ty) { if (equal(tok, "(")) return func_params(rest, tok->next, ty); if (equal(tok, "[")) return array_dimensions(rest, tok->next, ty); *rest = tok; return ty; } // pointers = ("*" ("const" | "volatile" | "restrict")*)* static Type *pointers(Token **rest, Token *tok, Type *ty) { while (consume(&tok, tok, "*")) { ty = pointer_to(ty); while (equal(tok, "const") || equal(tok, "volatile") || equal(tok, "restrict") || equal(tok, "__restrict") || equal(tok, "__restrict__")) tok = tok->next; } *rest = tok; return ty; } // declarator = pointers ("(" ident ")" | "(" declarator ")" | ident) type-suffix static Type *declarator(Token **rest, Token *tok, Type *ty) { ty = pointers(&tok, tok, ty); if (equal(tok, "(")) { Token *start = tok; Type dummy = {}; declarator(&tok, start->next, &dummy); tok = skip(tok, ")"); ty = type_suffix(rest, tok, ty); return declarator(&tok, start->next, ty); } Token *name = NULL; Token *name_pos = tok; if (tok->kind == TK_IDENT) { name = tok; tok = tok->next; } ty = type_suffix(rest, tok, ty); ty->name = name; ty->name_pos = name_pos; return ty; } // abstract-declarator = pointers ("(" abstract-declarator ")")? type-suffix static Type *abstract_declarator(Token **rest, Token *tok, Type *ty) { ty = pointers(&tok, tok, ty); if (equal(tok, "(")) { Token *start = tok; Type dummy = {}; abstract_declarator(&tok, start->next, &dummy); tok = skip(tok, ")"); ty = type_suffix(rest, tok, ty); return abstract_declarator(&tok, start->next, ty); } return type_suffix(rest, tok, ty); } // type-name = declspec abstract-declarator static Type *typename(Token **rest, Token *tok) { Type *ty = declspec(&tok, tok, NULL); return abstract_declarator(rest, tok, ty); } static bool is_end(Token *tok) { return equal(tok, "}") || (equal(tok, ",") && equal(tok->next, "}")); } static bool consume_end(Token **rest, Token *tok) { if (equal(tok, "}")) { *rest = tok->next; return true; } if (equal(tok, ",") && equal(tok->next, "}")) { *rest = tok->next->next; return true; } return false; } // enum-specifier = ident? "{" enum-list? "}" // | ident ("{" enum-list? "}")? // // enum-list = ident ("=" num)? ("," ident ("=" num)?)* ","? static Type *enum_specifier(Token **rest, Token *tok) { Type *ty = enum_type(); // Read a struct tag. Token *tag = NULL; if (tok->kind == TK_IDENT) { tag = tok; tok = tok->next; } if (tag && !equal(tok, "{")) { Type *ty = find_tag(tag); if (!ty) error_tok(tag, "unknown enum type"); if (ty->kind != TY_ENUM) error_tok(tag, "not an enum tag"); *rest = tok; return ty; } tok = skip(tok, "{"); // Read an enum-list. int i = 0; int val = 0; while (!consume_end(rest, tok)) { if (i++ > 0) tok = skip(tok, ","); char *name = get_ident(tok); tok = tok->next; if (equal(tok, "=")) val = const_expr(&tok, tok->next); VarScope *sc = push_scope(name); sc->enum_ty = ty; sc->enum_val = val++; } if (tag) push_tag_scope(tag, ty); return ty; } // typeof-specifier = "(" (expr | typename) ")" static Type *typeof_specifier(Token **rest, Token *tok) { tok = skip(tok, "("); Type *ty; if (is_typename(tok)) { ty = typename(&tok, tok); } else { Node *node = expr(&tok, tok); add_type(node); ty = node->ty; } *rest = skip(tok, ")"); return ty; } // Generate code for computing a VLA size. static Node *compute_vla_size(Type *ty, Token *tok) { Node *node = new_node(ND_NULL_EXPR, tok); if (ty->base) node = new_binary(ND_COMMA, node, compute_vla_size(ty->base, tok), tok); if (ty->kind != TY_VLA) return node; Node *base_sz; if (ty->base->kind == TY_VLA) base_sz = new_var_node(ty->base->vla_size, tok); else base_sz = new_num(ty->base->size, tok); ty->vla_size = new_lvar("", ty_ulong); Node *expr = new_binary(ND_ASSIGN, new_var_node(ty->vla_size, tok), new_binary(ND_MUL, ty->vla_len, base_sz, tok), tok); return new_binary(ND_COMMA, node, expr, tok); } static Node *new_alloca(Node *sz) { Node *node = new_unary(ND_FUNCALL, new_var_node(builtin_alloca, sz->tok), sz->tok); node->func_ty = builtin_alloca->ty; node->ty = builtin_alloca->ty->return_ty; node->args = sz; add_type(sz); return node; } // declaration = declspec (declarator ("=" expr)? ("," declarator ("=" expr)?)*)? ";" static Node *declaration(Token **rest, Token *tok, Type *basety, VarAttr *attr) { Node head = {}; Node *cur = &head; int i = 0; while (!equal(tok, ";")) { if (i++ > 0) tok = skip(tok, ","); Type *ty = declarator(&tok, tok, basety); if (ty->kind == TY_VOID) error_tok(tok, "variable declared void"); if (!ty->name) error_tok(ty->name_pos, "variable name omitted"); if (attr && attr->is_static) { // static local variable Obj *var = new_anon_gvar(ty); push_scope(get_ident(ty->name))->var = var; if (equal(tok, "=")) gvar_initializer(&tok, tok->next, var); continue; } // Generate code for computing a VLA size. We need to do this // even if ty is not VLA because ty may be a pointer to VLA // (e.g. int (*foo)[n][m] where n and m are variables.) cur = cur->next = new_unary(ND_EXPR_STMT, compute_vla_size(ty, tok), tok); if (ty->kind == TY_VLA) { if (equal(tok, "=")) error_tok(tok, "variable-sized object may not be initialized"); // Variable length arrays (VLAs) are translated to alloca() calls. // For example, `int x[n+2]` is translated to `tmp = n + 2, // x = alloca(tmp)`. Obj *var = new_lvar(get_ident(ty->name), ty); Token *tok = ty->name; Node *expr = new_binary(ND_ASSIGN, new_vla_ptr(var, tok), new_alloca(new_var_node(ty->vla_size, tok)), tok); cur = cur->next = new_unary(ND_EXPR_STMT, expr, tok); continue; } Obj *var = new_lvar(get_ident(ty->name), ty); if (attr && attr->align) var->align = attr->align; if (equal(tok, "=")) { Node *expr = lvar_initializer(&tok, tok->next, var); cur = cur->next = new_unary(ND_EXPR_STMT, expr, tok); } if (var->ty->size < 0) error_tok(ty->name, "variable has incomplete type"); if (var->ty->kind == TY_VOID) error_tok(ty->name, "variable declared void"); } Node *node = new_node(ND_BLOCK, tok); node->body = head.next; *rest = tok->next; return node; } static Token *skip_excess_element(Token *tok) { if (equal(tok, "{")) { tok = skip_excess_element(tok->next); return skip(tok, "}"); } assign(&tok, tok); return tok; } // string-initializer = string-literal static void string_initializer(Token **rest, Token *tok, Initializer *init) { if (init->is_flexible) *init = *new_initializer(array_of(init->ty->base, tok->ty->array_len), false); int len = MIN(init->ty->array_len, tok->ty->array_len); switch (init->ty->base->size) { case 1: { char *str = tok->str; for (int i = 0; i < len; i++) init->children[i]->expr = new_num(str[i], tok); break; } case 2: { uint16_t *str = (uint16_t *)tok->str; for (int i = 0; i < len; i++) init->children[i]->expr = new_num(str[i], tok); break; } case 4: { uint32_t *str = (uint32_t *)tok->str; for (int i = 0; i < len; i++) init->children[i]->expr = new_num(str[i], tok); break; } default: unreachable(); } *rest = tok->next; } // array-designator = "[" const-expr "]" // // C99 added the designated initializer to the language, which allows // programmers to move the "cursor" of an initializer to any element. // The syntax looks like this: // // int x[10] = { 1, 2, [5]=3, 4, 5, 6, 7 }; // // `[5]` moves the cursor to the 5th element, so the 5th element of x // is set to 3. Initialization then continues forward in order, so // 6th, 7th, 8th and 9th elements are initialized with 4, 5, 6 and 7, // respectively. Unspecified elements (in this case, 3rd and 4th // elements) are initialized with zero. // // Nesting is allowed, so the following initializer is valid: // // int x[5][10] = { [5][8]=1, 2, 3 }; // // It sets x[5][8], x[5][9] and x[6][0] to 1, 2 and 3, respectively. // // Use `.fieldname` to move the cursor for a struct initializer. E.g. // // struct { int a, b, c; } x = { .c=5 }; // // The above initializer sets x.c to 5. static void array_designator(Token **rest, Token *tok, Type *ty, int *begin, int *end) { *begin = const_expr(&tok, tok->next); if (*begin >= ty->array_len) error_tok(tok, "array designator index exceeds array bounds"); if (equal(tok, "...")) { *end = const_expr(&tok, tok->next); if (*end >= ty->array_len) error_tok(tok, "array designator index exceeds array bounds"); if (*end < *begin) error_tok(tok, "array designator range [%d, %d] is empty", *begin, *end); } else { *end = *begin; } *rest = skip(tok, "]"); } // struct-designator = "." ident static Member *struct_designator(Token **rest, Token *tok, Type *ty) { Token *start = tok; tok = skip(tok, "."); if (tok->kind != TK_IDENT) error_tok(tok, "expected a field designator"); for (Member *mem = ty->members; mem; mem = mem->next) { // Anonymous struct member if (mem->ty->kind == TY_STRUCT && !mem->name) { if (get_struct_member(mem->ty, tok)) { *rest = start; return mem; } continue; } // Regular struct member if (mem->name->len == tok->len && !strncmp(mem->name->loc, tok->loc, tok->len)) { *rest = tok->next; return mem; } } error_tok(tok, "struct has no such member"); } // designation = ("[" const-expr "]" | "." ident)* "="? initializer static void designation(Token **rest, Token *tok, Initializer *init) { if (equal(tok, "[")) { if (init->ty->kind != TY_ARRAY) error_tok(tok, "array index in non-array initializer"); int begin, end; array_designator(&tok, tok, init->ty, &begin, &end); Token *tok2; for (int i = begin; i <= end; i++) designation(&tok2, tok, init->children[i]); array_initializer2(rest, tok2, init, begin + 1); return; } if (equal(tok, ".") && init->ty->kind == TY_STRUCT) { Member *mem = struct_designator(&tok, tok, init->ty); designation(&tok, tok, init->children[mem->idx]); init->expr = NULL; struct_initializer2(rest, tok, init, mem->next); return; } if (equal(tok, ".") && init->ty->kind == TY_UNION) { Member *mem = struct_designator(&tok, tok, init->ty); init->mem = mem; designation(rest, tok, init->children[mem->idx]); return; } if (equal(tok, ".")) error_tok(tok, "field name not in struct or union initializer"); if (equal(tok, "=")) tok = tok->next; initializer2(rest, tok, init); } // An array length can be omitted if an array has an initializer // (e.g. `int x[] = {1,2,3}`). If it's omitted, count the number // of initializer elements. static int count_array_init_elements(Token *tok, Type *ty) { bool first = true; Initializer *dummy = new_initializer(ty->base, true); int i = 0, max = 0; while (!consume_end(&tok, tok)) { if (!first) tok = skip(tok, ","); first = false; if (equal(tok, "[")) { i = const_expr(&tok, tok->next); if (equal(tok, "...")) i = const_expr(&tok, tok->next); tok = skip(tok, "]"); designation(&tok, tok, dummy); } else { initializer2(&tok, tok, dummy); } i++; max = MAX(max, i); } return max; } // array-initializer1 = "{" initializer ("," initializer)* ","? "}" static void array_initializer1(Token **rest, Token *tok, Initializer *init) { tok = skip(tok, "{"); if (init->is_flexible) { int len = count_array_init_elements(tok, init->ty); *init = *new_initializer(array_of(init->ty->base, len), false); } bool first = true; if (init->is_flexible) { int len = count_array_init_elements(tok, init->ty); *init = *new_initializer(array_of(init->ty->base, len), false); } for (int i = 0; !consume_end(rest, tok); i++) { if (!first) tok = skip(tok, ","); first = false; if (equal(tok, "[")) { int begin, end; array_designator(&tok, tok, init->ty, &begin, &end); Token *tok2; for (int j = begin; j <= end; j++) designation(&tok2, tok, init->children[j]); tok = tok2; i = end; continue; } if (i < init->ty->array_len) initializer2(&tok, tok, init->children[i]); else tok = skip_excess_element(tok); } } // array-initializer2 = initializer ("," initializer)* static void array_initializer2(Token **rest, Token *tok, Initializer *init, int i) { if (init->is_flexible) { int len = count_array_init_elements(tok, init->ty); *init = *new_initializer(array_of(init->ty->base, len), false); } for (; i < init->ty->array_len && !is_end(tok); i++) { Token *start = tok; if (i > 0) tok = skip(tok, ","); if (equal(tok, "[") || equal(tok, ".")) { *rest = start; return; } initializer2(&tok, tok, init->children[i]); } *rest = tok; } // struct-initializer1 = "{" initializer ("," initializer)* ","? "}" static void struct_initializer1(Token **rest, Token *tok, Initializer *init) { tok = skip(tok, "{"); Member *mem = init->ty->members; bool first = true; while (!consume_end(rest, tok)) { if (!first) tok = skip(tok, ","); first = false; if (equal(tok, ".")) { mem = struct_designator(&tok, tok, init->ty); designation(&tok, tok, init->children[mem->idx]); mem = mem->next; continue; } if (mem) { initializer2(&tok, tok, init->children[mem->idx]); mem = mem->next; } else { tok = skip_excess_element(tok); } } } // struct-initializer2 = initializer ("," initializer)* static void struct_initializer2(Token **rest, Token *tok, Initializer *init, Member *mem) { bool first = true; for (; mem && !is_end(tok); mem = mem->next) { Token *start = tok; if (!first) tok = skip(tok, ","); first = false; if (equal(tok, "[") || equal(tok, ".")) { *rest = start; return; } initializer2(&tok, tok, init->children[mem->idx]); } *rest = tok; } static void union_initializer(Token **rest, Token *tok, Initializer *init) { // Unlike structs, union initializers take only one initializer, // and that initializes the first union member by default. // You can initialize other member using a designated initializer. if (equal(tok, "{") && equal(tok->next, ".")) { Member *mem = struct_designator(&tok, tok->next, init->ty); init->mem = mem; designation(&tok, tok, init->children[mem->idx]); *rest = skip(tok, "}"); return; } init->mem = init->ty->members; if (equal(tok, "{")) { initializer2(&tok, tok->next, init->children[0]); consume(&tok, tok, ","); *rest = skip(tok, "}"); } else { initializer2(rest, tok, init->children[0]); } } // initializer = string-initializer | array-initializer // | struct-initializer | union-initializer // | assign static void initializer2(Token **rest, Token *tok, Initializer *init) { if (init->ty->kind == TY_ARRAY && tok->kind == TK_STR) { string_initializer(rest, tok, init); return; } if (init->ty->kind == TY_ARRAY) { if (equal(tok, "{")) array_initializer1(rest, tok, init); else array_initializer2(rest, tok, init, 0); return; } if (init->ty->kind == TY_STRUCT) { if (equal(tok, "{")) { struct_initializer1(rest, tok, init); return; } // A struct can be initialized with another struct. E.g. // `struct T x = y;` where y is a variable of type `struct T`. // Handle that case first. Node *expr = assign(rest, tok); add_type(expr); if (expr->ty->kind == TY_STRUCT) { init->expr = expr; return; } struct_initializer2(rest, tok, init, init->ty->members); return; } if (init->ty->kind == TY_UNION) { union_initializer(rest, tok, init); return; } if (equal(tok, "{")) { // An initializer for a scalar variable can be surrounded by // braces. E.g. `int x = {3};`. Handle that case. initializer2(&tok, tok->next, init); *rest = skip(tok, "}"); return; } init->expr = assign(rest, tok); } static Type *copy_struct_type(Type *ty) { ty = copy_type(ty); Member head = {}; Member *cur = &head; for (Member *mem = ty->members; mem; mem = mem->next) { Member *m = calloc(1, sizeof(Member)); *m = *mem; cur = cur->next = m; } ty->members = head.next; return ty; } static Initializer *initializer(Token **rest, Token *tok, Type *ty, Type **new_ty) { Initializer *init = new_initializer(ty, true); initializer2(rest, tok, init); if ((ty->kind == TY_STRUCT || ty->kind == TY_UNION) && ty->is_flexible) { ty = copy_struct_type(ty); Member *mem = ty->members; while (mem->next) mem = mem->next; mem->ty = init->children[mem->idx]->ty; ty->size += mem->ty->size; *new_ty = ty; return init; } *new_ty = init->ty; return init; } static Node *init_desg_expr(InitDesg *desg, Token *tok) { if (desg->var) return new_var_node(desg->var, tok); if (desg->member) { Node *node = new_unary(ND_MEMBER, init_desg_expr(desg->next, tok), tok); node->member = desg->member; return node; } Node *lhs = init_desg_expr(desg->next, tok); Node *rhs = new_num(desg->idx, tok); return new_unary(ND_DEREF, new_add(lhs, rhs, tok), tok); } static Node *create_lvar_init(Initializer *init, Type *ty, InitDesg *desg, Token *tok) { if (ty->kind == TY_ARRAY) { Node *node = new_node(ND_NULL_EXPR, tok); for (int i = 0; i < ty->array_len; i++) { InitDesg desg2 = {desg, i}; Node *rhs = create_lvar_init(init->children[i], ty->base, &desg2, tok); node = new_binary(ND_COMMA, node, rhs, tok); } return node; } if (ty->kind == TY_STRUCT && !init->expr) { Node *node = new_node(ND_NULL_EXPR, tok); for (Member *mem = ty->members; mem; mem = mem->next) { InitDesg desg2 = {desg, 0, mem}; Node *rhs = create_lvar_init(init->children[mem->idx], mem->ty, &desg2, tok); node = new_binary(ND_COMMA, node, rhs, tok); } return node; } if (ty->kind == TY_UNION) { Member *mem = init->mem ? init->mem : ty->members; InitDesg desg2 = {desg, 0, mem}; return create_lvar_init(init->children[mem->idx], mem->ty, &desg2, tok); } if (!init->expr) return new_node(ND_NULL_EXPR, tok); Node *lhs = init_desg_expr(desg, tok); return new_binary(ND_ASSIGN, lhs, init->expr, tok); } // A variable definition with an initializer is a shorthand notation // for a variable definition followed by assignments. This function // generates assignment expressions for an initializer. For example, // `int x[2][2] = {{6, 7}, {8, 9}}` is converted to the following // expressions: // // x[0][0] = 6; // x[0][1] = 7; // x[1][0] = 8; // x[1][1] = 9; static Node *lvar_initializer(Token **rest, Token *tok, Obj *var) { Initializer *init = initializer(rest, tok, var->ty, &var->ty); InitDesg desg = {NULL, 0, NULL, var}; // If a partial initializer list is given, the standard requires // that unspecified elements are set to 0. Here, we simply // zero-initialize the entire memory region of a variable before // initializing it with user-supplied values. Node *lhs = new_node(ND_MEMZERO, tok); lhs->var = var; Node *rhs = create_lvar_init(init, var->ty, &desg, tok); return new_binary(ND_COMMA, lhs, rhs, tok); } static uint64_t read_buf(char *buf, int sz) { if (sz == 1) return *buf; if (sz == 2) return *(uint16_t *)buf; if (sz == 4) return *(uint32_t *)buf; if (sz == 8) return *(uint64_t *)buf; unreachable(); } static void write_buf(char *buf, uint64_t val, int sz) { if (sz == 1) *buf = val; else if (sz == 2) *(uint16_t *)buf = val; else if (sz == 4) *(uint32_t *)buf = val; else if (sz == 8) *(uint64_t *)buf = val; else unreachable(); } static Relocation * write_gvar_data(Relocation *cur, Initializer *init, Type *ty, char *buf, int offset) { if (ty->kind == TY_ARRAY) { int sz = ty->base->size; for (int i = 0; i < ty->array_len; i++) cur = write_gvar_data(cur, init->children[i], ty->base, buf, offset + sz * i); return cur; } if (ty->kind == TY_STRUCT) { for (Member *mem = ty->members; mem; mem = mem->next) { if (mem->is_bitfield) { Node *expr = init->children[mem->idx]->expr; if (!expr) break; char *loc = buf + offset + mem->offset; uint64_t oldval = read_buf(loc, mem->ty->size); uint64_t newval = eval(expr); uint64_t mask = (1L << mem->bit_width) - 1; uint64_t combined = oldval | ((newval & mask) << mem->bit_offset); write_buf(loc, combined, mem->ty->size); } else { cur = write_gvar_data(cur, init->children[mem->idx], mem->ty, buf, offset + mem->offset); } } return cur; } if (ty->kind == TY_UNION) { if (!init->mem) return cur; return write_gvar_data(cur, init->children[init->mem->idx], init->mem->ty, buf, offset); } if (!init->expr) return cur; if (ty->kind == TY_FLOAT) { *(float *)(buf + offset) = eval_double(init->expr); return cur; } if (ty->kind == TY_DOUBLE) { *(double *)(buf + offset) = eval_double(init->expr); return cur; } char **label = NULL; uint64_t val = eval2(init->expr, &label); if (!label) { write_buf(buf + offset, val, ty->size); return cur; } Relocation *rel = calloc(1, sizeof(Relocation)); rel->offset = offset; rel->label = label; rel->addend = val; cur->next = rel; return cur->next; } // Initializers for global variables are evaluated at compile-time and // embedded to .data section. This function serializes Initializer // objects to a flat byte array. It is a compile error if an // initializer list contains a non-constant expression. static void gvar_initializer(Token **rest, Token *tok, Obj *var) { Initializer *init = initializer(rest, tok, var->ty, &var->ty); Relocation head = {}; char *buf = calloc(1, var->ty->size); write_gvar_data(&head, init, var->ty, buf, 0); var->init_data = buf; var->rel = head.next; } // Returns true if a given token represents a type. static bool is_typename(Token *tok) { static HashMap map; if (map.capacity == 0) { static char *kw[] = { "void", "_Bool", "char", "short", "int", "long", "struct", "union", "typedef", "enum", "static", "extern", "_Alignas", "signed", "unsigned", "const", "volatile", "auto", "register", "restrict", "__restrict", "__restrict__", "_Noreturn", "float", "double", "typeof", "inline", "_Thread_local", "__thread", "_Atomic", }; for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) hashmap_put(&map, kw[i], (void *)1); } return hashmap_get2(&map, tok->loc, tok->len) || find_typedef(tok); } // asm-stmt = "asm" ("volatile" | "inline")* "(" string-literal ")" static Node *asm_stmt(Token **rest, Token *tok) { Node *node = new_node(ND_ASM, tok); tok = tok->next; while (equal(tok, "volatile") || equal(tok, "inline")) tok = tok->next; tok = skip(tok, "("); if (tok->kind != TK_STR || tok->ty->base->kind != TY_CHAR) error_tok(tok, "expected string literal"); node->asm_str = tok->str; *rest = skip(tok->next, ")"); return node; } // stmt = "return" expr? ";" // | "if" "(" expr ")" stmt ("else" stmt)? // | "switch" "(" expr ")" stmt // | "case" const-expr ("..." const-expr)? ":" stmt // | "default" ":" stmt // | "for" "(" expr-stmt expr? ";" expr? ")" stmt // | "while" "(" expr ")" stmt // | "do" stmt "while" "(" expr ")" ";" // | "asm" asm-stmt // | "goto" (ident | "*" expr) ";" // | "break" ";" // | "continue" ";" // | ident ":" stmt // | "{" compound-stmt // | expr-stmt static Node *stmt(Token **rest, Token *tok) { if (equal(tok, "return")) { Node *node = new_node(ND_RETURN, tok); if (consume(rest, tok->next, ";")) return node; Node *exp = expr(&tok, tok->next); *rest = skip(tok, ";"); add_type(exp); Type *ty = current_fn->ty->return_ty; if (ty->kind != TY_STRUCT && ty->kind != TY_UNION) exp = new_cast(exp, current_fn->ty->return_ty); node->lhs = exp; return node; } if (equal(tok, "if")) { Node *node = new_node(ND_IF, tok); tok = skip(tok->next, "("); node->cond = expr(&tok, tok); tok = skip(tok, ")"); node->then = stmt(&tok, tok); if (equal(tok, "else")) node->els = stmt(&tok, tok->next); *rest = tok; return node; } if (equal(tok, "switch")) { Node *node = new_node(ND_SWITCH, tok); tok = skip(tok->next, "("); node->cond = expr(&tok, tok); tok = skip(tok, ")"); Node *sw = current_switch; current_switch = node; char *brk = brk_label; brk_label = node->brk_label = new_unique_name(); node->then = stmt(rest, tok); current_switch = sw; brk_label = brk; return node; } if (equal(tok, "case")) { if (!current_switch) error_tok(tok, "stray case"); Node *node = new_node(ND_CASE, tok); int begin = const_expr(&tok, tok->next); int end; if (equal(tok, "...")) { // [GNU] Case ranges, e.g. "case 1 ... 5:" end = const_expr(&tok, tok->next); if (end < begin) error_tok(tok, "empty case range specified"); } else { end = begin; } tok = skip(tok, ":"); node->label = new_unique_name(); node->lhs = stmt(rest, tok); node->begin = begin; node->end = end; node->case_next = current_switch->case_next; current_switch->case_next = node; return node; } if (equal(tok, "default")) { if (!current_switch) error_tok(tok, "stray default"); Node *node = new_node(ND_CASE, tok); tok = skip(tok->next, ":"); node->label = new_unique_name(); node->lhs = stmt(rest, tok); current_switch->default_case = node; return node; } if (equal(tok, "for")) { Node *node = new_node(ND_FOR, tok); tok = skip(tok->next, "("); enter_scope(); char *brk = brk_label; char *cont = cont_label; brk_label = node->brk_label = new_unique_name(); cont_label = node->cont_label = new_unique_name(); if (is_typename(tok)) { Type *basety = declspec(&tok, tok, NULL); node->init = declaration(&tok, tok, basety, NULL); } else { node->init = expr_stmt(&tok, tok); } if (!equal(tok, ";")) node->cond = expr(&tok, tok); tok = skip(tok, ";"); if (!equal(tok, ")")) node->inc = expr(&tok, tok); tok = skip(tok, ")"); node->then = stmt(rest, tok); leave_scope(); brk_label = brk; cont_label = cont; return node; } if (equal(tok, "while")) { Node *node = new_node(ND_FOR, tok); tok = skip(tok->next, "("); node->cond = expr(&tok, tok); tok = skip(tok, ")"); char *brk = brk_label; char *cont = cont_label; brk_label = node->brk_label = new_unique_name(); cont_label = node->cont_label = new_unique_name(); node->then = stmt(rest, tok); brk_label = brk; cont_label = cont; return node; } if (equal(tok, "do")) { Node *node = new_node(ND_DO, tok); char *brk = brk_label; char *cont = cont_label; brk_label = node->brk_label = new_unique_name(); cont_label = node->cont_label = new_unique_name(); node->then = stmt(&tok, tok->next); brk_label = brk; cont_label = cont; tok = skip(tok, "while"); tok = skip(tok, "("); node->cond = expr(&tok, tok); tok = skip(tok, ")"); *rest = skip(tok, ";"); return node; } if (equal(tok, "asm")) return asm_stmt(rest, tok); if (equal(tok, "goto")) { if (equal(tok->next, "*")) { // [GNU] `goto *ptr` jumps to the address specified by `ptr`. Node *node = new_node(ND_GOTO_EXPR, tok); node->lhs = expr(&tok, tok->next->next); *rest = skip(tok, ";"); return node; } Node *node = new_node(ND_GOTO, tok); node->label = get_ident(tok->next); node->goto_next = gotos; gotos = node; *rest = skip(tok->next->next, ";"); return node; } if (equal(tok, "break")) { if (!brk_label) error_tok(tok, "stray break"); Node *node = new_node(ND_GOTO, tok); node->unique_label = brk_label; *rest = skip(tok->next, ";"); return node; } if (equal(tok, "continue")) { if (!cont_label) error_tok(tok, "stray continue"); Node *node = new_node(ND_GOTO, tok); node->unique_label = cont_label; *rest = skip(tok->next, ";"); return node; } if (tok->kind == TK_IDENT && equal(tok->next, ":")) { Node *node = new_node(ND_LABEL, tok); node->label = strndup(tok->loc, tok->len); node->unique_label = new_unique_name(); node->lhs = stmt(rest, tok->next->next); node->goto_next = labels; labels = node; return node; } if (equal(tok, "{")) return compound_stmt(rest, tok->next); return expr_stmt(rest, tok); } // compound-stmt = (typedef | declaration | stmt)* "}" static Node *compound_stmt(Token **rest, Token *tok) { Node *node = new_node(ND_BLOCK, tok); Node head = {}; Node *cur = &head; enter_scope(); while (!equal(tok, "}")) { if (is_typename(tok) && !equal(tok->next, ":")) { VarAttr attr = {}; Type *basety = declspec(&tok, tok, &attr); if (attr.is_typedef) { tok = parse_typedef(tok, basety); continue; } if (is_function(tok)) { tok = function(tok, basety, &attr); continue; } if (attr.is_extern) { tok = global_variable(tok, basety, &attr); continue; } cur = cur->next = declaration(&tok, tok, basety, &attr); } else { cur = cur->next = stmt(&tok, tok); } add_type(cur); } leave_scope(); node->body = head.next; *rest = tok->next; return node; } // expr-stmt = expr? ";" static Node *expr_stmt(Token **rest, Token *tok) { if (equal(tok, ";")) { *rest = tok->next; return new_node(ND_BLOCK, tok); } Node *node = new_node(ND_EXPR_STMT, tok); node->lhs = expr(&tok, tok); *rest = skip(tok, ";"); return node; } // expr = assign ("," expr)? static Node *expr(Token **rest, Token *tok) { Node *node = assign(&tok, tok); if (equal(tok, ",")) return new_binary(ND_COMMA, node, expr(rest, tok->next), tok); *rest = tok; return node; } static int64_t eval(Node *node) { return eval2(node, NULL); } // Evaluate a given node as a constant expression. // // A constant expression is either just a number or ptr+n where ptr // is a pointer to a global variable and n is a postiive/negative // number. The latter form is accepted only as an initialization // expression for a global variable. static int64_t eval2(Node *node, char ***label) { add_type(node); if (is_flonum(node->ty)) return eval_double(node); switch (node->kind) { case ND_ADD: return eval2(node->lhs, label) + eval(node->rhs); case ND_SUB: return eval2(node->lhs, label) - eval(node->rhs); case ND_MUL: return eval(node->lhs) * eval(node->rhs); case ND_DIV: if (node->ty->is_unsigned) return (uint64_t)eval(node->lhs) / eval(node->rhs); return eval(node->lhs) / eval(node->rhs); case ND_NEG: return -eval(node->lhs); case ND_MOD: if (node->ty->is_unsigned) return (uint64_t)eval(node->lhs) % eval(node->rhs); return eval(node->lhs) % eval(node->rhs); case ND_BITAND: return eval(node->lhs) & eval(node->rhs); case ND_BITOR: return eval(node->lhs) | eval(node->rhs); case ND_BITXOR: return eval(node->lhs) ^ eval(node->rhs); case ND_SHL: return eval(node->lhs) << eval(node->rhs); case ND_SHR: if (node->ty->is_unsigned && node->ty->size == 8) return (uint64_t)eval(node->lhs) >> eval(node->rhs); return eval(node->lhs) >> eval(node->rhs); case ND_EQ: return eval(node->lhs) == eval(node->rhs); case ND_NE: return eval(node->lhs) != eval(node->rhs); case ND_LT: if (node->lhs->ty->is_unsigned) return (uint64_t)eval(node->lhs) < eval(node->rhs); return eval(node->lhs) < eval(node->rhs); case ND_LE: if (node->lhs->ty->is_unsigned) return (uint64_t)eval(node->lhs) <= eval(node->rhs); return eval(node->lhs) <= eval(node->rhs); case ND_COND: return eval(node->cond) ? eval2(node->then, label) : eval2(node->els, label); case ND_COMMA: return eval2(node->rhs, label); case ND_NOT: return !eval(node->lhs); case ND_BITNOT: return ~eval(node->lhs); case ND_LOGAND: return eval(node->lhs) && eval(node->rhs); case ND_LOGOR: return eval(node->lhs) || eval(node->rhs); case ND_CAST: { int64_t val = eval2(node->lhs, label); if (is_integer(node->ty)) { switch (node->ty->size) { case 1: return node->ty->is_unsigned ? (uint8_t)val : (int8_t)val; case 2: return node->ty->is_unsigned ? (uint16_t)val : (int16_t)val; case 4: return node->ty->is_unsigned ? (uint32_t)val : (int32_t)val; } } return val; } case ND_ADDR: return eval_rval(node->lhs, label); case ND_LABEL_VAL: *label = &node->unique_label; return 0; case ND_MEMBER: if (!label) error_tok(node->tok, "not a compile-time constant"); if (node->ty->kind != TY_ARRAY) error_tok(node->tok, "invalid initializer"); return eval_rval(node->lhs, label) + node->member->offset; case ND_VAR: if (!label) error_tok(node->tok, "not a compile-time constant"); if (node->var->ty->kind != TY_ARRAY && node->var->ty->kind != TY_FUNC) error_tok(node->tok, "invalid initializer"); *label = &node->var->name; return 0; case ND_NUM: return node->val; } error_tok(node->tok, "not a compile-time constant"); } static int64_t eval_rval(Node *node, char ***label) { switch (node->kind) { case ND_VAR: if (node->var->is_local) error_tok(node->tok, "not a compile-time constant"); *label = &node->var->name; return 0; case ND_DEREF: return eval2(node->lhs, label); case ND_MEMBER: return eval_rval(node->lhs, label) + node->member->offset; } error_tok(node->tok, "invalid initializer"); } static bool is_const_expr(Node *node) { add_type(node); switch (node->kind) { case ND_ADD: case ND_SUB: case ND_MUL: case ND_DIV: case ND_BITAND: case ND_BITOR: case ND_BITXOR: case ND_SHL: case ND_SHR: case ND_EQ: case ND_NE: case ND_LT: case ND_LE: case ND_LOGAND: case ND_LOGOR: return is_const_expr(node->lhs) && is_const_expr(node->rhs); case ND_COND: if (!is_const_expr(node->cond)) return false; return is_const_expr(eval(node->cond) ? node->then : node->els); case ND_COMMA: return is_const_expr(node->rhs); case ND_NEG: case ND_NOT: case ND_BITNOT: case ND_CAST: return is_const_expr(node->lhs); case ND_NUM: return true; } return false; } int64_t const_expr(Token **rest, Token *tok) { Node *node = conditional(rest, tok); return eval(node); } static double eval_double(Node *node) { add_type(node); if (is_integer(node->ty)) { if (node->ty->is_unsigned) return (unsigned long)eval(node); return eval(node); } switch (node->kind) { case ND_ADD: return eval_double(node->lhs) + eval_double(node->rhs); case ND_SUB: return eval_double(node->lhs) - eval_double(node->rhs); case ND_MUL: return eval_double(node->lhs) * eval_double(node->rhs); case ND_DIV: return eval_double(node->lhs) / eval_double(node->rhs); case ND_NEG: return -eval_double(node->lhs); case ND_COND: return eval_double(node->cond) ? eval_double(node->then) : eval_double(node->els); case ND_COMMA: return eval_double(node->rhs); case ND_CAST: if (is_flonum(node->lhs->ty)) return eval_double(node->lhs); return eval(node->lhs); case ND_NUM: return node->fval; } error_tok(node->tok, "not a compile-time constant"); } // Convert op= operators to expressions containing an assignment. // // In general, `A op= C` is converted to ``tmp = &A, *tmp = *tmp op B`. // However, if a given expression is of form `A.x op= C`, the input is // converted to `tmp = &A, (*tmp).x = (*tmp).x op C` to handle assignments // to bitfields. static Node *to_assign(Node *binary) { add_type(binary->lhs); add_type(binary->rhs); Token *tok = binary->tok; // Convert `A.x op= C` to `tmp = &A, (*tmp).x = (*tmp).x op C`. if (binary->lhs->kind == ND_MEMBER) { Obj *var = new_lvar("", pointer_to(binary->lhs->lhs->ty)); Node *expr1 = new_binary(ND_ASSIGN, new_var_node(var, tok), new_unary(ND_ADDR, binary->lhs->lhs, tok), tok); Node *expr2 = new_unary(ND_MEMBER, new_unary(ND_DEREF, new_var_node(var, tok), tok), tok); expr2->member = binary->lhs->member; Node *expr3 = new_unary(ND_MEMBER, new_unary(ND_DEREF, new_var_node(var, tok), tok), tok); expr3->member = binary->lhs->member; Node *expr4 = new_binary(ND_ASSIGN, expr2, new_binary(binary->kind, expr3, binary->rhs, tok), tok); return new_binary(ND_COMMA, expr1, expr4, tok); } // If A is an atomic type, Convert `A op= B` to // // ({ // T1 *addr = &A; T2 val = (B); T1 old = *addr; T1 new; // do { // new = old op val; // } while (!atomic_compare_exchange_strong(addr, &old, new)); // new; // }) if (binary->lhs->ty->is_atomic) { Node head = {}; Node *cur = &head; Obj *addr = new_lvar("", pointer_to(binary->lhs->ty)); Obj *val = new_lvar("", binary->rhs->ty); Obj *old = new_lvar("", binary->lhs->ty); Obj *new = new_lvar("", binary->lhs->ty); cur = cur->next = new_unary(ND_EXPR_STMT, new_binary(ND_ASSIGN, new_var_node(addr, tok), new_unary(ND_ADDR, binary->lhs, tok), tok), tok); cur = cur->next = new_unary(ND_EXPR_STMT, new_binary(ND_ASSIGN, new_var_node(val, tok), binary->rhs, tok), tok); cur = cur->next = new_unary(ND_EXPR_STMT, new_binary(ND_ASSIGN, new_var_node(old, tok), new_unary(ND_DEREF, new_var_node(addr, tok), tok), tok), tok); Node *loop = new_node(ND_DO, tok); loop->brk_label = new_unique_name(); loop->cont_label = new_unique_name(); Node *body = new_binary(ND_ASSIGN, new_var_node(new, tok), new_binary(binary->kind, new_var_node(old, tok), new_var_node(val, tok), tok), tok); loop->then = new_node(ND_BLOCK, tok); loop->then->body = new_unary(ND_EXPR_STMT, body, tok); Node *cas = new_node(ND_CAS, tok); cas->cas_addr = new_var_node(addr, tok); cas->cas_old = new_unary(ND_ADDR, new_var_node(old, tok), tok); cas->cas_new = new_var_node(new, tok); loop->cond = new_unary(ND_NOT, cas, tok); cur = cur->next = loop; cur = cur->next = new_unary(ND_EXPR_STMT, new_var_node(new, tok), tok); Node *node = new_node(ND_STMT_EXPR, tok); node->body = head.next; return node; } // Convert `A op= B` to ``tmp = &A, *tmp = *tmp op B`. Obj *var = new_lvar("", pointer_to(binary->lhs->ty)); Node *expr1 = new_binary(ND_ASSIGN, new_var_node(var, tok), new_unary(ND_ADDR, binary->lhs, tok), tok); Node *expr2 = new_binary(ND_ASSIGN, new_unary(ND_DEREF, new_var_node(var, tok), tok), new_binary(binary->kind, new_unary(ND_DEREF, new_var_node(var, tok), tok), binary->rhs, tok), tok); return new_binary(ND_COMMA, expr1, expr2, tok); } // assign = conditional (assign-op assign)? // assign-op = "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" // | "<<=" | ">>=" static Node *assign(Token **rest, Token *tok) { Node *node = conditional(&tok, tok); if (equal(tok, "=")) return new_binary(ND_ASSIGN, node, assign(rest, tok->next), tok); if (equal(tok, "+=")) return to_assign(new_add(node, assign(rest, tok->next), tok)); if (equal(tok, "-=")) return to_assign(new_sub(node, assign(rest, tok->next), tok)); if (equal(tok, "*=")) return to_assign(new_binary(ND_MUL, node, assign(rest, tok->next), tok)); if (equal(tok, "/=")) return to_assign(new_binary(ND_DIV, node, assign(rest, tok->next), tok)); if (equal(tok, "%=")) return to_assign(new_binary(ND_MOD, node, assign(rest, tok->next), tok)); if (equal(tok, "&=")) return to_assign(new_binary(ND_BITAND, node, assign(rest, tok->next), tok)); if (equal(tok, "|=")) return to_assign(new_binary(ND_BITOR, node, assign(rest, tok->next), tok)); if (equal(tok, "^=")) return to_assign(new_binary(ND_BITXOR, node, assign(rest, tok->next), tok)); if (equal(tok, "<<=")) return to_assign(new_binary(ND_SHL, node, assign(rest, tok->next), tok)); if (equal(tok, ">>=")) return to_assign(new_binary(ND_SHR, node, assign(rest, tok->next), tok)); *rest = tok; return node; } // conditional = logor ("?" expr? ":" conditional)? static Node *conditional(Token **rest, Token *tok) { Node *cond = logor(&tok, tok); if (!equal(tok, "?")) { *rest = tok; return cond; } if (equal(tok->next, ":")) { // [GNU] Compile `a ?: b` as `tmp = a, tmp ? tmp : b`. add_type(cond); Obj *var = new_lvar("", cond->ty); Node *lhs = new_binary(ND_ASSIGN, new_var_node(var, tok), cond, tok); Node *rhs = new_node(ND_COND, tok); rhs->cond = new_var_node(var, tok); rhs->then = new_var_node(var, tok); rhs->els = conditional(rest, tok->next->next); return new_binary(ND_COMMA, lhs, rhs, tok); } Node *node = new_node(ND_COND, tok); node->cond = cond; node->then = expr(&tok, tok->next); tok = skip(tok, ":"); node->els = conditional(rest, tok); return node; } // logor = logand ("||" logand)* static Node *logor(Token **rest, Token *tok) { Node *node = logand(&tok, tok); while (equal(tok, "||")) { Token *start = tok; node = new_binary(ND_LOGOR, node, logand(&tok, tok->next), start); } *rest = tok; return node; } // logand = bitor ("&&" bitor)* static Node *logand(Token **rest, Token *tok) { Node *node = bitor(&tok, tok); while (equal(tok, "&&")) { Token *start = tok; node = new_binary(ND_LOGAND, node, bitor(&tok, tok->next), start); } *rest = tok; return node; } // bitor = bitxor ("|" bitxor)* static Node *bitor(Token **rest, Token *tok) { Node *node = bitxor(&tok, tok); while (equal(tok, "|")) { Token *start = tok; node = new_binary(ND_BITOR, node, bitxor(&tok, tok->next), start); } *rest = tok; return node; } // bitxor = bitand ("^" bitand)* static Node *bitxor(Token **rest, Token *tok) { Node *node = bitand(&tok, tok); while (equal(tok, "^")) { Token *start = tok; node = new_binary(ND_BITXOR, node, bitand(&tok, tok->next), start); } *rest = tok; return node; } // bitand = equality ("&" equality)* static Node *bitand(Token **rest, Token *tok) { Node *node = equality(&tok, tok); while (equal(tok, "&")) { Token *start = tok; node = new_binary(ND_BITAND, node, equality(&tok, tok->next), start); } *rest = tok; return node; } // equality = relational ("==" relational | "!=" relational)* static Node *equality(Token **rest, Token *tok) { Node *node = relational(&tok, tok); for (;;) { Token *start = tok; if (equal(tok, "==")) { node = new_binary(ND_EQ, node, relational(&tok, tok->next), start); continue; } if (equal(tok, "!=")) { node = new_binary(ND_NE, node, relational(&tok, tok->next), start); continue; } *rest = tok; return node; } } // relational = shift ("<" shift | "<=" shift | ">" shift | ">=" shift)* static Node *relational(Token **rest, Token *tok) { Node *node = shift(&tok, tok); for (;;) { Token *start = tok; if (equal(tok, "<")) { node = new_binary(ND_LT, node, shift(&tok, tok->next), start); continue; } if (equal(tok, "<=")) { node = new_binary(ND_LE, node, shift(&tok, tok->next), start); continue; } if (equal(tok, ">")) { node = new_binary(ND_LT, shift(&tok, tok->next), node, start); continue; } if (equal(tok, ">=")) { node = new_binary(ND_LE, shift(&tok, tok->next), node, start); continue; } *rest = tok; return node; } } // shift = add ("<<" add | ">>" add)* static Node *shift(Token **rest, Token *tok) { Node *node = add(&tok, tok); for (;;) { Token *start = tok; if (equal(tok, "<<")) { node = new_binary(ND_SHL, node, add(&tok, tok->next), start); continue; } if (equal(tok, ">>")) { node = new_binary(ND_SHR, node, add(&tok, tok->next), start); continue; } *rest = tok; return node; } } // In C, `+` operator is overloaded to perform the pointer arithmetic. // If p is a pointer, p+n adds not n but sizeof(*p)*n to the value of p, // so that p+n points to the location n elements (not bytes) ahead of p. // In other words, we need to scale an integer value before adding to a // pointer value. This function takes care of the scaling. static Node *new_add(Node *lhs, Node *rhs, Token *tok) { add_type(lhs); add_type(rhs); // num + num if (is_numeric(lhs->ty) && is_numeric(rhs->ty)) return new_binary(ND_ADD, lhs, rhs, tok); if (lhs->ty->base && rhs->ty->base) error_tok(tok, "invalid operands"); // Canonicalize `num + ptr` to `ptr + num`. if (!lhs->ty->base && rhs->ty->base) { Node *tmp = lhs; lhs = rhs; rhs = tmp; } // VLA + num if (lhs->ty->base->kind == TY_VLA) { rhs = new_binary(ND_MUL, rhs, new_var_node(lhs->ty->base->vla_size, tok), tok); return new_binary(ND_ADD, lhs, rhs, tok); } // ptr + num rhs = new_binary(ND_MUL, rhs, new_long(lhs->ty->base->size, tok), tok); return new_binary(ND_ADD, lhs, rhs, tok); } // Like `+`, `-` is overloaded for the pointer type. static Node *new_sub(Node *lhs, Node *rhs, Token *tok) { add_type(lhs); add_type(rhs); // num - num if (is_numeric(lhs->ty) && is_numeric(rhs->ty)) return new_binary(ND_SUB, lhs, rhs, tok); // VLA + num if (lhs->ty->base->kind == TY_VLA) { rhs = new_binary(ND_MUL, rhs, new_var_node(lhs->ty->base->vla_size, tok), tok); add_type(rhs); Node *node = new_binary(ND_SUB, lhs, rhs, tok); node->ty = lhs->ty; return node; } // ptr - num if (lhs->ty->base && is_integer(rhs->ty)) { rhs = new_binary(ND_MUL, rhs, new_long(lhs->ty->base->size, tok), tok); add_type(rhs); Node *node = new_binary(ND_SUB, lhs, rhs, tok); node->ty = lhs->ty; return node; } // ptr - ptr, which returns how many elements are between the two. if (lhs->ty->base && rhs->ty->base) { Node *node = new_binary(ND_SUB, lhs, rhs, tok); node->ty = ty_long; return new_binary(ND_DIV, node, new_num(lhs->ty->base->size, tok), tok); } error_tok(tok, "invalid operands"); } // add = mul ("+" mul | "-" mul)* static Node *add(Token **rest, Token *tok) { Node *node = mul(&tok, tok); for (;;) { Token *start = tok; if (equal(tok, "+")) { node = new_add(node, mul(&tok, tok->next), start); continue; } if (equal(tok, "-")) { node = new_sub(node, mul(&tok, tok->next), start); continue; } *rest = tok; return node; } } // mul = cast ("*" cast | "/" cast | "%" cast)* static Node *mul(Token **rest, Token *tok) { Node *node = cast(&tok, tok); for (;;) { Token *start = tok; if (equal(tok, "*")) { node = new_binary(ND_MUL, node, cast(&tok, tok->next), start); continue; } if (equal(tok, "/")) { node = new_binary(ND_DIV, node, cast(&tok, tok->next), start); continue; } if (equal(tok, "%")) { node = new_binary(ND_MOD, node, cast(&tok, tok->next), start); continue; } *rest = tok; return node; } } // cast = "(" type-name ")" cast | unary static Node *cast(Token **rest, Token *tok) { if (equal(tok, "(") && is_typename(tok->next)) { Token *start = tok; Type *ty = typename(&tok, tok->next); tok = skip(tok, ")"); // compound literal if (equal(tok, "{")) return unary(rest, start); // type cast Node *node = new_cast(cast(rest, tok), ty); node->tok = start; return node; } return unary(rest, tok); } // unary = ("+" | "-" | "*" | "&" | "!" | "~") cast // | ("++" | "--") unary // | "&&" ident // | postfix static Node *unary(Token **rest, Token *tok) { if (equal(tok, "+")) return cast(rest, tok->next); if (equal(tok, "-")) return new_unary(ND_NEG, cast(rest, tok->next), tok); if (equal(tok, "&")) { Node *lhs = cast(rest, tok->next); add_type(lhs); if (lhs->kind == ND_MEMBER && lhs->member->is_bitfield) error_tok(tok, "cannot take address of bitfield"); return new_unary(ND_ADDR, lhs, tok); } if (equal(tok, "*")) { // [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity // in the C spec, but dereferencing a function shouldn't do // anything. If foo is a function, `*foo`, `**foo` or `*****foo` // are all equivalent to just `foo`. Node *node = cast(rest, tok->next); add_type(node); if (node->ty->kind == TY_FUNC) return node; return new_unary(ND_DEREF, node, tok); } if (equal(tok, "!")) return new_unary(ND_NOT, cast(rest, tok->next), tok); if (equal(tok, "~")) return new_unary(ND_BITNOT, cast(rest, tok->next), tok); // Read ++i as i+=1 if (equal(tok, "++")) return to_assign(new_add(unary(rest, tok->next), new_num(1, tok), tok)); // Read --i as i-=1 if (equal(tok, "--")) return to_assign(new_sub(unary(rest, tok->next), new_num(1, tok), tok)); // [GNU] labels-as-values if (equal(tok, "&&")) { Node *node = new_node(ND_LABEL_VAL, tok); node->label = get_ident(tok->next); node->goto_next = gotos; gotos = node; *rest = tok->next->next; return node; } return postfix(rest, tok); } // struct-members = (declspec declarator ("," declarator)* ";")* static void struct_members(Token **rest, Token *tok, Type *ty) { Member head = {}; Member *cur = &head; int idx = 0; while (!equal(tok, "}")) { VarAttr attr = {}; Type *basety = declspec(&tok, tok, &attr); bool first = true; // Anonymous struct member if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) && consume(&tok, tok, ";")) { Member *mem = calloc(1, sizeof(Member)); mem->ty = basety; mem->idx = idx++; mem->align = attr.align ? attr.align : mem->ty->align; cur = cur->next = mem; continue; } // Regular struct members while (!consume(&tok, tok, ";")) { if (!first) tok = skip(tok, ","); first = false; Member *mem = calloc(1, sizeof(Member)); mem->ty = declarator(&tok, tok, basety); mem->name = mem->ty->name; mem->idx = idx++; mem->align = attr.align ? attr.align : mem->ty->align; if (consume(&tok, tok, ":")) { mem->is_bitfield = true; mem->bit_width = const_expr(&tok, tok); } cur = cur->next = mem; } } // If the last element is an array of incomplete type, it's // called a "flexible array member". It should behave as if // if were a zero-sized array. if (cur != &head && cur->ty->kind == TY_ARRAY && cur->ty->array_len < 0) { cur->ty = array_of(cur->ty->base, 0); ty->is_flexible = true; } *rest = tok->next; ty->members = head.next; } // attribute = ("__attribute__" "(" "(" "packed" ")" ")")* static Token *attribute_list(Token *tok, Type *ty) { while (consume(&tok, tok, "__attribute__")) { tok = skip(tok, "("); tok = skip(tok, "("); bool first = true; while (!consume(&tok, tok, ")")) { if (!first) tok = skip(tok, ","); first = false; if (consume(&tok, tok, "packed")) { ty->is_packed = true; continue; } if (consume(&tok, tok, "aligned")) { tok = skip(tok, "("); ty->align = const_expr(&tok, tok); tok = skip(tok, ")"); continue; } error_tok(tok, "unknown attribute"); } tok = skip(tok, ")"); } return tok; } // struct-union-decl = attribute? ident? ("{" struct-members)? static Type *struct_union_decl(Token **rest, Token *tok) { Type *ty = struct_type(); tok = attribute_list(tok, ty); // Read a tag. Token *tag = NULL; if (tok->kind == TK_IDENT) { tag = tok; tok = tok->next; } if (tag && !equal(tok, "{")) { *rest = tok; Type *ty2 = find_tag(tag); if (ty2) return ty2; ty->size = -1; push_tag_scope(tag, ty); return ty; } tok = skip(tok, "{"); // Construct a struct object. struct_members(&tok, tok, ty); *rest = attribute_list(tok, ty); if (tag) { // If this is a redefinition, overwrite a previous type. // Otherwise, register the struct type. Type *ty2 = hashmap_get2(&scope->tags, tag->loc, tag->len); if (ty2) { *ty2 = *ty; return ty2; } push_tag_scope(tag, ty); } return ty; } // struct-decl = struct-union-decl static Type *struct_decl(Token **rest, Token *tok) { Type *ty = struct_union_decl(rest, tok); ty->kind = TY_STRUCT; if (ty->size < 0) return ty; // Assign offsets within the struct to members. int bits = 0; for (Member *mem = ty->members; mem; mem = mem->next) { if (mem->is_bitfield && mem->bit_width == 0) { // Zero-width anonymous bitfield has a special meaning. // It affects only alignment. bits = align_to(bits, mem->ty->size * 8); } else if (mem->is_bitfield) { int sz = mem->ty->size; if (bits / (sz * 8) != (bits + mem->bit_width - 1) / (sz * 8)) bits = align_to(bits, sz * 8); mem->offset = align_down(bits / 8, sz); mem->bit_offset = bits % (sz * 8); bits += mem->bit_width; } else { if (!ty->is_packed) bits = align_to(bits, mem->align * 8); mem->offset = bits / 8; bits += mem->ty->size * 8; } if (!ty->is_packed && ty->align < mem->align) ty->align = mem->align; } ty->size = align_to(bits, ty->align * 8) / 8; return ty; } // union-decl = struct-union-decl static Type *union_decl(Token **rest, Token *tok) { Type *ty = struct_union_decl(rest, tok); ty->kind = TY_UNION; if (ty->size < 0) return ty; // If union, we don't have to assign offsets because they // are already initialized to zero. We need to compute the // alignment and the size though. for (Member *mem = ty->members; mem; mem = mem->next) { if (ty->align < mem->align) ty->align = mem->align; if (ty->size < mem->ty->size) ty->size = mem->ty->size; } ty->size = align_to(ty->size, ty->align); return ty; } // Find a struct member by name. static Member *get_struct_member(Type *ty, Token *tok) { for (Member *mem = ty->members; mem; mem = mem->next) { // Anonymous struct member if ((mem->ty->kind == TY_STRUCT || mem->ty->kind == TY_UNION) && !mem->name) { if (get_struct_member(mem->ty, tok)) return mem; continue; } // Regular struct member if (mem->name->len == tok->len && !strncmp(mem->name->loc, tok->loc, tok->len)) return mem; } return NULL; } // Create a node representing a struct member access, such as foo.bar // where foo is a struct and bar is a member name. // // C has a feature called "anonymous struct" which allows a struct to // have another unnamed struct as a member like this: // // struct { struct { int a; }; int b; } x; // // The members of an anonymous struct belong to the outer struct's // member namespace. Therefore, in the above example, you can access // member "a" of the anonymous struct as "x.a". // // This function takes care of anonymous structs. static Node *struct_ref(Node *node, Token *tok) { add_type(node); if (node->ty->kind != TY_STRUCT && node->ty->kind != TY_UNION) error_tok(node->tok, "not a struct nor a union"); Type *ty = node->ty; for (;;) { Member *mem = get_struct_member(ty, tok); if (!mem) error_tok(tok, "no such member"); node = new_unary(ND_MEMBER, node, tok); node->member = mem; if (mem->name) break; ty = mem->ty; } return node; } // Convert A++ to `(typeof A)((A += 1) - 1)` static Node *new_inc_dec(Node *node, Token *tok, int addend) { add_type(node); return new_cast(new_add(to_assign(new_add(node, new_num(addend, tok), tok)), new_num(-addend, tok), tok), node->ty); } // postfix = "(" type-name ")" "{" initializer-list "}" // = ident "(" func-args ")" postfix-tail* // | primary postfix-tail* // // postfix-tail = "[" expr "]" // | "(" func-args ")" // | "." ident // | "->" ident // | "++" // | "--" static Node *postfix(Token **rest, Token *tok) { if (equal(tok, "(") && is_typename(tok->next)) { // Compound literal Token *start = tok; Type *ty = typename(&tok, tok->next); tok = skip(tok, ")"); if (scope->next == NULL) { Obj *var = new_anon_gvar(ty); gvar_initializer(rest, tok, var); return new_var_node(var, start); } Obj *var = new_lvar("", ty); Node *lhs = lvar_initializer(rest, tok, var); Node *rhs = new_var_node(var, tok); return new_binary(ND_COMMA, lhs, rhs, start); } Node *node = primary(&tok, tok); for (;;) { if (equal(tok, "(")) { node = funcall(&tok, tok->next, node); continue; } if (equal(tok, "[")) { // x[y] is short for *(x+y) Token *start = tok; Node *idx = expr(&tok, tok->next); tok = skip(tok, "]"); node = new_unary(ND_DEREF, new_add(node, idx, start), start); continue; } if (equal(tok, ".")) { node = struct_ref(node, tok->next); tok = tok->next->next; continue; } if (equal(tok, "->")) { // x->y is short for (*x).y node = new_unary(ND_DEREF, node, tok); node = struct_ref(node, tok->next); tok = tok->next->next; continue; } if (equal(tok, "++")) { node = new_inc_dec(node, tok, 1); tok = tok->next; continue; } if (equal(tok, "--")) { node = new_inc_dec(node, tok, -1); tok = tok->next; continue; } *rest = tok; return node; } } // funcall = (assign ("," assign)*)? ")" static Node *funcall(Token **rest, Token *tok, Node *fn) { add_type(fn); if (fn->ty->kind != TY_FUNC && (fn->ty->kind != TY_PTR || fn->ty->base->kind != TY_FUNC)) error_tok(fn->tok, "not a function"); Type *ty = (fn->ty->kind == TY_FUNC) ? fn->ty : fn->ty->base; Type *param_ty = ty->params; Node head = {}; Node *cur = &head; while (!equal(tok, ")")) { if (cur != &head) tok = skip(tok, ","); Node *arg = assign(&tok, tok); add_type(arg); if (!param_ty && !ty->is_variadic) error_tok(tok, "too many arguments"); if (param_ty) { if (param_ty->kind != TY_STRUCT && param_ty->kind != TY_UNION) arg = new_cast(arg, param_ty); param_ty = param_ty->next; } else if (arg->ty->kind == TY_FLOAT) { // If parameter type is omitted (e.g. in "..."), float // arguments are promoted to double. arg = new_cast(arg, ty_double); } cur = cur->next = arg; } if (param_ty) error_tok(tok, "too few arguments"); *rest = skip(tok, ")"); Node *node = new_unary(ND_FUNCALL, fn, tok); node->func_ty = ty; node->ty = ty->return_ty; node->args = head.next; // If a function returns a struct, it is caller's responsibility // to allocate a space for the return value. if (node->ty->kind == TY_STRUCT || node->ty->kind == TY_UNION) node->ret_buffer = new_lvar("", node->ty); return node; } // generic-selection = "(" assign "," generic-assoc ("," generic-assoc)* ")" // // generic-assoc = type-name ":" assign // | "default" ":" assign static Node *generic_selection(Token **rest, Token *tok) { Token *start = tok; tok = skip(tok, "("); Node *ctrl = assign(&tok, tok); add_type(ctrl); Type *t1 = ctrl->ty; if (t1->kind == TY_FUNC) t1 = pointer_to(t1); else if (t1->kind == TY_ARRAY) t1 = pointer_to(t1->base); Node *ret = NULL; while (!consume(rest, tok, ")")) { tok = skip(tok, ","); if (equal(tok, "default")) { tok = skip(tok->next, ":"); Node *node = assign(&tok, tok); if (!ret) ret = node; continue; } Type *t2 = typename(&tok, tok); tok = skip(tok, ":"); Node *node = assign(&tok, tok); if (is_compatible(t1, t2)) ret = node; } if (!ret) error_tok(start, "controlling expression type not compatible with" " any generic association type"); return ret; } // primary = "(" "{" stmt+ "}" ")" // | "(" expr ")" // | "sizeof" "(" type-name ")" // | "sizeof" unary // | "_Alignof" "(" type-name ")" // | "_Alignof" unary // | "_Generic" generic-selection // | "__builtin_types_compatible_p" "(" type-name, type-name, ")" // | "__builtin_reg_class" "(" type-name ")" // | ident // | str // | num static Node *primary(Token **rest, Token *tok) { Token *start = tok; if (equal(tok, "(") && equal(tok->next, "{")) { // This is a GNU statement expresssion. Node *node = new_node(ND_STMT_EXPR, tok); node->body = compound_stmt(&tok, tok->next->next)->body; *rest = skip(tok, ")"); return node; } if (equal(tok, "(")) { Node *node = expr(&tok, tok->next); *rest = skip(tok, ")"); return node; } if (equal(tok, "sizeof") && equal(tok->next, "(") && is_typename(tok->next->next)) { Type *ty = typename(&tok, tok->next->next); *rest = skip(tok, ")"); if (ty->kind == TY_VLA) { if (ty->vla_size) return new_var_node(ty->vla_size, tok); Node *lhs = compute_vla_size(ty, tok); Node *rhs = new_var_node(ty->vla_size, tok); return new_binary(ND_COMMA, lhs, rhs, tok); } return new_ulong(ty->size, start); } if (equal(tok, "sizeof")) { Node *node = unary(rest, tok->next); add_type(node); if (node->ty->kind == TY_VLA) return new_var_node(node->ty->vla_size, tok); return new_ulong(node->ty->size, tok); } if (equal(tok, "_Alignof") && equal(tok->next, "(") && is_typename(tok->next->next)) { Type *ty = typename(&tok, tok->next->next); *rest = skip(tok, ")"); return new_ulong(ty->align, tok); } if (equal(tok, "_Alignof")) { Node *node = unary(rest, tok->next); add_type(node); return new_ulong(node->ty->align, tok); } if (equal(tok, "_Generic")) return generic_selection(rest, tok->next); if (equal(tok, "__builtin_types_compatible_p")) { tok = skip(tok->next, "("); Type *t1 = typename(&tok, tok); tok = skip(tok, ","); Type *t2 = typename(&tok, tok); *rest = skip(tok, ")"); return new_num(is_compatible(t1, t2), start); } if (equal(tok, "__builtin_reg_class")) { tok = skip(tok->next, "("); Type *ty = typename(&tok, tok); *rest = skip(tok, ")"); if (is_integer(ty) || ty->kind == TY_PTR) return new_num(0, start); if (is_flonum(ty)) return new_num(1, start); return new_num(2, start); } if (equal(tok, "__builtin_compare_and_swap")) { Node *node = new_node(ND_CAS, tok); tok = skip(tok->next, "("); node->cas_addr = assign(&tok, tok); tok = skip(tok, ","); node->cas_old = assign(&tok, tok); tok = skip(tok, ","); node->cas_new = assign(&tok, tok); *rest = skip(tok, ")"); return node; } if (equal(tok, "__builtin_atomic_exchange")) { Node *node = new_node(ND_EXCH, tok); tok = skip(tok->next, "("); node->lhs = assign(&tok, tok); tok = skip(tok, ","); node->rhs = assign(&tok, tok); *rest = skip(tok, ")"); return node; } if (tok->kind == TK_IDENT) { // Variable or enum constant VarScope *sc = find_var(tok); *rest = tok->next; // For "static inline" function if (sc && sc->var && sc->var->is_function) { if (current_fn) strarray_push(¤t_fn->refs, sc->var->name); else sc->var->is_root = true; } if (sc) { if (sc->var) return new_var_node(sc->var, tok); if (sc->enum_ty) return new_num(sc->enum_val, tok); } if (equal(tok->next, "(")) error_tok(tok, "implicit declaration of a function"); error_tok(tok, "undefined variable"); } if (tok->kind == TK_STR) { Obj *var = new_string_literal(tok->str, tok->ty); *rest = tok->next; return new_var_node(var, tok); } if (tok->kind == TK_NUM) { Node *node; if (is_flonum(tok->ty)) { node = new_node(ND_NUM, tok); node->fval = tok->fval; } else { node = new_num(tok->val, tok); } node->ty = tok->ty; *rest = tok->next; return node; } error_tok(tok, "expected an expression"); } static Token *parse_typedef(Token *tok, Type *basety) { bool first = true; while (!consume(&tok, tok, ";")) { if (!first) tok = skip(tok, ","); first = false; Type *ty = declarator(&tok, tok, basety); if (!ty->name) error_tok(ty->name_pos, "typedef name omitted"); push_scope(get_ident(ty->name))->type_def = ty; } return tok; } static void create_param_lvars(Type *param) { if (param) { create_param_lvars(param->next); if (!param->name) error_tok(param->name_pos, "parameter name omitted"); new_lvar(get_ident(param->name), param); } } // This function matches gotos or labels-as-values with labels. // // We cannot resolve gotos as we parse a function because gotos // can refer a label that appears later in the function. // So, we need to do this after we parse the entire function. static void resolve_goto_labels(void) { for (Node *x = gotos; x; x = x->goto_next) { for (Node *y = labels; y; y = y->goto_next) { if (!strcmp(x->label, y->label)) { x->unique_label = y->unique_label; break; } } if (x->unique_label == NULL) error_tok(x->tok->next, "use of undeclared label"); } gotos = labels = NULL; } static Obj *find_func(char *name) { Scope *sc = scope; while (sc->next) sc = sc->next; VarScope *sc2 = hashmap_get(&sc->vars, name); if (sc2 && sc2->var && sc2->var->is_function) return sc2->var; return NULL; } static void mark_live(Obj *var) { if (!var->is_function || var->is_live) return; var->is_live = true; for (int i = 0; i < var->refs.len; i++) { Obj *fn = find_func(var->refs.data[i]); if (fn) mark_live(fn); } } static Token *function(Token *tok, Type *basety, VarAttr *attr) { Type *ty = declarator(&tok, tok, basety); if (!ty->name) error_tok(ty->name_pos, "function name omitted"); char *name_str = get_ident(ty->name); Obj *fn = find_func(name_str); if (fn) { // Redeclaration if (!fn->is_function) error_tok(tok, "redeclared as a different kind of symbol"); if (fn->is_definition && equal(tok, "{")) error_tok(tok, "redefinition of %s", name_str); if (!fn->is_static && attr->is_static) error_tok(tok, "static declaration follows a non-static declaration"); fn->is_definition = fn->is_definition || equal(tok, "{"); } else { fn = new_gvar(name_str, ty); fn->is_function = true; fn->is_definition = equal(tok, "{"); fn->is_static = attr->is_static || (attr->is_inline && !attr->is_extern); fn->is_inline = attr->is_inline; } fn->is_root = !(fn->is_static && fn->is_inline); if (consume(&tok, tok, ";")) return tok; current_fn = fn; locals = NULL; enter_scope(); create_param_lvars(ty->params); // A buffer for a struct/union return value is passed // as the hidden first parameter. Type *rty = ty->return_ty; if ((rty->kind == TY_STRUCT || rty->kind == TY_UNION) && rty->size > 16) new_lvar("", pointer_to(rty)); fn->params = locals; if (ty->is_variadic) fn->va_area = new_lvar("__va_area__", array_of(ty_char, 136)); fn->alloca_bottom = new_lvar("__alloca_size__", pointer_to(ty_char)); tok = skip(tok, "{"); // [https://www.sigbus.info/n1570#6.4.2.2p1] "__func__" is // automatically defined as a local variable containing the // current function name. push_scope("__func__")->var = new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1)); // [GNU] __FUNCTION__ is yet another name of __func__. push_scope("__FUNCTION__")->var = new_string_literal(fn->name, array_of(ty_char, strlen(fn->name) + 1)); fn->body = compound_stmt(&tok, tok); fn->locals = locals; leave_scope(); resolve_goto_labels(); return tok; } static Token *global_variable(Token *tok, Type *basety, VarAttr *attr) { bool first = true; while (!consume(&tok, tok, ";")) { if (!first) tok = skip(tok, ","); first = false; Type *ty = declarator(&tok, tok, basety); if (!ty->name) error_tok(ty->name_pos, "variable name omitted"); Obj *var = new_gvar(get_ident(ty->name), ty); var->is_definition = !attr->is_extern; var->is_static = attr->is_static; var->is_tls = attr->is_tls; if (attr->align) var->align = attr->align; if (equal(tok, "=")) gvar_initializer(&tok, tok->next, var); else if (!attr->is_extern && !attr->is_tls) var->is_tentative = true; } return tok; } // Lookahead tokens and returns true if a given token is a start // of a function definition or declaration. static bool is_function(Token *tok) { if (equal(tok, ";")) return false; Type dummy = {}; Type *ty = declarator(&tok, tok, &dummy); return ty->kind == TY_FUNC; } // Remove redundant tentative definitions. static void scan_globals(void) { Obj head; Obj *cur = &head; for (Obj *var = globals; var; var = var->next) { if (!var->is_tentative) { cur = cur->next = var; continue; } // Find another definition of the same identifier. Obj *var2 = globals; for (; var2; var2 = var2->next) if (var != var2 && var2->is_definition && !strcmp(var->name, var2->name)) break; // If there's another definition, the tentative definition // is redundant if (!var2) cur = cur->next = var; } cur->next = NULL; globals = head.next; } static void declare_builtin_functions(void) { Type *ty = func_type(pointer_to(ty_void)); ty->params = copy_type(ty_int); builtin_alloca = new_gvar("alloca", ty); builtin_alloca->is_definition = false; } // program = (typedef | function-definition | global-variable)* Obj *parse(Token *tok) { declare_builtin_functions(); globals = NULL; while (tok->kind != TK_EOF) { VarAttr attr = {}; Type *basety = declspec(&tok, tok, &attr); // Typedef if (attr.is_typedef) { tok = parse_typedef(tok, basety); continue; } // Function if (is_function(tok)) { tok = function(tok, basety, &attr); continue; } // Global variable tok = global_variable(tok, basety, &attr); } for (Obj *var = globals; var; var = var->next) if (var->is_root) mark_live(var); // Remove redundant tentative definitions. scan_globals(); return globals; } // This file implements the C preprocessor. // // The preprocessor takes a list of tokens as an input and returns a // new list of tokens as an output. // // The preprocessing language is designed in such a way that that's // guaranteed to stop even if there is a recursive macro. // Informally speaking, a macro is applied only once for each token. // That is, if a macro token T appears in a result of direct or // indirect macro expansion of T, T won't be expanded any further. // For example, if T is defined as U, and U is defined as T, then // token T is expanded to U and then to T and the macro expansion // stops at that point. // // To achieve the above behavior, we attach for each token a set of // macro names from which the token is expanded. The set is called // "hideset". Hideset is initially empty, and every time we expand a // macro, the macro name is added to the resulting tokens' hidesets. // // The above macro expansion algorithm is explained in this document // written by Dave Prossor, which is used as a basis for the // standard's wording: // https://github.com/rui314/chibicc/wiki/cpp.algo.pdf // BEG preprocess typedef struct MacroParam MacroParam; struct MacroParam { MacroParam *next; char *name; }; typedef struct MacroArg MacroArg; struct MacroArg { MacroArg *next; char *name; bool is_va_args; Token *tok; }; typedef Token *macro_handler_fn(Token *); typedef struct Macro Macro; struct Macro { char *name; bool is_objlike; // Object-like or function-like MacroParam *params; char *va_args_name; Token *body; macro_handler_fn *handler; }; // `#if` can be nested, so we use a stack to manage nested `#if`s. typedef struct CondIncl CondIncl; struct CondIncl { CondIncl *next; enum { IN_THEN, IN_ELIF, IN_ELSE } ctx; Token *tok; bool included; }; typedef struct Hideset Hideset; struct Hideset { Hideset *next; char *name; }; static HashMap macros; static CondIncl *cond_incl; static HashMap pragma_once; static int include_next_idx; static Token *preprocess2(Token *tok); static Macro *find_macro(Token *tok); static bool is_hash(Token *tok) { return tok->at_bol && equal(tok, "#"); } // Some preprocessor directives such as #include allow extraneous // tokens before newline. This function skips such tokens. static Token *skip_line(Token *tok) { if (tok->at_bol) return tok; warn_tok(tok, "extra token"); while (tok->at_bol) tok = tok->next; return tok; } static Token *copy_token(Token *tok) { Token *t = calloc(1, sizeof(Token)); *t = *tok; t->next = NULL; return t; } static Token *new_eof(Token *tok) { Token *t = copy_token(tok); t->kind = TK_EOF; t->len = 0; return t; } static Hideset *new_hideset(char *name) { Hideset *hs = calloc(1, sizeof(Hideset)); hs->name = name; return hs; } static Hideset *hideset_union(Hideset *hs1, Hideset *hs2) { Hideset head = {}; Hideset *cur = &head; for (; hs1; hs1 = hs1->next) cur = cur->next = new_hideset(hs1->name); cur->next = hs2; return head.next; } static bool hideset_contains(Hideset *hs, char *s, int len) { for (; hs; hs = hs->next) if (strlen(hs->name) == len && !strncmp(hs->name, s, len)) return true; return false; } static Hideset *hideset_intersection(Hideset *hs1, Hideset *hs2) { Hideset head = {}; Hideset *cur = &head; for (; hs1; hs1 = hs1->next) if (hideset_contains(hs2, hs1->name, strlen(hs1->name))) cur = cur->next = new_hideset(hs1->name); return head.next; } static Token *add_hideset(Token *tok, Hideset *hs) { Token head = {}; Token *cur = &head; for (; tok; tok = tok->next) { Token *t = copy_token(tok); t->hideset = hideset_union(t->hideset, hs); cur = cur->next = t; } return head.next; } // Append tok2 to the end of tok1. static Token *append(Token *tok1, Token *tok2) { if (tok1->kind == TK_EOF) return tok2; Token head = {}; Token *cur = &head; for (; tok1->kind != TK_EOF; tok1 = tok1->next) cur = cur->next = copy_token(tok1); cur->next = tok2; return head.next; } static Token *skip_cond_incl2(Token *tok) { while (tok->kind != TK_EOF) { if (is_hash(tok) && (equal(tok->next, "if") || equal(tok->next, "ifdef") || equal(tok->next, "ifndef"))) { tok = skip_cond_incl2(tok->next->next); continue; } if (is_hash(tok) && equal(tok->next, "endif")) return tok->next->next; tok = tok->next; } return tok; } // Skip until next `#else`, `#elif` or `#endif`. // Nested `#if` and `#endif` are skipped. static Token *skip_cond_incl(Token *tok) { while (tok->kind != TK_EOF) { if (is_hash(tok) && (equal(tok->next, "if") || equal(tok->next, "ifdef") || equal(tok->next, "ifndef"))) { tok = skip_cond_incl2(tok->next->next); continue; } if (is_hash(tok) && (equal(tok->next, "elif") || equal(tok->next, "else") || equal(tok->next, "endif"))) break; tok = tok->next; } return tok; } // Double-quote a given string and returns it. static char *quote_string(char *str) { int bufsize = 3; for (int i = 0; str[i]; i++) { if (str[i] == '\\' || str[i] == '"') bufsize++; bufsize++; } char *buf = calloc(1, bufsize); char *p = buf; *p++ = '"'; for (int i = 0; str[i]; i++) { if (str[i] == '\\' || str[i] == '"') *p++ = '\\'; *p++ = str[i]; } *p++ = '"'; *p++ = '\0'; return buf; } static Token *new_str_token(char *str, Token *tmpl) { char *buf = quote_string(str); return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf)); } // Copy all tokens until the next newline, terminate them with // an EOF token and then returns them. This function is used to // create a new list of tokens for `#if` arguments. static Token *copy_line(Token **rest, Token *tok) { Token head = {}; Token *cur = &head; for (; !tok->at_bol; tok = tok->next) cur = cur->next = copy_token(tok); cur->next = new_eof(tok); *rest = tok; return head.next; } static Token *new_num_token(int val, Token *tmpl) { char *buf = format("%d\n", val); return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf)); } static Token *read_const_expr(Token **rest, Token *tok) { tok = copy_line(rest, tok); Token head = {}; Token *cur = &head; while (tok->kind != TK_EOF) { // "defined(foo)" or "defined foo" becomes "1" if macro "foo" // is defined. Otherwise "0". if (equal(tok, "defined")) { Token *start = tok; bool has_paren = consume(&tok, tok->next, "("); if (tok->kind != TK_IDENT) error_tok(start, "macro name must be an identifier"); Macro *m = find_macro(tok); tok = tok->next; if (has_paren) tok = skip(tok, ")"); cur = cur->next = new_num_token(m ? 1 : 0, start); continue; } cur = cur->next = tok; tok = tok->next; } cur->next = tok; return head.next; } // Read and evaluate a constant expression. static long eval_const_expr(Token **rest, Token *tok) { Token *start = tok; Token *expr = read_const_expr(rest, tok->next); expr = preprocess2(expr); if (expr->kind == TK_EOF) error_tok(start, "no expression"); // [https://www.sigbus.info/n1570#6.10.1p4] The standard requires // we replace remaining non-macro identifiers with "0" before // evaluating a constant expression. For example, `#if foo` is // equivalent to `#if 0` if foo is not defined. for (Token *t = expr; t->kind != TK_EOF; t = t->next) { if (t->kind == TK_IDENT) { Token *next = t->next; *t = *new_num_token(0, t); t->next = next; } } // Convert pp-numbers to regular numbers convert_pp_tokens(expr); Token *rest2; long val = const_expr(&rest2, expr); if (rest2->kind != TK_EOF) error_tok(rest2, "extra token"); return val; } static CondIncl *push_cond_incl(Token *tok, bool included) { CondIncl *ci = calloc(1, sizeof(CondIncl)); ci->next = cond_incl; ci->ctx = IN_THEN; ci->tok = tok; ci->included = included; cond_incl = ci; return ci; } static Macro *find_macro(Token *tok) { if (tok->kind != TK_IDENT) return NULL; return hashmap_get2(¯os, tok->loc, tok->len); } static Macro *add_macro(char *name, bool is_objlike, Token *body) { Macro *m = calloc(1, sizeof(Macro)); m->name = name; m->is_objlike = is_objlike; m->body = body; hashmap_put(¯os, name, m); return m; } static MacroParam *read_macro_params(Token **rest, Token *tok, char **va_args_name) { MacroParam head = {}; MacroParam *cur = &head; while (!equal(tok, ")")) { if (cur != &head) tok = skip(tok, ","); if (equal(tok, "...")) { *va_args_name = "__VA_ARGS__"; *rest = skip(tok->next, ")"); return head.next; } if (tok->kind != TK_IDENT) error_tok(tok, "expected an identifier"); if (equal(tok->next, "...")) { *va_args_name = strndup(tok->loc, tok->len); *rest = skip(tok->next->next, ")"); return head.next; } MacroParam *m = calloc(1, sizeof(MacroParam)); m->name = strndup(tok->loc, tok->len); cur = cur->next = m; tok = tok->next; } *rest = tok->next; return head.next; } static void read_macro_definition(Token **rest, Token *tok) { if (tok->kind != TK_IDENT) error_tok(tok, "macro name must be an identifier"); char *name = strndup(tok->loc, tok->len); tok = tok->next; if (!tok->has_space && equal(tok, "(")) { // Function-like macro char *va_args_name = NULL; MacroParam *params = read_macro_params(&tok, tok->next, &va_args_name); Macro *m = add_macro(name, false, copy_line(rest, tok)); m->params = params; m->va_args_name = va_args_name; } else { // Object-like macro add_macro(name, true, copy_line(rest, tok)); } } static MacroArg *read_macro_arg_one(Token **rest, Token *tok, bool read_rest) { Token head = {}; Token *cur = &head; int level = 0; for (;;) { if (level == 0 && equal(tok, ")")) break; if (level == 0 && !read_rest && equal(tok, ",")) break; if (tok->kind == TK_EOF) error_tok(tok, "premature end of input"); if (equal(tok, "(")) level++; else if (equal(tok, ")")) level--; cur = cur->next = copy_token(tok); tok = tok->next; } cur->next = new_eof(tok); MacroArg *arg = calloc(1, sizeof(MacroArg)); arg->tok = head.next; *rest = tok; return arg; } static MacroArg * read_macro_args(Token **rest, Token *tok, MacroParam *params, char *va_args_name) { Token *start = tok; tok = tok->next->next; MacroArg head = {}; MacroArg *cur = &head; MacroParam *pp = params; for (; pp; pp = pp->next) { if (cur != &head) tok = skip(tok, ","); cur = cur->next = read_macro_arg_one(&tok, tok, false); cur->name = pp->name; } if (va_args_name) { MacroArg *arg; if (equal(tok, ")")) { arg = calloc(1, sizeof(MacroArg)); arg->tok = new_eof(tok); } else { if (pp != params) tok = skip(tok, ","); arg = read_macro_arg_one(&tok, tok, true); } arg->name = va_args_name;; arg->is_va_args = true; cur = cur->next = arg; } else if (pp) { error_tok(start, "too many arguments"); } skip(tok, ")"); *rest = tok; return head.next; } static MacroArg *find_arg(MacroArg *args, Token *tok) { for (MacroArg *ap = args; ap; ap = ap->next) if (tok->len == strlen(ap->name) && !strncmp(tok->loc, ap->name, tok->len)) return ap; return NULL; } // Concatenates all tokens in `tok` and returns a new string. static char *join_tokens(Token *tok, Token *end) { // Compute the length of the resulting token. int len = 1; for (Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) { if (t != tok && t->has_space) len++; len += t->len; } char *buf = calloc(1, len); // Copy token texts. int pos = 0; for (Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) { if (t != tok && t->has_space) buf[pos++] = ' '; strncpy(buf + pos, t->loc, t->len); pos += t->len; } buf[pos] = '\0'; return buf; } // Concatenates all tokens in `arg` and returns a new string token. // This function is used for the stringizing operator (#). static Token *stringize(Token *hash, Token *arg) { // Create a new string token. We need to set some value to its // source location for error reporting function, so we use a macro // name token as a template. char *s = join_tokens(arg, NULL); return new_str_token(s, hash); } // Concatenate two tokens to create a new token. static Token *paste(Token *lhs, Token *rhs) { // Paste the two tokens. char *buf = format("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc); // Tokenize the resulting string. Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf)); if (tok->next->kind != TK_EOF) error_tok(lhs, "pasting forms '%s', an invalid token", buf); return tok; } static bool has_varargs(MacroArg *args) { for (MacroArg *ap = args; ap; ap = ap->next) if (!strcmp(ap->name, "__VA_ARGS__")) return ap->tok->kind != TK_EOF; return false; } // Replace func-like macro parameters with given arguments. static Token *subst(Token *tok, MacroArg *args) { Token head = {}; Token *cur = &head; while (tok->kind != TK_EOF) { // "#" followed by a parameter is replaced with stringized actuals. if (equal(tok, "#")) { MacroArg *arg = find_arg(args, tok->next); if (!arg) error_tok(tok->next, "'#' is not followed by a macro parameter"); cur = cur->next = stringize(tok, arg->tok); tok = tok->next->next; continue; } // [GNU] If __VA_ARG__ is empty, `,##__VA_ARGS__` is expanded // to the empty token list. Otherwise, its expaned to `,` and // __VA_ARGS__. if (equal(tok, ",") && equal(tok->next, "##")) { MacroArg *arg = find_arg(args, tok->next->next); if (arg && arg->is_va_args) { if (arg->tok->kind == TK_EOF) { tok = tok->next->next->next; } else { cur = cur->next = copy_token(tok); tok = tok->next->next; } continue; } } if (equal(tok, "##")) { if (cur == &head) error_tok(tok, "'##' cannot appear at start of macro expansion"); if (tok->next->kind == TK_EOF) error_tok(tok, "'##' cannot appear at end of macro expansion"); MacroArg *arg = find_arg(args, tok->next); if (arg) { if (arg->tok->kind != TK_EOF) { *cur = *paste(cur, arg->tok); for (Token *t = arg->tok->next; t->kind != TK_EOF; t = t->next) cur = cur->next = copy_token(t); } tok = tok->next->next; continue; } *cur = *paste(cur, tok->next); tok = tok->next->next; continue; } MacroArg *arg = find_arg(args, tok); if (arg && equal(tok->next, "##")) { Token *rhs = tok->next->next; if (arg->tok->kind == TK_EOF) { MacroArg *arg2 = find_arg(args, rhs); if (arg2) { for (Token *t = arg2->tok; t->kind != TK_EOF; t = t->next) cur = cur->next = copy_token(t); } else { cur = cur->next = copy_token(rhs); } tok = rhs->next; continue; } for (Token *t = arg->tok; t->kind != TK_EOF; t = t->next) cur = cur->next = copy_token(t); tok = tok->next; continue; } // If __VA_ARG__ is empty, __VA_OPT__(x) is expanded to the // empty token list. Otherwise, __VA_OPT__(x) is expanded to x. if (equal(tok, "__VA_OPT__") && equal(tok->next, "(")) { MacroArg *arg = read_macro_arg_one(&tok, tok->next->next, true); if (has_varargs(args)) for (Token *t = arg->tok; t->kind != TK_EOF; t = t->next) cur = cur->next = t; tok = skip(tok, ")"); continue; } // Handle a macro token. Macro arguments are completely macro-expanded // before they are substituted into a macro body. if (arg) { Token *t = preprocess2(arg->tok); t->at_bol = tok->at_bol; t->has_space = tok->has_space; for (; t->kind != TK_EOF; t = t->next) cur = cur->next = copy_token(t); tok = tok->next; continue; } // Handle a non-macro token. cur = cur->next = copy_token(tok); tok = tok->next; continue; } cur->next = tok; return head.next; } // If tok is a macro, expand it and return true. // Otherwise, do nothing and return false. static bool expand_macro(Token **rest, Token *tok) { if (hideset_contains(tok->hideset, tok->loc, tok->len)) return false; Macro *m = find_macro(tok); if (!m) return false; // Built-in dynamic macro application such as __LINE__ if (m->handler) { *rest = m->handler(tok); (*rest)->next = tok->next; return true; } // Object-like macro application if (m->is_objlike) { Hideset *hs = hideset_union(tok->hideset, new_hideset(m->name)); Token *body = add_hideset(m->body, hs); for (Token *t = body; t->kind != TK_EOF; t = t->next) t->origin = tok; *rest = append(body, tok->next); (*rest)->at_bol = tok->at_bol; (*rest)->has_space = tok->has_space; return true; } // If a funclike macro token is not followed by an argument list, // treat it as a normal identifier. if (!equal(tok->next, "(")) return false; // Function-like macro application Token *macro_token = tok; MacroArg *args = read_macro_args(&tok, tok, m->params, m->va_args_name); Token *rparen = tok; // Tokens that consist a func-like macro invocation may have different // hidesets, and if that's the case, it's not clear what the hideset // for the new tokens should be. We take the interesection of the // macro token and the closing parenthesis and use it as a new hideset // as explained in the Dave Prossor's algorithm. Hideset *hs = hideset_intersection(macro_token->hideset, rparen->hideset); hs = hideset_union(hs, new_hideset(m->name)); Token *body = subst(m->body, args); body = add_hideset(body, hs); for (Token *t = body; t->kind != TK_EOF; t = t->next) t->origin = macro_token; *rest = append(body, tok->next); (*rest)->at_bol = macro_token->at_bol; (*rest)->has_space = macro_token->has_space; return true; } char *search_include_paths(char *filename) { if (filename[0] == '/') return filename; static HashMap cache; char *cached = hashmap_get(&cache, filename); if (cached) return cached; // Search a file from the include paths. for (int i = 0; i < include_paths.len; i++) { char *path = format("%s/%s", include_paths.data[i], filename); if (!file_exists(path)) continue; hashmap_put(&cache, filename, path); include_next_idx = i + 1; return path; } return NULL; } static char *search_include_next(char *filename) { for (; include_next_idx < include_paths.len; include_next_idx++) { char *path = format("%s/%s", include_paths.data[include_next_idx], filename); if (file_exists(path)) return path; } return NULL; } // Read an #include argument. static char *read_include_filename(Token **rest, Token *tok, bool *is_dquote) { // Pattern 1: #include "foo.h" if (tok->kind == TK_STR) { // A double-quoted filename for #include is a special kind of // token, and we don't want to interpret any escape sequences in it. // For example, "\f" in "C:\foo" is not a formfeed character but // just two non-control characters, backslash and f. // So we don't want to use token->str. *is_dquote = true; *rest = skip_line(tok->next); return strndup(tok->loc + 1, tok->len - 2); } // Pattern 2: #include <foo.h> if (equal(tok, "<")) { // Reconstruct a filename from a sequence of tokens between // "<" and ">". Token *start = tok; // Find closing ">". for (; !equal(tok, ">"); tok = tok->next) if (tok->at_bol || tok->kind == TK_EOF) error_tok(tok, "expected '>'"); *is_dquote = false; *rest = skip_line(tok->next); return join_tokens(start->next, tok); } // Pattern 3: #include FOO // In this case FOO must be macro-expanded to either // a single string token or a sequence of "<" ... ">". if (tok->kind == TK_IDENT) { Token *tok2 = preprocess2(copy_line(rest, tok)); return read_include_filename(&tok2, tok2, is_dquote); } error_tok(tok, "expected a filename"); } // Detect the following "include guard" pattern. // // #ifndef FOO_H // #define FOO_H // ... // #endif static char *detect_include_guard(Token *tok) { // Detect the first two lines. if (!is_hash(tok) || !equal(tok->next, "ifndef")) return NULL; tok = tok->next->next; if (tok->kind != TK_IDENT) return NULL; char *macro = strndup(tok->loc, tok->len); tok = tok->next; if (!is_hash(tok) || !equal(tok->next, "define") || !equal(tok->next->next, macro)) return NULL; // Read until the end of the file. while (tok->kind != TK_EOF) { if (!is_hash(tok)) { tok = tok->next; continue; } if (equal(tok->next, "endif") && tok->next->next->kind == TK_EOF) return macro; if (equal(tok, "if") || equal(tok, "ifdef") || equal(tok, "ifndef")) tok = skip_cond_incl(tok->next); else tok = tok->next; } return NULL; } static Token *include_file(Token *tok, char *path, Token *filename_tok) { // Check for "#pragma once" if (hashmap_get(&pragma_once, path)) return tok; // If we read the same file before, and if the file was guarded // by the usual #ifndef ... #endif pattern, we may be able to // skip the file without opening it. static HashMap include_guards; char *guard_name = hashmap_get(&include_guards, path); if (guard_name && hashmap_get(¯os, guard_name)) return tok; Token *tok2 = tokenize_file(path); if (!tok2) error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno)); guard_name = detect_include_guard(tok2); if (guard_name) hashmap_put(&include_guards, path, guard_name); return append(tok2, tok); } // Read #line arguments static void read_line_marker(Token **rest, Token *tok) { Token *start = tok; tok = preprocess(copy_line(rest, tok)); if (tok->kind != TK_NUM || tok->ty->kind != TY_INT) error_tok(tok, "invalid line marker"); start->file->line_delta = tok->val - start->line_no; tok = tok->next; if (tok->kind == TK_EOF) return; if (tok->kind != TK_STR) error_tok(tok, "filename expected"); start->file->display_name = tok->str; } // Visit all tokens in `tok` while evaluating preprocessing // macros and directives. static Token *preprocess2(Token *tok) { Token head = {}; Token *cur = &head; while (tok->kind != TK_EOF) { // If it is a macro, expand it. if (expand_macro(&tok, tok)) continue; // Pass through if it is not a "#". if (!is_hash(tok)) { tok->line_delta = tok->file->line_delta; tok->filename = tok->file->display_name; cur = cur->next = tok; tok = tok->next; continue; } Token *start = tok; tok = tok->next; if (equal(tok, "include")) { bool is_dquote; char *filename = read_include_filename(&tok, tok->next, &is_dquote); if (filename[0] != '/' && is_dquote) { char *path = format("%s/%s", dirname(strdup(start->file->name)), filename); if (file_exists(path)) { tok = include_file(tok, path, start->next->next); continue; } } char *path = search_include_paths(filename); tok = include_file(tok, path ? path : filename, start->next->next); continue; } if (equal(tok, "include_next")) { bool ignore; char *filename = read_include_filename(&tok, tok->next, &ignore); char *path = search_include_next(filename); tok = include_file(tok, path ? path : filename, start->next->next); continue; } if (equal(tok, "define")) { read_macro_definition(&tok, tok->next); continue; } if (equal(tok, "undef")) { tok = tok->next; if (tok->kind != TK_IDENT) error_tok(tok, "macro name must be an identifier"); undef_macro(strndup(tok->loc, tok->len)); tok = skip_line(tok->next); continue; } if (equal(tok, "if")) { long val = eval_const_expr(&tok, tok); push_cond_incl(start, val); if (!val) tok = skip_cond_incl(tok); continue; } if (equal(tok, "ifdef")) { bool defined = find_macro(tok->next); push_cond_incl(tok, defined); tok = skip_line(tok->next->next); if (!defined) tok = skip_cond_incl(tok); continue; } if (equal(tok, "ifndef")) { bool defined = find_macro(tok->next); push_cond_incl(tok, !defined); tok = skip_line(tok->next->next); if (defined) tok = skip_cond_incl(tok); continue; } if (equal(tok, "elif")) { if (!cond_incl || cond_incl->ctx == IN_ELSE) error_tok(start, "stray #elif"); cond_incl->ctx = IN_ELIF; if (!cond_incl->included && eval_const_expr(&tok, tok)) cond_incl->included = true; else tok = skip_cond_incl(tok); continue; } if (equal(tok, "else")) { if (!cond_incl || cond_incl->ctx == IN_ELSE) error_tok(start, "stray #else"); cond_incl->ctx = IN_ELSE; tok = skip_line(tok->next); if (cond_incl->included) tok = skip_cond_incl(tok); continue; } if (equal(tok, "endif")) { if (!cond_incl) error_tok(start, "stray #endif"); cond_incl = cond_incl->next; tok = skip_line(tok->next); continue; } if (equal(tok, "line")) { read_line_marker(&tok, tok->next); continue; } if (tok->kind == TK_PP_NUM) { read_line_marker(&tok, tok); continue; } if (equal(tok, "pragma") && equal(tok->next, "once")) { hashmap_put(&pragma_once, tok->file->name, (void *)1); tok = skip_line(tok->next->next); continue; } if (equal(tok, "pragma")) { do { tok = tok->next; } while (!tok->at_bol); continue; } if (equal(tok, "error")) error_tok(tok, "error"); // `#`-only line is legal. It's called a null directive. if (tok->at_bol) continue; error_tok(tok, "invalid preprocessor directive"); } cur->next = tok; return head.next; } void define_macro(char *name, char *buf) { Token *tok = tokenize(new_file("<built-in>", 1, buf)); add_macro(name, true, tok); } void undef_macro(char *name) { hashmap_delete(¯os, name); } static Macro *add_builtin(char *name, macro_handler_fn *fn) { Macro *m = add_macro(name, true, NULL); m->handler = fn; return m; } static Token *file_macro(Token *tmpl) { while (tmpl->origin) tmpl = tmpl->origin; return new_str_token(tmpl->file->display_name, tmpl); } static Token *line_macro(Token *tmpl) { while (tmpl->origin) tmpl = tmpl->origin; int i = tmpl->line_no + tmpl->file->line_delta; return new_num_token(i, tmpl); } // __COUNTER__ is expanded to serial values starting from 0. static Token *counter_macro(Token *tmpl) { static int i = 0; return new_num_token(i++, tmpl); } // __TIMESTAMP__ is expanded to a string describing the last // modification time of the current file. E.g. // "Fri Jul 24 01:32:50 2020" static Token *timestamp_macro(Token *tmpl) { struct stat st; if (stat(tmpl->file->name, &st) != 0) return new_str_token("??? ??? ?? ??:??:?? ????", tmpl); char buf[30]; ctime_r(&st.st_mtime, buf); buf[24] = '\0'; return new_str_token(buf, tmpl); } static Token *base_file_macro(Token *tmpl) { return new_str_token(base_file, tmpl); } // __DATE__ is expanded to the current date, e.g. "May 17 2020". static char *format_date(struct tm *tm) { static char mon[][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", }; return format("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900); } // __TIME__ is expanded to the current time, e.g. "13:34:03". static char *format_time(struct tm *tm) { return format("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec); } void init_macros(void) { // Define predefined macros define_macro("_LP64", "1"); define_macro("__C99_MACRO_WITH_VA_ARGS", "1"); define_macro("__ELF__", "1"); define_macro("__LP64__", "1"); define_macro("__SIZEOF_DOUBLE__", "8"); define_macro("__SIZEOF_FLOAT__", "4"); define_macro("__SIZEOF_INT__", "4"); define_macro("__SIZEOF_LONG_DOUBLE__", "8"); define_macro("__SIZEOF_LONG_LONG__", "8"); define_macro("__SIZEOF_LONG__", "8"); define_macro("__SIZEOF_POINTER__", "8"); define_macro("__SIZEOF_PTRDIFF_T__", "8"); define_macro("__SIZEOF_SHORT__", "2"); define_macro("__SIZEOF_SIZE_T__", "8"); define_macro("__SIZE_TYPE__", "unsigned long"); define_macro("__STDC_HOSTED__", "1"); define_macro("__STDC_NO_COMPLEX__", "1"); define_macro("__STDC_UTF_16__", "1"); define_macro("__STDC_UTF_32__", "1"); define_macro("__STDC_VERSION__", "201112L"); define_macro("__STDC__", "1"); define_macro("__USER_LABEL_PREFIX__", ""); define_macro("__alignof__", "_Alignof"); define_macro("__amd64", "1"); define_macro("__amd64__", "1"); define_macro("__chibicc__", "1"); define_macro("__const__", "const"); define_macro("__gnu_linux__", "1"); define_macro("__inline__", "inline"); define_macro("__linux", "1"); define_macro("__linux__", "1"); define_macro("__signed__", "signed"); define_macro("__typeof__", "typeof"); define_macro("__unix", "1"); define_macro("__unix__", "1"); define_macro("__volatile__", "volatile"); define_macro("__x86_64", "1"); define_macro("__x86_64__", "1"); define_macro("linux", "1"); define_macro("unix", "1"); add_builtin("__FILE__", file_macro); add_builtin("__LINE__", line_macro); add_builtin("__COUNTER__", counter_macro); add_builtin("__TIMESTAMP__", timestamp_macro); add_builtin("__BASE_FILE__", base_file_macro); time_t now = time(NULL); struct tm *tm = localtime(&now); define_macro("__DATE__", format_date(tm)); define_macro("__TIME__", format_time(tm)); } typedef enum { STR_NONE, STR_UTF8, STR_UTF16, STR_UTF32, STR_WIDE, } StringKind; static StringKind getStringKind(Token *tok) { if (!strcmp(tok->loc, "u8")) return STR_UTF8; switch (tok->loc[0]) { case '"': return STR_NONE; case 'u': return STR_UTF16; case 'U': return STR_UTF32; case 'L': return STR_WIDE; } unreachable(); } // Concatenate adjacent string literals into a single string literal // as per the C spec. static void join_adjacent_string_literals(Token *tok) { // First pass: If regular string literals are adjacent to wide // string literals, regular string literals are converted to a wide // type before concatenation. In this pass, we do the conversion. for (Token *tok1 = tok; tok1->kind != TK_EOF;) { if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) { tok1 = tok1->next; continue; } StringKind kind = getStringKind(tok1); Type *basety = tok1->ty->base; for (Token *t = tok1->next; t->kind == TK_STR; t = t->next) { StringKind k = getStringKind(t); if (kind == STR_NONE) { kind = k; basety = t->ty->base; } else if (k != STR_NONE && kind != k) { error_tok(t, "unsupported non-standard concatenation of string literals"); } } if (basety->size > 1) for (Token *t = tok1; t->kind == TK_STR; t = t->next) if (t->ty->base->size == 1) *t = *tokenize_string_literal(t, basety); while (tok1->kind == TK_STR) tok1 = tok1->next; } // Second pass: concatenate adjacent string literals. for (Token *tok1 = tok; tok1->kind != TK_EOF;) { if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) { tok1 = tok1->next; continue; } Token *tok2 = tok1->next; while (tok2->kind == TK_STR) tok2 = tok2->next; int len = tok1->ty->array_len; for (Token *t = tok1->next; t != tok2; t = t->next) len = len + t->ty->array_len - 1; char *buf = calloc(tok1->ty->base->size, len); int i = 0; for (Token *t = tok1; t != tok2; t = t->next) { memcpy(buf + i, t->str, t->ty->size); i = i + t->ty->size - t->ty->base->size; } *tok1 = *copy_token(tok1); tok1->ty = array_of(tok1->ty->base, len); tok1->str = buf; tok1->next = tok2; tok1 = tok2; } } // Entry point function of the preprocessor. Token *preprocess(Token *tok) { tok = preprocess2(tok); if (cond_incl) error_tok(cond_incl->tok, "unterminated conditional directive"); convert_pp_tokens(tok); join_adjacent_string_literals(tok); for (Token *t = tok; t; t = t->next) t->line_no += t->line_delta; return tok; } // BEG string void strarray_push(StringArray *arr, char *s) { if (!arr->data) { arr->data = calloc(8, sizeof(char *)); arr->capacity = 8; } if (arr->capacity == arr->len) { arr->data = realloc(arr->data, sizeof(char *) * arr->capacity * 2); arr->capacity *= 2; for (int i = arr->len; i < arr->capacity; i++) arr->data[i] = NULL; } arr->data[arr->len++] = s; } // Takes a printf-style format string and returns a formatted string. char *format(char *fmt, ...) { char *buf; size_t buflen; FILE *out = open_memstream(&buf, &buflen); va_list ap; va_start(ap, fmt); vfprintf(out, fmt, ap); va_end(ap); fclose(out); return buf; } // BEG tokenize // Input file static File *current_file; // A list of all input files. static File **input_files; // True if the current position is at the beginning of a line static bool at_bol; // True if the current position follows a space character static bool has_space; // Reports an error and exit. void error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); exit(1); } // Reports an error message in the following format. // // foo.c:10: x = y + 1; // ^ <error message here> static void verror_at(char *filename, char *input, int line_no, char *loc, char *fmt, va_list ap) { // Find a line containing `loc`. char *line = loc; while (input < line && line[-1] != '\n') line--; char *end = loc; while (*end && *end != '\n') end++; // Print out the line. int indent = fprintf(stderr, "%s:%d: ", filename, line_no); fprintf(stderr, "%.*s\n", (int)(end - line), line); // Show the error message. int pos = display_width(line, loc - line) + indent; fprintf(stderr, "%*s", pos, ""); // print pos spaces. fprintf(stderr, "^ "); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); } void error_at(char *loc, char *fmt, ...) { int line_no = 1; for (char *p = current_file->contents; p < loc; p++) if (*p == '\n') line_no++; va_list ap; va_start(ap, fmt); verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap); exit(1); } void error_tok(Token *tok, char *fmt, ...) { va_list ap; va_start(ap, fmt); verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt, ap); exit(1); } void warn_tok(Token *tok, char *fmt, ...) { va_list ap; va_start(ap, fmt); verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt, ap); va_end(ap); } // Consumes the current token if it matches `op`. bool equal(Token *tok, char *op) { return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0'; } // Ensure that the current token is `op`. Token *skip(Token *tok, char *op) { if (!equal(tok, op)) error_tok(tok, "expected '%s'", op); return tok->next; } bool consume(Token **rest, Token *tok, char *str) { if (equal(tok, str)) { *rest = tok->next; return true; } *rest = tok; return false; } // Create a new token. static Token *new_token(TokenKind kind, char *start, char *end) { Token *tok = calloc(1, sizeof(Token)); tok->kind = kind; tok->loc = start; tok->len = end - start; tok->file = current_file; tok->filename = current_file->display_name; tok->at_bol = at_bol; tok->has_space = has_space; at_bol = has_space = false; return tok; } static bool startswith(char *p, char *q) { return strncmp(p, q, strlen(q)) == 0; } // Read an identifier and returns the length of it. // If p does not point to a valid identifier, 0 is returned. static int read_ident(char *start) { char *p = start; uint32_t c = decode_utf8(&p, p); if (!is_ident1(c)) return 0; for (;;) { char *q; c = decode_utf8(&q, p); if (!is_ident2(c)) return p - start; p = q; } } static int from_hex(char c) { if ('0' <= c && c <= '9') return c - '0'; if ('a' <= c && c <= 'f') return c - 'a' + 10; return c - 'A' + 10; } // Read a punctuator token from p and returns its length. static int read_punct(char *p) { static char *kw[] = { "<<=", ">>=", "...", "==", "!=", "<=", ">=", "->", "+=", "-=", "*=", "/=", "++", "--", "%=", "&=", "|=", "^=", "&&", "||", "<<", ">>", "##", }; for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) if (startswith(p, kw[i])) return strlen(kw[i]); return ispunct(*p) ? 1 : 0; } static bool is_keyword(Token *tok) { static HashMap map; if (map.capacity == 0) { static char *kw[] = { "return", "if", "else", "for", "while", "int", "sizeof", "char", "struct", "union", "short", "long", "void", "typedef", "_Bool", "enum", "static", "goto", "break", "continue", "switch", "case", "default", "extern", "_Alignof", "_Alignas", "do", "signed", "unsigned", "const", "volatile", "auto", "register", "restrict", "__restrict", "__restrict__", "_Noreturn", "float", "double", "typeof", "asm", "_Thread_local", "__thread", "_Atomic", "__attribute__", }; for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) hashmap_put(&map, kw[i], (void *)1); } return hashmap_get2(&map, tok->loc, tok->len); } static int read_escaped_char(char **new_pos, char *p) { if ('0' <= *p && *p <= '7') { // Read an octal number. int c = *p++ - '0'; if ('0' <= *p && *p <= '7') { c = (c << 3) + (*p++ - '0'); if ('0' <= *p && *p <= '7') c = (c << 3) + (*p++ - '0'); } *new_pos = p; return c; } if (*p == 'x') { // Read a hexadecimal number. p++; if (!isxdigit(*p)) error_at(p, "invalid hex escape sequence"); int c = 0; for (; isxdigit(*p); p++) c = (c << 4) + from_hex(*p); *new_pos = p; return c; } *new_pos = p + 1; // Escape sequences are defined using themselves here. E.g. // '\n' is implemented using '\n'. This tautological definition // works because the compiler that compiles our compiler knows // what '\n' actually is. In other words, we "inherit" the ASCII // code of '\n' from the compiler that compiles our compiler, // so we don't have to teach the actual code here. // // This fact has huge implications not only for the correctness // of the compiler but also for the security of the generated code. // For more info, read "Reflections on Trusting Trust" by Ken Thompson. // https://github.com/rui314/chibicc/wiki/thompson1984.pdf switch (*p) { case 'a': return '\a'; case 'b': return '\b'; case 't': return '\t'; case 'n': return '\n'; case 'v': return '\v'; case 'f': return '\f'; case 'r': return '\r'; // [GNU] \e for the ASCII escape character is a GNU C extension. case 'e': return 27; default: return *p; } } // Find a closing double-quote. static char *string_literal_end(char *p) { char *start = p; for (; *p != '"'; p++) { if (*p == '\n' || *p == '\0') error_at(start, "unclosed string literal"); if (*p == '\\') p++; } return p; } static Token *read_string_literal(char *start, char *quote) { char *end = string_literal_end(quote + 1); char *buf = calloc(1, end - quote); int len = 0; for (char *p = quote + 1; p < end;) { if (*p == '\\') buf[len++] = read_escaped_char(&p, p + 1); else buf[len++] = *p++; } Token *tok = new_token(TK_STR, start, end + 1); tok->ty = array_of(ty_char, len + 1); tok->str = buf; return tok; } // Read a UTF-8-encoded string literal and transcode it in UTF-16. // // UTF-16 is yet another variable-width encoding for Unicode. Code // points smaller than U+10000 are encoded in 2 bytes. Code points // equal to or larger than that are encoded in 4 bytes. Each 2 bytes // in the 4 byte sequence is called "surrogate", and a 4 byte sequence // is called a "surrogate pair". static Token *read_utf16_string_literal(char *start, char *quote) { char *end = string_literal_end(quote + 1); uint16_t *buf = calloc(2, end - start); int len = 0; for (char *p = quote + 1; p < end;) { if (*p == '\\') { buf[len++] = read_escaped_char(&p, p + 1); continue; } uint32_t c = decode_utf8(&p, p); if (c < 0x10000) { // Encode a code point in 2 bytes. buf[len++] = c; } else { // Encode a code point in 4 bytes. c -= 0x10000; buf[len++] = 0xd800 + ((c >> 10) & 0x3ff); buf[len++] = 0xdc00 + (c & 0x3ff); } } Token *tok = new_token(TK_STR, start, end + 1); tok->ty = array_of(ty_ushort, len + 1); tok->str = (char *)buf; return tok; } // Read a UTF-8-encoded string literal and transcode it in UTF-32. // // UTF-32 is a fixed-width encoding for Unicode. Each code point is // encoded in 4 bytes. static Token *read_utf32_string_literal(char *start, char *quote, Type *ty) { char *end = string_literal_end(quote + 1); uint32_t *buf = calloc(4, end - quote); int len = 0; for (char *p = quote + 1; p < end;) { if (*p == '\\') buf[len++] = read_escaped_char(&p, p + 1); else buf[len++] = decode_utf8(&p, p); } Token *tok = new_token(TK_STR, start, end + 1); tok->ty = array_of(ty, len + 1); tok->str = (char *)buf; return tok; } static Token *read_char_literal(char *start, char *quote, Type *ty) { char *p = quote + 1; if (*p == '\0') error_at(start, "unclosed char literal"); int c; if (*p == '\\') c = read_escaped_char(&p, p + 1); else c = decode_utf8(&p, p); char *end = strchr(p, '\''); if (!end) error_at(p, "unclosed char literal"); Token *tok = new_token(TK_NUM, start, end + 1); tok->val = c; tok->ty = ty; return tok; } static bool convert_pp_int(Token *tok) { char *p = tok->loc; // Read a binary, octal, decimal or hexadecimal number. int base = 10; if (!strncasecmp(p, "0x", 2) && isxdigit(p[2])) { p += 2; base = 16; } else if (!strncasecmp(p, "0b", 2) && (p[2] == '0' || p[2] == '1')) { p += 2; base = 2; } else if (*p == '0') { base = 8; } int64_t val = strtoul(p, &p, base); // Read U, L or LL suffixes. bool l = false; bool u = false; if (startswith(p, "LLU") || startswith(p, "LLu") || startswith(p, "llU") || startswith(p, "llu") || startswith(p, "ULL") || startswith(p, "Ull") || startswith(p, "uLL") || startswith(p, "ull")) { p += 3; l = u = true; } else if (!strncasecmp(p, "lu", 2) || !strncasecmp(p, "ul", 2)) { p += 2; l = u = true; } else if (startswith(p, "LL") || startswith(p, "ll")) { p += 2; l = true; } else if (*p == 'L' || *p == 'l') { p++; l = true; } else if (*p == 'U' || *p == 'u') { p++; u = true; } if (p != tok->loc + tok->len) return false; // Infer a type. Type *ty; if (base == 10) { if (l && u) ty = ty_ulong; else if (l) ty = ty_long; else if (u) ty = (val >> 32) ? ty_ulong : ty_uint; else ty = (val >> 31) ? ty_long : ty_int; } else { if (l && u) ty = ty_ulong; else if (l) ty = (val >> 63) ? ty_ulong : ty_long; else if (u) ty = (val >> 32) ? ty_ulong : ty_uint; else if (val >> 63) ty = ty_ulong; else if (val >> 32) ty = ty_long; else if (val >> 31) ty = ty_uint; else ty = ty_int; } tok->kind = TK_NUM; tok->val = val; tok->ty = ty; return true; } // The definition of the numeric literal at the preprocessing stage // is more relaxed than the definition of that at the later stages. // In order to handle that, a numeric literal is tokenized as a // "pp-number" token first and then converted to a regular number // token after preprocessing. // // This function converts a pp-number token to a regular number token. static void convert_pp_number(Token *tok) { // Try to parse as an integer constant. if (convert_pp_int(tok)) return; // If it's not an integer, it must be a floating point constant. char *end; long double val = strtold(tok->loc, &end); Type *ty; if (*end == 'f' || *end == 'F') { ty = ty_float; end++; } else if (*end == 'l' || *end == 'L') { ty = ty_ldouble; end++; } else { ty = ty_double; } if (tok->loc + tok->len != end) error_tok(tok, "invalid numeric constant"); tok->kind = TK_NUM; tok->fval = val; tok->ty = ty; } void convert_pp_tokens(Token *tok) { for (Token *t = tok; t->kind != TK_EOF; t = t->next) { if (is_keyword(t)) t->kind = TK_KEYWORD; else if (t->kind == TK_PP_NUM) convert_pp_number(t); } } // Initialize line info for all tokens. static void add_line_numbers(Token *tok) { char *p = current_file->contents; int n = 1; do { if (p == tok->loc) { tok->line_no = n; tok = tok->next; } if (*p == '\n') n++; } while (*p++); } Token *tokenize_string_literal(Token *tok, Type *basety) { Token *t; if (basety->size == 2) t = read_utf16_string_literal(tok->loc, tok->loc); else t = read_utf32_string_literal(tok->loc, tok->loc, basety); t->next = tok->next; return t; } // Tokenize a given string and returns new tokens. Token *tokenize(File *file) { current_file = file; char *p = file->contents; Token head = {}; Token *cur = &head; at_bol = true; has_space = false; while (*p) { // Skip line comments. if (startswith(p, "//")) { p += 2; while (*p != '\n') p++; has_space = true; continue; } // Skip block comments. if (startswith(p, "/*")) { char *q = strstr(p + 2, "*/"); if (!q) error_at(p, "unclosed block comment"); p = q + 2; has_space = true; continue; } // Skip newline. if (*p == '\n') { p++; at_bol = true; has_space = false; continue; } // Skip whitespace characters. if (isspace(*p)) { p++; has_space = true; continue; } // Numeric literal if (isdigit(*p) || (*p == '.' && isdigit(p[1]))) { char *q = p++; for (;;) { if (p[0] && p[1] && strchr("eEpP", p[0]) && strchr("+-", p[1])) p += 2; else if (isalnum(*p) || *p == '.') p++; else break; } cur = cur->next = new_token(TK_PP_NUM, q, p); continue; } // String literal if (*p == '"') { cur = cur->next = read_string_literal(p, p); p += cur->len; continue; } // UTF-8 string literal if (startswith(p, "u8\"")) { cur = cur->next = read_string_literal(p, p + 2); p += cur->len; continue; } // UTF-16 string literal if (startswith(p, "u\"")) { cur = cur->next = read_utf16_string_literal(p, p + 1); p += cur->len; continue; } // Wide string literal if (startswith(p, "L\"")) { cur = cur->next = read_utf32_string_literal(p, p + 1, ty_int); p += cur->len; continue; } // UTF-32 string literal if (startswith(p, "U\"")) { cur = cur->next = read_utf32_string_literal(p, p + 1, ty_uint); p += cur->len; continue; } // Character literal if (*p == '\'') { cur = cur->next = read_char_literal(p, p, ty_int); cur->val = (char)cur->val; p += cur->len; continue; } // UTF-16 character literal if (startswith(p, "u'")) { cur = cur->next = read_char_literal(p, p + 1, ty_ushort); cur->val &= 0xffff; p += cur->len; continue; } // Wide character literal if (startswith(p, "L'")) { cur = cur->next = read_char_literal(p, p + 1, ty_int); p += cur->len; continue; } // UTF-32 character literal if (startswith(p, "U'")) { cur = cur->next = read_char_literal(p, p + 1, ty_uint); p += cur->len; continue; } // Identifier or keyword int ident_len = read_ident(p); if (ident_len) { cur = cur->next = new_token(TK_IDENT, p, p + ident_len); p += cur->len; continue; } // Punctuators int punct_len = read_punct(p); if (punct_len) { cur = cur->next = new_token(TK_PUNCT, p, p + punct_len); p += cur->len; continue; } error_at(p, "invalid token"); } cur = cur->next = new_token(TK_EOF, p, p); add_line_numbers(head.next); return head.next; } // Returns the contents of a given file. static char *read_file(char *path) { FILE *fp; if (strcmp(path, "-") == 0) { // By convention, read from stdin if a given filename is "-". fp = stdin; } else { fp = fopen(path, "r"); if (!fp) return NULL; } char *buf; size_t buflen; FILE *out = open_memstream(&buf, &buflen); // Read the entire file. for (;;) { char buf2[4096]; int n = fread(buf2, 1, sizeof(buf2), fp); if (n == 0) break; fwrite(buf2, 1, n, out); } if (fp != stdin) fclose(fp); // Make sure that the last line is properly terminated with '\n'. fflush(out); if (buflen == 0 || buf[buflen - 1] != '\n') fputc('\n', out); fputc('\0', out); fclose(out); return buf; } File **get_input_files(void) { return input_files; } File *new_file(char *name, int file_no, char *contents) { File *file = calloc(1, sizeof(File)); file->name = name; file->display_name = name; file->file_no = file_no; file->contents = contents; return file; } // Replaces \r or \r\n with \n. static void canonicalize_newline(char *p) { int i = 0, j = 0; while (p[i]) { if (p[i] == '\r' && p[i + 1] == '\n') { i += 2; p[j++] = '\n'; } else if (p[i] == '\r') { i++; p[j++] = '\n'; } else { p[j++] = p[i++]; } } p[j] = '\0'; } // Removes backslashes followed by a newline. static void remove_backslash_newline(char *p) { int i = 0, j = 0; // We want to keep the number of newline characters so that // the logical line number matches the physical one. // This counter maintain the number of newlines we have removed. int n = 0; while (p[i]) { if (p[i] == '\\' && p[i + 1] == '\n') { i += 2; n++; } else if (p[i] == '\n') { p[j++] = p[i++]; for (; n > 0; n--) p[j++] = '\n'; } else { p[j++] = p[i++]; } } for (; n > 0; n--) p[j++] = '\n'; p[j] = '\0'; } static uint32_t read_universal_char(char *p, int len) { uint32_t c = 0; for (int i = 0; i < len; i++) { if (!isxdigit(p[i])) return 0; c = (c << 4) | from_hex(p[i]); } return c; } // Replace \u or \U escape sequences with corresponding UTF-8 bytes. static void convert_universal_chars(char *p) { char *q = p; while (*p) { if (startswith(p, "\\u")) { uint32_t c = read_universal_char(p + 2, 4); if (c) { p += 6; q += encode_utf8(q, c); } else { *q++ = *p++; } } else if (startswith(p, "\\U")) { uint32_t c = read_universal_char(p + 2, 8); if (c) { p += 10; q += encode_utf8(q, c); } else { *q++ = *p++; } } else if (p[0] == '\\') { *q++ = *p++; *q++ = *p++; } else { *q++ = *p++; } } *q = '\0'; } Token *tokenize_file(char *path) { char *p = read_file(path); if (!p) return NULL; // UTF-8 texts may start with a 3-byte "BOM" marker sequence. // If exists, just skip them because they are useless bytes. // (It is actually not recommended to add BOM markers to UTF-8 // texts, but it's not uncommon particularly on Windows.) if (!memcmp(p, "\xef\xbb\xbf", 3)) p += 3; canonicalize_newline(p); remove_backslash_newline(p); convert_universal_chars(p); // Save the filename for assembler .file directive. static int file_no; File *file = new_file(path, file_no + 1, p); // Save the filename for assembler .file directive. input_files = realloc(input_files, sizeof(char *) * (file_no + 2)); input_files[file_no] = file; input_files[file_no + 1] = NULL; file_no++; return tokenize(file); } // BEG type Type *ty_void = &(Type){TY_VOID, 1, 1}; Type *ty_bool = &(Type){TY_BOOL, 1, 1}; Type *ty_char = &(Type){TY_CHAR, 1, 1}; Type *ty_short = &(Type){TY_SHORT, 2, 2}; Type *ty_int = &(Type){TY_INT, 4, 4}; Type *ty_long = &(Type){TY_LONG, 8, 8}; Type *ty_uchar = &(Type){TY_CHAR, 1, 1, true}; Type *ty_ushort = &(Type){TY_SHORT, 2, 2, true}; Type *ty_uint = &(Type){TY_INT, 4, 4, true}; Type *ty_ulong = &(Type){TY_LONG, 8, 8, true}; Type *ty_float = &(Type){TY_FLOAT, 4, 4}; Type *ty_double = &(Type){TY_DOUBLE, 8, 8}; Type *ty_ldouble = &(Type){TY_LDOUBLE, 16, 16}; static Type *new_type(TypeKind kind, int size, int align) { Type *ty = calloc(1, sizeof(Type)); ty->kind = kind; ty->size = size; ty->align = align; return ty; } bool is_integer(Type *ty) { TypeKind k = ty->kind; return k == TY_BOOL || k == TY_CHAR || k == TY_SHORT || k == TY_INT || k == TY_LONG || k == TY_ENUM; } bool is_flonum(Type *ty) { return ty->kind == TY_FLOAT || ty->kind == TY_DOUBLE || ty->kind == TY_LDOUBLE; } bool is_numeric(Type *ty) { return is_integer(ty) || is_flonum(ty); } bool is_compatible(Type *t1, Type *t2) { if (t1 == t2) return true; if (t1->origin) return is_compatible(t1->origin, t2); if (t2->origin) return is_compatible(t1, t2->origin); if (t1->kind != t2->kind) return false; switch (t1->kind) { case TY_CHAR: case TY_SHORT: case TY_INT: case TY_LONG: return t1->is_unsigned == t2->is_unsigned; case TY_FLOAT: case TY_DOUBLE: case TY_LDOUBLE: return true; case TY_PTR: return is_compatible(t1->base, t2->base); case TY_FUNC: { if (!is_compatible(t1->return_ty, t2->return_ty)) return false; if (t1->is_variadic != t2->is_variadic) return false; Type *p1 = t1->params; Type *p2 = t2->params; for (; p1 && p2; p1 = p1->next, p2 = p2->next) if (!is_compatible(p1, p2)) return false; return p1 == NULL && p2 == NULL; } case TY_ARRAY: if (!is_compatible(t1->base, t2->base)) return false; return t1->array_len < 0 && t2->array_len < 0 && t1->array_len == t2->array_len; } return false; } Type *copy_type(Type *ty) { Type *ret = calloc(1, sizeof(Type)); *ret = *ty; ret->origin = ty; return ret; } Type *pointer_to(Type *base) { Type *ty = new_type(TY_PTR, 8, 8); ty->base = base; ty->is_unsigned = true; return ty; } Type *func_type(Type *return_ty) { // The C spec disallows sizeof(<function type>), but // GCC allows that and the expression is evaluated to 1. Type *ty = new_type(TY_FUNC, 1, 1); ty->return_ty = return_ty; return ty; } Type *array_of(Type *base, int len) { Type *ty = new_type(TY_ARRAY, base->size * len, base->align); ty->base = base; ty->array_len = len; return ty; } Type *vla_of(Type *base, Node *len) { Type *ty = new_type(TY_VLA, 8, 8); ty->base = base; ty->vla_len = len; return ty; } Type *enum_type(void) { return new_type(TY_ENUM, 4, 4); } Type *struct_type(void) { return new_type(TY_STRUCT, 0, 1); } static Type *get_common_type(Type *ty1, Type *ty2) { if (ty1->base) return pointer_to(ty1->base); if (ty1->kind == TY_FUNC) return pointer_to(ty1); if (ty2->kind == TY_FUNC) return pointer_to(ty2); if (ty1->kind == TY_LDOUBLE || ty2->kind == TY_LDOUBLE) return ty_ldouble; if (ty1->kind == TY_DOUBLE || ty2->kind == TY_DOUBLE) return ty_double; if (ty1->kind == TY_FLOAT || ty2->kind == TY_FLOAT) return ty_float; if (ty1->size < 4) ty1 = ty_int; if (ty2->size < 4) ty2 = ty_int; if (ty1->size != ty2->size) return (ty1->size < ty2->size) ? ty2 : ty1; if (ty2->is_unsigned) return ty2; return ty1; } // For many binary operators, we implicitly promote operands so that // both operands have the same type. Any integral type smaller than // int is always promoted to int. If the type of one operand is larger // than the other's (e.g. "long" vs. "int"), the smaller operand will // be promoted to match with the other. // // This operation is called the "usual arithmetic conversion". static void usual_arith_conv(Node **lhs, Node **rhs) { Type *ty = get_common_type((*lhs)->ty, (*rhs)->ty); *lhs = new_cast(*lhs, ty); *rhs = new_cast(*rhs, ty); } void add_type(Node *node) { if (!node || node->ty) return; add_type(node->lhs); add_type(node->rhs); add_type(node->cond); add_type(node->then); add_type(node->els); add_type(node->init); add_type(node->inc); for (Node *n = node->body; n; n = n->next) add_type(n); for (Node *n = node->args; n; n = n->next) add_type(n); switch (node->kind) { case ND_NUM: node->ty = ty_int; return; case ND_ADD: case ND_SUB: case ND_MUL: case ND_DIV: case ND_MOD: case ND_BITAND: case ND_BITOR: case ND_BITXOR: usual_arith_conv(&node->lhs, &node->rhs); node->ty = node->lhs->ty; return; case ND_NEG: { Type *ty = get_common_type(ty_int, node->lhs->ty); node->lhs = new_cast(node->lhs, ty); node->ty = ty; return; } case ND_ASSIGN: if (node->lhs->ty->kind == TY_ARRAY) error_tok(node->lhs->tok, "not an lvalue"); if (node->lhs->ty->kind != TY_STRUCT) node->rhs = new_cast(node->rhs, node->lhs->ty); node->ty = node->lhs->ty; return; case ND_EQ: case ND_NE: case ND_LT: case ND_LE: usual_arith_conv(&node->lhs, &node->rhs); node->ty = ty_int; return; case ND_FUNCALL: node->ty = node->func_ty->return_ty; return; case ND_NOT: case ND_LOGOR: case ND_LOGAND: node->ty = ty_int; return; case ND_BITNOT: case ND_SHL: case ND_SHR: node->ty = node->lhs->ty; return; case ND_VAR: case ND_VLA_PTR: node->ty = node->var->ty; return; case ND_COND: if (node->then->ty->kind == TY_VOID || node->els->ty->kind == TY_VOID) { node->ty = ty_void; } else { usual_arith_conv(&node->then, &node->els); node->ty = node->then->ty; } return; case ND_COMMA: node->ty = node->rhs->ty; return; case ND_MEMBER: node->ty = node->member->ty; return; case ND_ADDR: { Type *ty = node->lhs->ty; if (ty->kind == TY_ARRAY) node->ty = pointer_to(ty->base); else node->ty = pointer_to(ty); return; } case ND_DEREF: if (!node->lhs->ty->base) error_tok(node->tok, "invalid pointer dereference"); if (node->lhs->ty->base->kind == TY_VOID) error_tok(node->tok, "dereferencing a void pointer"); node->ty = node->lhs->ty->base; return; case ND_STMT_EXPR: if (node->body) { Node *stmt = node->body; while (stmt->next) stmt = stmt->next; if (stmt->kind == ND_EXPR_STMT) { node->ty = stmt->lhs->ty; return; } } error_tok(node->tok, "statement expression returning void is not supported"); return; case ND_LABEL_VAL: node->ty = pointer_to(ty_void); return; case ND_CAS: add_type(node->cas_addr); add_type(node->cas_old); add_type(node->cas_new); node->ty = ty_bool; if (node->cas_addr->ty->kind != TY_PTR) error_tok(node->cas_addr->tok, "pointer expected"); if (node->cas_old->ty->kind != TY_PTR) error_tok(node->cas_old->tok, "pointer expected"); return; case ND_EXCH: if (node->lhs->ty->kind != TY_PTR) error_tok(node->cas_addr->tok, "pointer expected"); node->ty = node->lhs->ty->base; return; } } // BEG unicode // Encode a given character in UTF-8. int encode_utf8(char *buf, uint32_t c) { if (c <= 0x7F) { buf[0] = c; return 1; } if (c <= 0x7FF) { buf[0] = 0b11000000 | (c >> 6); buf[1] = 0b10000000 | (c & 0b00111111); return 2; } if (c <= 0xFFFF) { buf[0] = 0b11100000 | (c >> 12); buf[1] = 0b10000000 | ((c >> 6) & 0b00111111); buf[2] = 0b10000000 | (c & 0b00111111); return 3; } buf[0] = 0b11110000 | (c >> 18); buf[1] = 0b10000000 | ((c >> 12) & 0b00111111); buf[2] = 0b10000000 | ((c >> 6) & 0b00111111); buf[3] = 0b10000000 | (c & 0b00111111); return 4; } // Read a UTF-8-encoded Unicode code point from a source file. // We assume that source files are always in UTF-8. // // UTF-8 is a variable-width encoding in which one code point is // encoded in one to four bytes. One byte UTF-8 code points are // identical to ASCII. Non-ASCII characters are encoded using more // than one byte. uint32_t decode_utf8(char **new_pos, char *p) { if ((unsigned char)*p < 128) { *new_pos = p + 1; return *p; } char *start = p; int len; uint32_t c; if ((unsigned char)*p >= 0b11110000) { len = 4; c = *p & 0b111; } else if ((unsigned char)*p >= 0b11100000) { len = 3; c = *p & 0b1111; } else if ((unsigned char)*p >= 0b11000000) { len = 2; c = *p & 0b11111; } else { error_at(start, "invalid UTF-8 sequence"); } for (int i = 1; i < len; i++) { if ((unsigned char)p[i] >> 6 != 0b10) error_at(start, "invalid UTF-8 sequence"); c = (c << 6) | (p[i] & 0b111111); } *new_pos = p + len; return c; } static bool in_range(uint32_t *range, uint32_t c) { for (int i = 0; range[i] != -1; i += 2) if (range[i] <= c && c <= range[i + 1]) return true; return false; } // [https://www.sigbus.info/n1570#D] C11 allows not only ASCII but // some multibyte characters in certan Unicode ranges to be used in an // identifier. // // This function returns true if a given character is acceptable as // the first character of an identifier. // // For example, ¾ (U+00BE) is a valid identifier because characters in // 0x00BE-0x00C0 are allowed, while neither ⟘ (U+27D8) nor ' ' // (U+3000, full-width space) are allowed because they are out of range. bool is_ident1(uint32_t c) { static uint32_t range[] = { '_', '_', 'a', 'z', 'A', 'Z', '$', '$', 0x00A8, 0x00A8, 0x00AA, 0x00AA, 0x00AD, 0x00AD, 0x00AF, 0x00AF, 0x00B2, 0x00B5, 0x00B7, 0x00BA, 0x00BC, 0x00BE, 0x00C0, 0x00D6, 0x00D8, 0x00F6, 0x00F8, 0x00FF, 0x0100, 0x02FF, 0x0370, 0x167F, 0x1681, 0x180D, 0x180F, 0x1DBF, 0x1E00, 0x1FFF, 0x200B, 0x200D, 0x202A, 0x202E, 0x203F, 0x2040, 0x2054, 0x2054, 0x2060, 0x206F, 0x2070, 0x20CF, 0x2100, 0x218F, 0x2460, 0x24FF, 0x2776, 0x2793, 0x2C00, 0x2DFF, 0x2E80, 0x2FFF, 0x3004, 0x3007, 0x3021, 0x302F, 0x3031, 0x303F, 0x3040, 0xD7FF, 0xF900, 0xFD3D, 0xFD40, 0xFDCF, 0xFDF0, 0xFE1F, 0xFE30, 0xFE44, 0xFE47, 0xFFFD, 0x10000, 0x1FFFD, 0x20000, 0x2FFFD, 0x30000, 0x3FFFD, 0x40000, 0x4FFFD, 0x50000, 0x5FFFD, 0x60000, 0x6FFFD, 0x70000, 0x7FFFD, 0x80000, 0x8FFFD, 0x90000, 0x9FFFD, 0xA0000, 0xAFFFD, 0xB0000, 0xBFFFD, 0xC0000, 0xCFFFD, 0xD0000, 0xDFFFD, 0xE0000, 0xEFFFD, -1, }; return in_range(range, c); } // Returns true if a given character is acceptable as a non-first // character of an identifier. bool is_ident2(uint32_t c) { static uint32_t range[] = { '0', '9', '$', '$', 0x0300, 0x036F, 0x1DC0, 0x1DFF, 0x20D0, 0x20FF, 0xFE20, 0xFE2F, -1, }; return is_ident1(c) || in_range(range, c); } // Returns the number of columns needed to display a given // character in a fixed-width font. // // Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c static int char_width(uint32_t c) { static uint32_t range1[] = { 0x0000, 0x001F, 0x007f, 0x00a0, 0x0300, 0x036F, 0x0483, 0x0486, 0x0488, 0x0489, 0x0591, 0x05BD, 0x05BF, 0x05BF, 0x05C1, 0x05C2, 0x05C4, 0x05C5, 0x05C7, 0x05C7, 0x0600, 0x0603, 0x0610, 0x0615, 0x064B, 0x065E, 0x0670, 0x0670, 0x06D6, 0x06E4, 0x06E7, 0x06E8, 0x06EA, 0x06ED, 0x070F, 0x070F, 0x0711, 0x0711, 0x0730, 0x074A, 0x07A6, 0x07B0, 0x07EB, 0x07F3, 0x0901, 0x0902, 0x093C, 0x093C, 0x0941, 0x0948, 0x094D, 0x094D, 0x0951, 0x0954, 0x0962, 0x0963, 0x0981, 0x0981, 0x09BC, 0x09BC, 0x09C1, 0x09C4, 0x09CD, 0x09CD, 0x09E2, 0x09E3, 0x0A01, 0x0A02, 0x0A3C, 0x0A3C, 0x0A41, 0x0A42, 0x0A47, 0x0A48, 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A82, 0x0ABC, 0x0ABC, 0x0AC1, 0x0AC5, 0x0AC7, 0x0AC8, 0x0ACD, 0x0ACD, 0x0AE2, 0x0AE3, 0x0B01, 0x0B01, 0x0B3C, 0x0B3C, 0x0B3F, 0x0B3F, 0x0B41, 0x0B43, 0x0B4D, 0x0B4D, 0x0B56, 0x0B56, 0x0B82, 0x0B82, 0x0BC0, 0x0BC0, 0x0BCD, 0x0BCD, 0x0C3E, 0x0C40, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 0x0CBC, 0x0CBC, 0x0CBF, 0x0CBF, 0x0CC6, 0x0CC6, 0x0CCC, 0x0CCD, 0x0CE2, 0x0CE3, 0x0D41, 0x0D43, 0x0D4D, 0x0D4D, 0x0DCA, 0x0DCA, 0x0DD2, 0x0DD4, 0x0DD6, 0x0DD6, 0x0E31, 0x0E31, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB1, 0x0EB1, 0x0EB4, 0x0EB9, 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F35, 0x0F35, 0x0F37, 0x0F37, 0x0F39, 0x0F39, 0x0F71, 0x0F7E, 0x0F80, 0x0F84, 0x0F86, 0x0F87, 0x0F90, 0x0F97, 0x0F99, 0x0FBC, 0x0FC6, 0x0FC6, 0x102D, 0x1030, 0x1032, 0x1032, 0x1036, 0x1037, 0x1039, 0x1039, 0x1058, 0x1059, 0x1160, 0x11FF, 0x135F, 0x135F, 0x1712, 0x1714, 0x1732, 0x1734, 0x1752, 0x1753, 0x1772, 0x1773, 0x17B4, 0x17B5, 0x17B7, 0x17BD, 0x17C6, 0x17C6, 0x17C9, 0x17D3, 0x17DD, 0x17DD, 0x180B, 0x180D, 0x18A9, 0x18A9, 0x1920, 0x1922, 0x1927, 0x1928, 0x1932, 0x1932, 0x1939, 0x193B, 0x1A17, 0x1A18, 0x1B00, 0x1B03, 0x1B34, 0x1B34, 0x1B36, 0x1B3A, 0x1B3C, 0x1B3C, 0x1B42, 0x1B42, 0x1B6B, 0x1B73, 0x1DC0, 0x1DCA, 0x1DFE, 0x1DFF, 0x200B, 0x200F, 0x202A, 0x202E, 0x2060, 0x2063, 0x206A, 0x206F, 0x20D0, 0x20EF, 0x302A, 0x302F, 0x3099, 0x309A, 0xA806, 0xA806, 0xA80B, 0xA80B, 0xA825, 0xA826, 0xFB1E, 0xFB1E, 0xFE00, 0xFE0F, 0xFE20, 0xFE23, 0xFEFF, 0xFEFF, 0xFFF9, 0xFFFB, 0x10A01, 0x10A03, 0x10A05, 0x10A06, 0x10A0C, 0x10A0F, 0x10A38, 0x10A3A, 0x10A3F, 0x10A3F, 0x1D167, 0x1D169, 0x1D173, 0x1D182, 0x1D185, 0x1D18B, 0x1D1AA, 0x1D1AD, 0x1D242, 0x1D244, 0xE0001, 0xE0001, 0xE0020, 0xE007F, 0xE0100, 0xE01EF, -1, }; if (in_range(range1, c)) return 0; static uint32_t range2[] = { 0x1100, 0x115F, 0x2329, 0x2329, 0x232A, 0x232A, 0x2E80, 0x303E, 0x3040, 0xA4CF, 0xAC00, 0xD7A3, 0xF900, 0xFAFF, 0xFE10, 0xFE19, 0xFE30, 0xFE6F, 0xFF00, 0xFF60, 0xFFE0, 0xFFE6, 0x1F000, 0x1F644, 0x20000, 0x2FFFD, 0x30000, 0x3FFFD, -1, }; if (in_range(range2, c)) return 2; return 1; } // Returns the number of columns needed to display a given // string in a fixed-width font. int display_width(char *p, int len) { char *start = p; int w = 0; while (p - start < len) { uint32_t c = decode_utf8(&p, p); w += char_width(c); } return w; } // BEG main typedef enum { FILE_NONE, FILE_C, FILE_ASM, FILE_OBJ, FILE_AR, FILE_DSO, } FileType; StringArray include_paths; bool opt_fcommon = true; bool opt_fpic; static FileType opt_x; static StringArray opt_include; static bool opt_E; static bool opt_M; static bool opt_MD; static bool opt_MMD; static bool opt_MP; static bool opt_S; static bool opt_c; static bool opt_cc1; static bool opt_hash_hash_hash; static bool opt_static; static bool opt_shared; static char *opt_MF; static char *opt_MT; static char *opt_o; static StringArray ld_extra_args; static StringArray std_include_paths; char *base_file; static char *output_filec; static StringArray input_paths; static StringArray tmpfiles; static void usage(int status) { fprintf(stderr, "chibicc [ -o <path> ] <file>\n"); exit(status); } static bool take_arg(char *arg) { char *x[] = { "-o", "-I", "-idirafter", "-include", "-x", "-MF", "-MT", "-Xlinker", }; for (int i = 0; i < sizeof(x) / sizeof(*x); i++) if (!strcmp(arg, x[i])) return true; return false; } static void add_default_include_paths(char *argv0) { // We expect that chibicc-specific include files are installed // to ./include relative to argv[0]. strarray_push(&include_paths, format("%s/include", dirname(strdup(argv0)))); // Add standard include paths. strarray_push(&include_paths, "/usr/local/include"); strarray_push(&include_paths, "/usr/include/x86_64-linux-gnu"); strarray_push(&include_paths, "/usr/include"); // Keep a copy of the standard include paths for -MMD option. for (int i = 0; i < include_paths.len; i++) strarray_push(&std_include_paths, include_paths.data[i]); } static void define(char *str) { char *eq = strchr(str, '='); if (eq) define_macro(strndup(str, eq - str), eq + 1); else define_macro(str, "1"); } static FileType parse_opt_x(char *s) { if (!strcmp(s, "c")) return FILE_C; if (!strcmp(s, "assembler")) return FILE_ASM; if (!strcmp(s, "none")) return FILE_NONE; error("<command line>: unknown argument for -x: %s", s); } static char *quote_makefile(char *s) { char *buf = calloc(1, strlen(s) * 2 + 1); for (int i = 0, j = 0; s[i]; i++) { switch (s[i]) { case '$': buf[j++] = '$'; buf[j++] = '$'; break; case '#': buf[j++] = '\\'; buf[j++] = '#'; break; case ' ': case '\t': for (int k = i - 1; k >= 0 && s[k] == '\\'; k--) buf[j++] = '\\'; buf[j++] = '\\'; buf[j++] = s[i]; break; default: buf[j++] = s[i]; break; } } return buf; } static void parse_args(int argc, char **argv) { // Make sure that all command line options that take an argument // have an argument. for (int i = 1; i < argc; i++) if (take_arg(argv[i])) if (!argv[++i]) usage(1); StringArray idirafter = {}; for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "-###")) { opt_hash_hash_hash = true; continue; } if (!strcmp(argv[i], "-cc1")) { opt_cc1 = true; continue; } if (!strcmp(argv[i], "--help")) usage(0); if (!strcmp(argv[i], "-o")) { opt_o = argv[++i]; continue; } if (!strncmp(argv[i], "-o", 2)) { opt_o = argv[i] + 2; continue; } if (!strcmp(argv[i], "-S")) { opt_S = true; continue; } if (!strcmp(argv[i], "-fcommon")) { opt_fcommon = true; continue; } if (!strcmp(argv[i], "-fno-common")) { opt_fcommon = false; continue; } if (!strcmp(argv[i], "-c")) { opt_c = true; continue; } if (!strcmp(argv[i], "-E")) { opt_E = true; continue; } if (!strncmp(argv[i], "-I", 2)) { strarray_push(&include_paths, argv[i] + 2); continue; } if (!strcmp(argv[i], "-D")) { define(argv[++i]); continue; } if (!strncmp(argv[i], "-D", 2)) { define(argv[i] + 2); continue; } if (!strcmp(argv[i], "-U")) { undef_macro(argv[++i]); continue; } if (!strncmp(argv[i], "-U", 2)) { undef_macro(argv[i] + 2); continue; } if (!strcmp(argv[i], "-include")) { strarray_push(&opt_include, argv[++i]); continue; } if (!strcmp(argv[i], "-x")) { opt_x = parse_opt_x(argv[++i]); continue; } if (!strncmp(argv[i], "-x", 2)) { opt_x = parse_opt_x(argv[i] + 2); continue; } if (!strncmp(argv[i], "-l", 2) || !strncmp(argv[i], "-Wl,", 4)) { strarray_push(&input_paths, argv[i]); continue; } if (!strcmp(argv[i], "-Xlinker")) { strarray_push(&ld_extra_args, argv[++i]); continue; } if (!strcmp(argv[i], "-s")) { strarray_push(&ld_extra_args, "-s"); continue; } if (!strcmp(argv[i], "-M")) { opt_M = true; continue; } if (!strcmp(argv[i], "-MF")) { opt_MF = argv[++i]; continue; } if (!strcmp(argv[i], "-MP")) { opt_MP = true; continue; } if (!strcmp(argv[i], "-MT")) { if (opt_MT == NULL) opt_MT = argv[++i]; else opt_MT = format("%s %s", opt_MT, argv[++i]); continue; } if (!strcmp(argv[i], "-MD")) { opt_MD = true; continue; } if (!strcmp(argv[i], "-MQ")) { if (opt_MT == NULL) opt_MT = quote_makefile(argv[++i]); else opt_MT = format("%s %s", opt_MT, quote_makefile(argv[++i])); continue; } if (!strcmp(argv[i], "-MMD")) { opt_MD = opt_MMD = true; continue; } if (!strcmp(argv[i], "-fpic") || !strcmp(argv[i], "-fPIC")) { opt_fpic = true; continue; } if (!strcmp(argv[i], "-cc1-input")) { base_file = argv[++i]; continue; } if (!strcmp(argv[i], "-cc1-output")) { output_filec = argv[++i]; continue; } if (!strcmp(argv[i], "-idirafter")) { strarray_push(&idirafter, argv[i++]); continue; } if (!strcmp(argv[i], "-static")) { opt_static = true; strarray_push(&ld_extra_args, "-static"); continue; } if (!strcmp(argv[i], "-shared")) { opt_shared = true; strarray_push(&ld_extra_args, "-shared"); continue; } if (!strcmp(argv[i], "-L")) { strarray_push(&ld_extra_args, "-L"); strarray_push(&ld_extra_args, argv[++i]); continue; } if (!strncmp(argv[i], "-L", 2)) { strarray_push(&ld_extra_args, "-L"); strarray_push(&ld_extra_args, argv[i] + 2); continue; } if (!strcmp(argv[i], "-hashmap-test")) { hashmap_test(); exit(0); } // These options are ignored for now. if (!strncmp(argv[i], "-O", 2) || !strncmp(argv[i], "-W", 2) || !strncmp(argv[i], "-g", 2) || !strncmp(argv[i], "-std=", 5) || !strcmp(argv[i], "-ffreestanding") || !strcmp(argv[i], "-fno-builtin") || !strcmp(argv[i], "-fno-omit-frame-pointer") || !strcmp(argv[i], "-fno-stack-protector") || !strcmp(argv[i], "-fno-strict-aliasing") || !strcmp(argv[i], "-m64") || !strcmp(argv[i], "-mno-red-zone") || !strcmp(argv[i], "-w")) continue; if (argv[i][0] == '-' && argv[i][1] != '\0') error("unknown argument: %s", argv[i]); strarray_push(&input_paths, argv[i]); } for (int i = 0; i < idirafter.len; i++) strarray_push(&include_paths, idirafter.data[i]); if (input_paths.len == 0) error("no input files"); // -E implies that the input is the C macro language. if (opt_E) opt_x = FILE_C; } static FILE *open_file(char *path) { if (!path || strcmp(path, "-") == 0) return stdout; FILE *out = fopen(path, "w"); if (!out) error("cannot open output file: %s: %s", path, strerror(errno)); return out; } static bool endswith(char *p, char *q) { int len1 = strlen(p); int len2 = strlen(q); return (len1 >= len2) && !strcmp(p + len1 - len2, q); } // Replace file extension static char *replace_extn(char *tmpl, char *extn) { char *filename = basename(strdup(tmpl)); char *dot = strrchr(filename, '.'); if (dot) *dot = '\0'; return format("%s%s", filename, extn); } static void cleanup(void) { for (int i = 0; i < tmpfiles.len; i++) unlink(tmpfiles.data[i]); } static char *create_tmpfile(void) { char *path = strdup("/tmp/chibicc-XXXXXX"); int fd = mkstemp(path); if (fd == -1) error("mkstemp failed: %s", strerror(errno)); close(fd); strarray_push(&tmpfiles, path); return path; } static void run_subprocess(char **argv) { // If -### is given, dump the subprocess's command line. if (opt_hash_hash_hash) { fprintf(stderr, "%s", argv[0]); for (int i = 1; argv[i]; i++) fprintf(stderr, " %s", argv[i]); fprintf(stderr, "\n"); } if (fork() == 0) { // Child process. Run a new command. execvp(argv[0], argv); fprintf(stderr, "exec failed: %s: %s\n", argv[0], strerror(errno)); _exit(1); } // Wait for the child process to finish. int status; while (wait(&status) > 0); if (status != 0) exit(1); } static void run_cc1(int argc, char **argv, char *input, char *output) { char **args = calloc(argc + 10, sizeof(char *)); memcpy(args, argv, argc * sizeof(char *)); args[argc++] = "-cc1"; if (input) { args[argc++] = "-cc1-input"; args[argc++] = input; } if (output) { args[argc++] = "-cc1-output"; args[argc++] = output; } run_subprocess(args); } // Print tokens to stdout. Used for -E. static void print_tokens(Token *tok) { FILE *out = open_file(opt_o ? opt_o : "-"); int line = 1; for (; tok->kind != TK_EOF; tok = tok->next) { if (line > 1 && tok->at_bol) fprintf(out, "\n"); if (tok->has_space && !tok->at_bol) fprintf(out, " "); fprintf(out, "%.*s", tok->len, tok->loc); line++; } fprintf(out, "\n"); } static bool in_std_include_path(char *path) { for (int i = 0; i < std_include_paths.len; i++) { char *dir = std_include_paths.data[i]; int len = strlen(dir); if (strncmp(dir, path, len) == 0 && path[len] == '/') return true; } return false; } // If -M options is given, the compiler write a list of input files to // stdout in a format that "make" command can read. This feature is // used to automate file dependency management. static void print_dependencies(void) { char *path; if (opt_MF) path = opt_MF; else if (opt_MD) path = replace_extn(opt_o ? opt_o : base_file, ".d"); else if (opt_o) path = opt_o; else path = "-"; FILE *out = open_file(path); if (opt_MT) fprintf(out, "%s:", opt_MT); else fprintf(out, "%s:", quote_makefile(replace_extn(base_file, ".o"))); File **files = get_input_files(); for (int i = 0; files[i]; i++) { if (opt_MMD && in_std_include_path(files[i]->name)) continue; fprintf(out, " \\\n %s", files[i]->name); } fprintf(out, "\n\n"); if (opt_MP) { for (int i = 1; files[i]; i++) { if (opt_MMD && in_std_include_path(files[i]->name)) continue; fprintf(out, "%s:\n\n", quote_makefile(files[i]->name)); } } } static Token *must_tokenize_file(char *path) { Token *tok = tokenize_file(path); if (!tok) error("%s: %s", path, strerror(errno)); return tok; } static Token *append_tokens(Token *tok1, Token *tok2) { if (!tok1 || tok1->kind == TK_EOF) return tok2; Token *t = tok1; while (t->next->kind != TK_EOF) t = t->next; t->next = tok2; return tok1; } static void cc1(void) { Token *tok = NULL; // Process -include option for (int i = 0; i < opt_include.len; i++) { char *incl = opt_include.data[i]; char *path; if (file_exists(incl)) { path = incl; } else { path = search_include_paths(incl); if (!path) error("-include: %s: %s", incl, strerror(errno)); } Token *tok2 = must_tokenize_file(path); tok = append_tokens(tok, tok2); } // Tokenize and parse. Token *tok2 = must_tokenize_file(base_file); tok = append_tokens(tok, tok2); tok = preprocess(tok); // If -M or -MD are given, print file dependencies. if (opt_M || opt_MD) { print_dependencies(); if (opt_M) return; } // If -E is given, print out preprocessed C code as a result. if (opt_E) { print_tokens(tok); return; } Obj *prog = parse(tok); // Open a temporary output buffer. char *buf; size_t buflen; FILE *output_buf = open_memstream(&buf, &buflen); // Traverse the AST to emit assembly. codegen(prog, output_buf); fclose(output_buf); // Write the asembly text to a file. FILE *out = open_file(output_filec); fwrite(buf, buflen, 1, out); fclose(out); } static void assemble(char *input, char *output) { char *cmd[] = {"as", "-c", input, "-o", output, NULL}; run_subprocess(cmd); } static char *find_file(char *pattern) { char *path = NULL; glob_t buf = {}; glob(pattern, 0, NULL, &buf); if (buf.gl_pathc > 0) path = strdup(buf.gl_pathv[buf.gl_pathc - 1]); globfree(&buf); return path; } // Returns true if a given file exists. bool file_exists(char *path) { struct stat st; return !stat(path, &st); } static char *find_libpath(void) { if (file_exists("/usr/lib/x86_64-linux-gnu/crti.o")) return "/usr/lib/x86_64-linux-gnu"; if (file_exists("/usr/lib64/crti.o")) return "/usr/lib64"; error("library path is not found"); } static char *find_gcc_libpath(void) { char *paths[] = { "/usr/lib/gcc/x86_64-linux-gnu/*/crtbegin.o", "/usr/lib/gcc/x86_64-pc-linux-gnu/*/crtbegin.o", // For Gentoo "/usr/lib/gcc/x86_64-redhat-linux/*/crtbegin.o", // For Fedora }; for (int i = 0; i < sizeof(paths) / sizeof(*paths); i++) { char *path = find_file(paths[i]); if (path) return dirname(path); } error("gcc library path is not found"); } static void run_linker(StringArray *inputs, char *output) { StringArray arr = {}; strarray_push(&arr, "ld"); strarray_push(&arr, "-o"); strarray_push(&arr, output); strarray_push(&arr, "-m"); strarray_push(&arr, "elf_x86_64"); char *libpath = find_libpath(); char *gcc_libpath = find_gcc_libpath(); if (opt_shared) { strarray_push(&arr, format("%s/crti.o", libpath)); strarray_push(&arr, format("%s/crtbeginS.o", gcc_libpath)); } else { strarray_push(&arr, format("%s/crt1.o", libpath)); strarray_push(&arr, format("%s/crti.o", libpath)); strarray_push(&arr, format("%s/crtbegin.o", gcc_libpath)); } strarray_push(&arr, format("-L%s", gcc_libpath)); strarray_push(&arr, "-L/usr/lib/x86_64-linux-gnu"); strarray_push(&arr, "-L/usr/lib64"); strarray_push(&arr, "-L/lib64"); strarray_push(&arr, "-L/usr/lib/x86_64-linux-gnu"); strarray_push(&arr, "-L/usr/lib/x86_64-pc-linux-gnu"); strarray_push(&arr, "-L/usr/lib/x86_64-redhat-linux"); strarray_push(&arr, "-L/usr/lib"); strarray_push(&arr, "-L/lib"); if (!opt_static) { strarray_push(&arr, "-dynamic-linker"); strarray_push(&arr, "/lib64/ld-linux-x86-64.so.2"); } for (int i = 0; i < ld_extra_args.len; i++) strarray_push(&arr, ld_extra_args.data[i]); for (int i = 0; i < inputs->len; i++) strarray_push(&arr, inputs->data[i]); if (opt_static) { strarray_push(&arr, "--start-group"); strarray_push(&arr, "-lgcc"); strarray_push(&arr, "-lgcc_eh"); strarray_push(&arr, "-lc"); strarray_push(&arr, "--end-group"); } else { strarray_push(&arr, "-lc"); strarray_push(&arr, "-lgcc"); strarray_push(&arr, "--as-needed"); strarray_push(&arr, "-lgcc_s"); strarray_push(&arr, "--no-as-needed"); } if (opt_shared) strarray_push(&arr, format("%s/crtendS.o", gcc_libpath)); else strarray_push(&arr, format("%s/crtend.o", gcc_libpath)); strarray_push(&arr, format("%s/crtn.o", libpath)); strarray_push(&arr, NULL); run_subprocess(arr.data); } static FileType get_file_type(char *filename) { if (opt_x != FILE_NONE) return opt_x; if (endswith(filename, ".a")) return FILE_AR; if (endswith(filename, ".so")) return FILE_DSO; if (endswith(filename, ".o")) return FILE_OBJ; if (endswith(filename, ".c")) return FILE_C; if (endswith(filename, ".s")) return FILE_ASM; error("<command line>: unknown file extension: %s", filename); } int main(int argc, char **argv) { atexit(cleanup); init_macros(); parse_args(argc, argv); if (opt_cc1) { add_default_include_paths(argv[0]); cc1(); return 0; } if (input_paths.len > 1 && opt_o && (opt_c || opt_S | opt_E)) error("cannot specify '-o' with '-c,' '-S' or '-E' with multiple files"); StringArray ld_args = {}; for (int i = 0; i < input_paths.len; i++) { char *input = input_paths.data[i]; if (!strncmp(input, "-l", 2)) { strarray_push(&ld_args, input); continue; } if (!strncmp(input, "-Wl,", 4)) { char *s = strdup(input + 4); char *arg = strtok(s, ","); while (arg) { strarray_push(&ld_args, arg); arg = strtok(NULL, ","); } continue; } char *output; if (opt_o) output = opt_o; else if (opt_S) output = replace_extn(input, ".s"); else output = replace_extn(input, ".o"); FileType type = get_file_type(input); // Handle .o or .a if (type == FILE_OBJ || type == FILE_AR || type == FILE_DSO) { strarray_push(&ld_args, input); continue; } // Handle .s if (type == FILE_ASM) { if (!opt_S) assemble(input, output); continue; } assert(type == FILE_C); // Just preprocess if (opt_E || opt_M) { run_cc1(argc, argv, input, NULL); continue; } // Compile if (opt_S) { run_cc1(argc, argv, input, output); continue; } // Compile and assemble if (opt_c) { char *tmp = create_tmpfile(); run_cc1(argc, argv, input, tmp); assemble(tmp, output); continue; } // Compile, assemble and link char *tmp1 = create_tmpfile(); char *tmp2 = create_tmpfile(); run_cc1(argc, argv, input, tmp1); assemble(tmp1, tmp2); strarray_push(&ld_args, tmp2); continue; } if (ld_args.len > 0) run_linker(&ld_args, opt_o ? opt_o : "a.out"); return 0; }
Become a Patron
Sponsor on GitHub
Donate via PayPal
Compiler Explorer Shop
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
Statistics
Changelog
Version tree