Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
Go
Haskell
HLSL
Hook
Hylo
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Nim
Objective-C
Objective-C++
OCaml
OpenCL C
Pascal
Pony
Python
Racket
Ruby
Rust
Snowball
Scala
Solidity
Spice
Swift
LLVM TableGen
Toit
TypeScript Native
V
Vala
Visual Basic
Zig
JavaScript
GIMPLE
llvm source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
clang (assertions trunk)
clang (trunk)
clang 10.0.0
clang 10.0.1
clang 11.0.0
clang 11.0.1
clang 12.0.0
clang 12.0.1
clang 13.0.0
clang 14.0.0
clang 15.0.0
clang 16.0.0
clang 17.0.1
clang 18.1.0
clang 4.0.1
clang 5.0.0
clang 6.0.0
clang 7.0.0
clang 8.0.0
clang 9.0.0
llc (assertions trunk)
llc (trunk)
llc 10.0.0
llc 10.0.1
llc 11.0.0
llc 11.0.1
llc 12.0.0
llc 12.0.1
llc 13.0.0
llc 14.0.0
llc 15.0.0
llc 16.0.0
llc 17.0.1
llc 18.1.0
llc 3.2
llc 3.3
llc 3.9.1
llc 4.0.0
llc 4.0.1
llc 5.0.0
llc 6.0.0
llc 7.0.0
llc 8.0.0
llc 9.0.0
opt (assertions trunk)
opt (trunk)
opt 10.0.0
opt 10.0.1
opt 11.0.0
opt 11.0.1
opt 12.0.0
opt 12.0.1
opt 13.0.0
opt 14.0.0
opt 15.0.0
opt 16.0.0
opt 17.0.1
opt 18.1.0
opt 3.2
opt 3.3
opt 3.9.1
opt 4.0.0
opt 4.0.1
opt 5.0.0
opt 6.0.0
opt 7.0.0
opt 8.0.0
opt 9.0.0
Options
Source code
; ModuleID = 'mydot'
; LLVM IR module containing two function definitions:
;   * @julia_mydot_8568  - the numeric kernel (dot-product style reduction over
;                          two double buffers), already vectorized.
;   * @jfptr_mydot_8569  - a wrapper that unpacks an argument array, calls the
;                          kernel, and stores the double result in a freshly
;                          allocated object.
; NOTE(review): the module uses typed-pointer syntax (i8*, double*, ...).
; Newer LLVM releases accept only the opaque `ptr` type - confirm which
; llc/opt/clang version this text is meant to be fed to.
source_filename = "mydot"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-generic-linux"

; Opaque type: the IR never looks at its layout directly, only through bitcasts.
%jl_value_t = type opaque

; Keeps the three GC entry points referenced so they are not dropped as unused.
@llvm.compiler.used = appending global [3 x i8*] [i8* bitcast (void (%jl_value_t*)* @jl_gc_queue_root to i8*), i8* bitcast (%jl_value_t* (i8*, i32, i32)* @jl_gc_pool_alloc to i8*), i8* bitcast (%jl_value_t* (i8*, i64)* @jl_gc_big_alloc to i8*)], section "llvm.metadata"

; julia_mydot_8568(%0, %1, %2) -> double
;
; Returns the sum of a[i] * b[i] for i in [0, max(%2, 0)), where `a` and `b`
; are the double* values loaded from the first pointer-sized slot of %0 and %1.
; Returns 0.0 without touching memory when %2 <= 0.
;
; Structure:
;   top          - clamp the trip count to >= 0, early-out on empty range
;   L28.lr.ph    - load the two data pointers, pick vector vs. scalar path
;   vector.body  - 32 elements per iteration (4 accumulators of <8 x double>)
;   middle.block - fold the 4 accumulators and reduce horizontally to a scalar
;   L28          - scalar loop for the remainder (or everything if count < 32)
;   L55          - single exit, merges the three possible results
;
; The multiplies carry `contract` and the adds carry `fast`, so the summation
; order here is not the strict left-to-right order of a scalar loop.
define double @julia_mydot_8568(%jl_value_t* nonnull align 16 dereferenceable(40) %0, %jl_value_t* nonnull align 16 dereferenceable(40) %1, i64 %2) !dbg !5 {
top:
  ; %3 = max(%2, 0): the effective trip count.
  %.inv = icmp sgt i64 %2, 0, !dbg !7
  %3 = select i1 %.inv, i64 %2, i64 0, !dbg !7
  br i1 %.inv, label %L28.lr.ph, label %L55, !dbg !18

L28.lr.ph:                                        ; preds = %top
  ; %5 / %7: element pointers read from offset 0 of each argument object.
  %4 = bitcast %jl_value_t* %0 to double**, !dbg !19
  %5 = load double*, double** %4, align 8, !dbg !19, !tbaa !25, !nonnull !4
  %6 = bitcast %jl_value_t* %1 to double**, !dbg !19
  %7 = load double*, double** %6, align 8, !dbg !19, !tbaa !25, !nonnull !4
  ; Fewer than 32 elements: skip the vector loop entirely.
  %min.iters.check = icmp ult i64 %3, 32, !dbg !30
  br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !30

vector.ph:                                        ; preds = %L28.lr.ph
  ; %n.vec = trip count rounded down to a multiple of 32.
  %n.mod.vf = urem i64 %3, 32, !dbg !30
  %n.vec = sub i64 %3, %n.mod.vf, !dbg !30
  br label %vector.body, !dbg !30

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !31
  ; Four independent partial-sum vectors, all starting at zero.
  %vec.phi = phi <8 x double> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ]
  %vec.phi19 = phi <8 x double> [ zeroinitializer, %vector.ph ], [ %32, %vector.body ]
  %vec.phi20 = phi <8 x double> [ zeroinitializer, %vector.ph ], [ %33, %vector.body ]
  %vec.phi21 = phi <8 x double> [ zeroinitializer, %vector.ph ], [ %34, %vector.body ]
  %8 = add i64 %index, 0, !dbg !31
  ; Load a[index .. index+31] as four 8-wide vectors (element offsets 0, 8, 16, 24).
  ; Loads are only 8-byte aligned, i.e. element alignment, not vector alignment.
  %9 = getelementptr inbounds double, double* %5, i64 %8, !dbg !35
  %10 = getelementptr inbounds double, double* %9, i32 0, !dbg !35
  %11 = bitcast double* %10 to <8 x double>*, !dbg !35
  %wide.load = load <8 x double>, <8 x double>* %11, align 8, !dbg !35, !tbaa !36
  %12 = getelementptr inbounds double, double* %9, i32 8, !dbg !35
  %13 = bitcast double* %12 to <8 x double>*, !dbg !35
  %wide.load22 = load <8 x double>, <8 x double>* %13, align 8, !dbg !35, !tbaa !36
  %14 = getelementptr inbounds double, double* %9, i32 16, !dbg !35
  %15 = bitcast double* %14 to <8 x double>*, !dbg !35
  %wide.load23 = load <8 x double>, <8 x double>* %15, align 8, !dbg !35, !tbaa !36
  %16 = getelementptr inbounds double, double* %9, i32 24, !dbg !35
  %17 = bitcast double* %16 to <8 x double>*, !dbg !35
  %wide.load24 = load <8 x double>, <8 x double>* %17, align 8, !dbg !35, !tbaa !36
  ; Same four loads for b[index .. index+31].
  %18 = getelementptr inbounds double, double* %7, i64 %8, !dbg !35
  %19 = getelementptr inbounds double, double* %18, i32 0, !dbg !35
  %20 = bitcast double* %19 to <8 x double>*, !dbg !35
  %wide.load25 = load <8 x double>, <8 x double>* %20, align 8, !dbg !35, !tbaa !36
  %21 = getelementptr inbounds double, double* %18, i32 8, !dbg !35
  %22 = bitcast double* %21 to <8 x double>*, !dbg !35
  %wide.load26 = load <8 x double>, <8 x double>* %22, align 8, !dbg !35, !tbaa !36
  %23 = getelementptr inbounds double, double* %18, i32 16, !dbg !35
  %24 = bitcast double* %23 to <8 x double>*, !dbg !35
  %wide.load27 = load <8 x double>, <8 x double>* %24, align 8, !dbg !35, !tbaa !36
  %25 = getelementptr inbounds double, double* %18, i32 24, !dbg !35
  %26 = bitcast double* %25 to <8 x double>*, !dbg !35
  %wide.load28 = load <8 x double>, <8 x double>* %26, align 8, !dbg !35, !tbaa !36
  ; Element-wise products, then accumulate into the matching partial sum.
  %27 = fmul contract <8 x double> %wide.load, %wide.load25, !dbg !39
  %28 = fmul contract <8 x double> %wide.load22, %wide.load26, !dbg !39
  %29 = fmul contract <8 x double> %wide.load23, %wide.load27, !dbg !39
  %30 = fmul contract <8 x double> %wide.load24, %wide.load28, !dbg !39
  %31 = fadd fast <8 x double> %vec.phi, %27, !dbg !42
  %32 = fadd fast <8 x double> %vec.phi19, %28, !dbg !42
  %33 = fadd fast <8 x double> %vec.phi20, %29, !dbg !42
  %34 = fadd fast <8 x double> %vec.phi21, %30, !dbg !42
  %index.next = add i64 %index, 32, !dbg !31
  %35 = icmp eq i64 %index.next, %n.vec, !dbg !31
  br i1 %35, label %middle.block, label %vector.body, !dbg !31, !llvm.loop !44

middle.block:                                     ; preds = %vector.body
  ; Fold the four accumulators into one vector ...
  %bin.rdx = fadd fast <8 x double> %32, %31, !dbg !30
  %bin.rdx29 = fadd fast <8 x double> %33, %bin.rdx, !dbg !30
  %bin.rdx30 = fadd fast <8 x double> %34, %bin.rdx29, !dbg !30
  ; ... then halve repeatedly (lanes 4-7 onto 0-3, 2-3 onto 0-1, 1 onto 0);
  ; only lane 0 of the final vector is meaningful.
  %rdx.shuf = shufflevector <8 x double> %bin.rdx30, <8 x double> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>, !dbg !30
  %bin.rdx31 = fadd fast <8 x double> %bin.rdx30, %rdx.shuf, !dbg !30
  %rdx.shuf32 = shufflevector <8 x double> %bin.rdx31, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, !dbg !30
  %bin.rdx33 = fadd fast <8 x double> %bin.rdx31, %rdx.shuf32, !dbg !30
  %rdx.shuf34 = shufflevector <8 x double> %bin.rdx33, <8 x double> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, !dbg !30
  %bin.rdx35 = fadd fast <8 x double> %bin.rdx33, %rdx.shuf34, !dbg !30
  %36 = extractelement <8 x double> %bin.rdx35, i32 0, !dbg !30
  ; If the count was an exact multiple of 32 there is no remainder to process.
  %cmp.n = icmp eq i64 %3, %n.vec, !dbg !30
  br i1 %cmp.n, label %L55, label %scalar.ph, !dbg !30

scalar.ph:                                        ; preds = %middle.block, %L28.lr.ph
  ; Resume index and running sum: (n.vec, vector result) after the vector loop,
  ; or (0, 0.0) when the vector loop was skipped.
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L28.lr.ph ]
  %bc.merge.rdx = phi double [ 0.000000e+00, %L28.lr.ph ], [ %36, %middle.block ]
  br label %L28, !dbg !30

L28:                                              ; preds = %L28, %scalar.ph
  ; Scalar loop: one multiply-accumulate per iteration until index reaches %3.
  %value_phi215 = phi i64 [ %bc.resume.val, %scalar.ph ], [ %43, %L28 ]
  %value_phi14 = phi double [ %bc.merge.rdx, %scalar.ph ], [ %42, %L28 ]
  %37 = getelementptr inbounds double, double* %5, i64 %value_phi215, !dbg !35
  %38 = load double, double* %37, align 8, !dbg !35, !tbaa !36
  %39 = getelementptr inbounds double, double* %7, i64 %value_phi215, !dbg !35
  %40 = load double, double* %39, align 8, !dbg !35, !tbaa !36
  %41 = fmul contract double %38, %40, !dbg !39
  %42 = fadd fast double %value_phi14, %41, !dbg !42
  %43 = add nuw nsw i64 %value_phi215, 1, !dbg !31
  %44 = icmp ult i64 %43, %3, !dbg !46
  br i1 %44, label %L28, label %L55, !dbg !30, !llvm.loop !48

L55:                                              ; preds = %L28, %middle.block, %top
  ; Result: 0.0 (empty range), vector-only sum, or sum after the scalar tail.
  %value_phi5 = phi double [ 0.000000e+00, %top ], [ %36, %middle.block ], [ %42, %L28 ]
  ret double %value_phi5, !dbg !50
}

; jfptr_mydot_8569(%0, %1, %2) -> %jl_value_t*
;
; Wrapper around @julia_mydot_8568. Reads three pointers from the array %1:
; slots 0 and 1 are passed through as the two object arguments, slot 2 is
; treated as a pointer to an i64 that is loaded and passed as the count.
; The double result is written into a 16-byte object obtained from
; @jl_gc_pool_alloc, which is returned. %0 and %2 are not used in the body.
define nonnull %jl_value_t* @jfptr_mydot_8569(%jl_value_t* %0, %jl_value_t** %1, i32 %2) #0 {
top:
  ; Read the thread pointer from %fs:0 and step back a fixed 15720 bytes to
  ; form the value later handed to the allocator.
  ; NOTE(review): the offset is a baked-in constant - presumably specific to
  ; the runtime build that emitted this IR; confirm before reusing.
  %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"()
  %ptls_i8 = getelementptr i8, i8* %thread_ptr, i64 -15720
  %ptls = bitcast i8* %ptls_i8 to %jl_value_t***
  ; Unpack the three arguments from the argument array.
  %3 = load %jl_value_t*, %jl_value_t** %1, align 8, !nonnull !4, !dereferenceable !51, !align !52
  %4 = getelementptr inbounds %jl_value_t*, %jl_value_t** %1, i64 1
  %5 = load %jl_value_t*, %jl_value_t** %4, align 8, !nonnull !4, !dereferenceable !51, !align !52
  %6 = getelementptr inbounds %jl_value_t*, %jl_value_t** %1, i64 2
  %7 = bitcast %jl_value_t** %6 to i64**
  %8 = load i64*, i64** %7, align 8, !nonnull !4, !dereferenceable !53, !align !53
  %9 = load i64, i64* %8, align 8
  %10 = call double @julia_mydot_8568(%jl_value_t* %3, %jl_value_t* %5, i64 %9)
  ; Allocate the result object.
  %11 = bitcast %jl_value_t*** %ptls to i8*
  %12 = call noalias nonnull %jl_value_t* @jl_gc_pool_alloc(i8* %11, i32 1400, i32 16) #1
  ; Write a header word into the pointer-sized slot just before the object.
  ; NOTE(review): the value is an absolute address baked in as an integer -
  ; presumably a type tag valid only in the process that emitted this IR; verify.
  %13 = bitcast %jl_value_t* %12 to %jl_value_t**
  %14 = getelementptr %jl_value_t*, %jl_value_t** %13, i64 -1
  store %jl_value_t* inttoptr (i64 139643866079424 to %jl_value_t*), %jl_value_t** %14, !tbaa !54
  ; Payload: the double result at offset 0 of the new object.
  %15 = bitcast %jl_value_t* %12 to double*
  store double %10, double* %15, align 8, !tbaa !56
  ret %jl_value_t* %12
}

; External declarations. Of these, only @jl_gc_pool_alloc is called by the two
; functions above; @jl_gc_queue_root and @jl_gc_big_alloc are referenced solely
; from @llvm.compiler.used, and the rest are unreferenced in this module.

; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t* @julia.gc_alloc_obj(i8*, i64, %jl_value_t*) #1

; Function Attrs: nounwind readnone speculatable willreturn
declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #2

declare nonnull %jl_value_t* @j_throw_overflowerr_binaryop_8569(%jl_value_t*, i64, i64)

; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #3

; Function Attrs: nounwind readnone speculatable willreturn
declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) #2

declare nonnull %jl_value_t* @j_throw_overflowerr_binaryop_8570(%jl_value_t*, i64, i64)

declare nonnull %jl_value_t* @j_throw_boundserror_8571([2 x i64]* nocapture readonly, i64)

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4

; Function Attrs: inaccessiblemem_or_argmemonly
declare void @jl_gc_queue_root(%jl_value_t*) #5

; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t* @jl_gc_pool_alloc(i8*, i32, i32) #1

; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t* @jl_gc_big_alloc(i8*, i64) #1

; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t* @julia.gc_alloc_bytes(i8*, i64) #1

; Attribute groups referenced as #N above.
attributes #0 = { "thunk" }
attributes #1 = { allocsize(1) }
attributes #2 = { nounwind readnone speculatable willreturn }
attributes #3 = { cold noreturn nounwind }
attributes #4 = { argmemonly nounwind willreturn }
attributes #5 = { inaccessiblemem_or_argmemonly }

; Module-level named metadata.
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}

; !0-!24: module flags, compile unit, files, and the debug locations /
; subprograms describing the inlining chain attached via !dbg.
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 1, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, nameTableKind: GNU)
!3 = !DIFile(filename: "REPL[45]", directory: ".")
!4 = !{}
!5 = distinct !DISubprogram(name: "mydot", linkageName: "julia_mydot_8568", scope: null, file: !3, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!6 = !DISubroutineType(types: !4)
!7 = !DILocation(line: 285, scope: !8, inlinedAt: !10)
!8 = distinct !DISubprogram(name: "unitrange_last;", linkageName: "unitrange_last", scope: !9, file: !9, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!9 = !DIFile(filename: "range.jl", directory: ".")
!10 = !DILocation(line: 280, scope: !11, inlinedAt: !12)
!11 = distinct !DISubprogram(name: "UnitRange;", linkageName: "UnitRange", scope: !9, file: !9, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!12 = !DILocation(line: 5, scope: !13, inlinedAt: !14)
!13 = distinct !DISubprogram(name: "Colon;", linkageName: "Colon", scope: !9, file: !9, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!14 = !DILocation(line: 69, scope: !15, inlinedAt: !17)
!15 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !16, file: !16, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!16 = !DIFile(filename: "simdloop.jl", directory: ".")
!17 = !DILocation(line: 3, scope: !5)
!18 = !DILocation(line: 72, scope: !15, inlinedAt: !17)
!19 = !DILocation(line: 0, scope: !20, inlinedAt: !22)
!20 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !21, file: !21, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!21 = !DIFile(filename: "array.jl", directory: ".")
!22 = !DILocation(line: 4, scope: !23, inlinedAt: !24)
!23 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !3, file: !3, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!24 = !DILocation(line: 77, scope: !15, inlinedAt: !17)
; !25-!29: TBAA chain used by the two data-pointer loads in L28.lr.ph.
!25 = !{!26, !26, i64 0}
!26 = !{!"jtbaa_arrayptr", !27, i64 0}
!27 = !{!"jtbaa_array", !28, i64 0}
!28 = !{!"jtbaa", !29, i64 0}
!29 = !{!"jtbaa"}
!30 = !DILocation(line: 75, scope: !15, inlinedAt: !17)
!31 = !DILocation(line: 87, scope: !32, inlinedAt: !34)
!32 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !33, file: !33, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!33 = !DIFile(filename: "int.jl", directory: ".")
!34 = !DILocation(line: 78, scope: !15, inlinedAt: !17)
!35 = !DILocation(line: 811, scope: !20, inlinedAt: !22)
; !36-!38: TBAA chain used by the element loads in both loops.
!36 = !{!37, !37, i64 0}
!37 = !{!"jtbaa_arraybuf", !38, i64 0}
!38 = !{!"jtbaa_data", !28, i64 0}
!39 = !DILocation(line: 405, scope: !40, inlinedAt: !22)
!40 = distinct !DISubprogram(name: "*;", linkageName: "*", scope: !41, file: !41, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!41 = !DIFile(filename: "float.jl", directory: ".")
!42 = !DILocation(line: 401, scope: !43, inlinedAt: !22)
!43 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !41, file: !41, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
; !44 / !48: loop metadata for the vector loop and the scalar remainder loop;
; both are marked as already vectorized, the remainder also disables runtime unrolling.
!44 = distinct !{!44, !45}
!45 = !{!"llvm.loop.isvectorized", i32 1}
!46 = !DILocation(line: 83, scope: !47, inlinedAt: !30)
!47 = distinct !DISubprogram(name: "<;", linkageName: "<", scope: !33, file: !33, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!48 = distinct !{!48, !49, !45}
!49 = !{!"llvm.loop.unroll.runtime.disable"}
!50 = !DILocation(line: 6, scope: !5)
; !51-!53: operands for the !dereferenceable / !align annotations in the wrapper.
!51 = !{i64 40}
!52 = !{i64 16}
!53 = !{i64 8}
; !54-!58: TBAA chains for the two stores in the wrapper.
!54 = !{!55, !55, i64 0}
!55 = !{!"jtbaa_tag", !38, i64 0}
!56 = !{!57, !57, i64 0}
!57 = !{!"jtbaa_immut", !58, i64 0}
!58 = !{!"jtbaa_value", !38, i64 0}
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
About the author
Statistics
Changelog
Version tree