Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
Go
Haskell
HLSL
Hook
Hylo
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Nim
Objective-C
Objective-C++
OCaml
OpenCL C
Pascal
Pony
Python
Racket
Ruby
Rust
Snowball
Scala
Solidity
Spice
Swift
LLVM TableGen
Toit
TypeScript Native
V
Vala
Visual Basic
Zig
Javascript
GIMPLE
llvm source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
clang (assertions trunk)
clang (trunk)
clang 10.0.0
clang 10.0.1
clang 11.0.0
clang 11.0.1
clang 12.0.0
clang 12.0.1
clang 13.0.0
clang 14.0.0
clang 15.0.0
clang 16.0.0
clang 17.0.1
clang 18.1.0
clang 4.0.1
clang 5.0.0
clang 6.0.0
clang 7.0.0
clang 8.0.0
clang 9.0.0
llc (assertions trunk)
llc (trunk)
llc 10.0.0
llc 10.0.1
llc 11.0.0
llc 11.0.1
llc 12.0.0
llc 12.0.1
llc 13.0.0
llc 14.0.0
llc 15.0.0
llc 16.0.0
llc 17.0.1
llc 18.1.0
llc 3.2
llc 3.3
llc 3.9.1
llc 4.0.0
llc 4.0.1
llc 5.0.0
llc 6.0.0
llc 7.0.0
llc 8.0.0
llc 9.0.0
opt (assertions trunk)
opt (trunk)
opt 10.0.0
opt 10.0.1
opt 11.0.0
opt 11.0.1
opt 12.0.0
opt 12.0.1
opt 13.0.0
opt 14.0.0
opt 15.0.0
opt 16.0.0
opt 17.0.1
opt 18.1.0
opt 3.2
opt 3.3
opt 3.9.1
opt 4.0.0
opt 4.0.1
opt 5.0.0
opt 6.0.0
opt 7.0.0
opt 8.0.0
opt 9.0.0
Options
Source code
; Test summary: SROA behaviour for a load of a small chunk, at a variable byte
; offset, out of a small alloca.
;
; Every function below follows the same pattern:
;   1. allocate an [N x i8] buffer with align 64,
;   2. load an <N x i8> vector from %src and store it into that buffer,
;   3. form a pointer %byteOff bytes into the buffer (getelementptr inbounds i8),
;   4. load an <M x i8> chunk from that pointer and store it to %dst.
; N is 2, 4, 8 or 16 and M is a smaller power of two; both are spelled out in
; the function name (load-<M>byte-chunk-of-<N>byte-alloca).
;
; The lit commands run opt with both sroa<preserve-cfg> and sroa<modify-cfg>
; under four data layouts: "e" or "E" (paired with the LE / BE check prefixes)
; combined with native widths n8:16:32:64 or n8:16:32. All assertions in the
; functions use the CHECK-ALL prefix and expect the same instruction sequence
; as the input, with the alloca still present; the only visible difference is
; the explicit alignment printed on the final store. The remaining prefixes are
; listed as unused at the end of the file.
;
; NOTE(review): this copy appears to have lost its line breaks. LLVM IR
; comments start at a semicolon and extend to end of line, so the text has to
; be re-split (one lit command, one assertion, or one instruction per line)
; before opt or FileCheck can consume it -- confirm against the upstream file.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64 ; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64 ; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32 ; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32 ; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64 ; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64 ; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32 ; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32 define void @load-1byte-chunk-of-2byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-1byte-chunk-of-2byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1 ; CHECK-ALL-NEXT: ret void ; %intermediate = 
alloca [2 x i8], align 64 %init = load <2 x i8>, ptr %src, align 1 store <2 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 store <1 x i8> %chunk, ptr %dst ret void } define void @load-1byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-1byte-chunk-of-4byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [4 x i8], align 64 %init = load <4 x i8>, ptr %src, align 1 store <4 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 store <1 x i8> %chunk, ptr %dst ret void } define void @load-2byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-2byte-chunk-of-4byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [4 x i8], align 64 %init 
= load <4 x i8>, ptr %src, align 1 store <4 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 store <2 x i8> %chunk, ptr %dst ret void } define void @load-1byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-1byte-chunk-of-8byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [8 x i8], align 64 %init = load <8 x i8>, ptr %src, align 1 store <8 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 store <1 x i8> %chunk, ptr %dst ret void } define void @load-2byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [8 x i8], align 64 %init = load <8 x i8>, ptr %src, 
align 1 store <8 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 store <2 x i8> %chunk, ptr %dst ret void } define void @load-4byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-4byte-chunk-of-8byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [8 x i8], align 64 %init = load <8 x i8>, ptr %src, align 1 store <8 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 store <4 x i8> %chunk, ptr %dst ret void } define void @load-1byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-1byte-chunk-of-16byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [16 x i8], align 64 %init = load <16 x i8>, ptr %src, align 1 store <16 x i8> 
%init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 store <1 x i8> %chunk, ptr %dst ret void } define void @load-2byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-2byte-chunk-of-16byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [16 x i8], align 64 %init = load <16 x i8>, ptr %src, align 1 store <16 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 store <2 x i8> %chunk, ptr %dst ret void } define void @load-4byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-4byte-chunk-of-16byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [16 x i8], align 64 %init = load <16 x i8>, ptr %src, align 1 store <16 x i8> %init, ptr 
%intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 store <4 x i8> %chunk, ptr %dst ret void } define void @load-8byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; CHECK-ALL-LABEL: @load-8byte-chunk-of-16byte-alloca( ; CHECK-ALL-NEXT: [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64 ; CHECK-ALL-NEXT: [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1 ; CHECK-ALL-NEXT: store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64 ; CHECK-ALL-NEXT: [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]] ; CHECK-ALL-NEXT: [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1 ; CHECK-ALL-NEXT: store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8 ; CHECK-ALL-NEXT: ret void ; %intermediate = alloca [16 x i8], align 64 %init = load <16 x i8>, ptr %src, align 1 store <16 x i8> %init, ptr %intermediate, align 64 %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1 store <8 x i8> %chunk, ptr %dst ret void } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-BE-32: {{.*}} ; CHECK-BE-64: {{.*}} ; CHECK-LE-32: {{.*}} ; CHECK-LE-64: {{.*}} ; CHECK-SCALAR: {{.*}} ; CHECK-SCALAR-32: {{.*}} ; CHECK-SCALAR-64: {{.*}}
llvm source #2
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
clang (assertions trunk)
clang (trunk)
clang 10.0.0
clang 10.0.1
clang 11.0.0
clang 11.0.1
clang 12.0.0
clang 12.0.1
clang 13.0.0
clang 14.0.0
clang 15.0.0
clang 16.0.0
clang 17.0.1
clang 18.1.0
clang 4.0.1
clang 5.0.0
clang 6.0.0
clang 7.0.0
clang 8.0.0
clang 9.0.0
llc (assertions trunk)
llc (trunk)
llc 10.0.0
llc 10.0.1
llc 11.0.0
llc 11.0.1
llc 12.0.0
llc 12.0.1
llc 13.0.0
llc 14.0.0
llc 15.0.0
llc 16.0.0
llc 17.0.1
llc 18.1.0
llc 3.2
llc 3.3
llc 3.9.1
llc 4.0.0
llc 4.0.1
llc 5.0.0
llc 6.0.0
llc 7.0.0
llc 8.0.0
llc 9.0.0
opt (assertions trunk)
opt (trunk)
opt 10.0.0
opt 10.0.1
opt 11.0.0
opt 11.0.1
opt 12.0.0
opt 12.0.1
opt 13.0.0
opt 14.0.0
opt 15.0.0
opt 16.0.0
opt 17.0.1
opt 18.1.0
opt 3.2
opt 3.3
opt 3.9.1
opt 4.0.0
opt 4.0.1
opt 5.0.0
opt 6.0.0
opt 7.0.0
opt 8.0.0
opt 9.0.0
Options
Source code
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-NO-SHLD,X64-NO-BMI2-NO-SHLD ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-SHLD,X64-NO-BMI2-HAVE-SHLD ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-NO-SHLD,X64-HAVE-BMI2-NO-SHLD ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-SHLD,X64-HAVE-BMI2-HAVE-SHLD ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-NO-SHLD,X32-NO-BMI2-NO-SHLD ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-SHLD,X32-NO-BMI2-HAVE-SHLD ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-BMI2,X32-NO-SHLD,X32-HAVE-BMI2-NO-SHLD ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-BMI2,X32-SHLD,X32-HAVE-BMI2-HAVE-SHLD ; no @load_1byte_chunk_of_1byte_alloca define void @load_1byte_chunk_of_2byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrl %cl, %eax ; X64-NO-BMI2-NEXT: movb %al, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: movzwl (%rdi), %eax ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxl %esi, %eax, 
%eax ; X64-BMI2-NEXT: movb %al, (%rdx) ; X64-BMI2-NEXT: retq ; ; X32-NO-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca: ; X32-NO-BMI2: # %bb.0: ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NEXT: movzwl (%eax), %eax ; X32-NO-BMI2-NEXT: shll $3, %ecx ; X32-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X32-NO-BMI2-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NEXT: movb %al, (%edx) ; X32-NO-BMI2-NEXT: retl ; ; X32-BMI2-LABEL: load_1byte_chunk_of_2byte_alloca: ; X32-BMI2: # %bb.0: ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-BMI2-NEXT: movzwl (%edx), %edx ; X32-BMI2-NEXT: shll $3, %ecx ; X32-BMI2-NEXT: shrxl %ecx, %edx, %ecx ; X32-BMI2-NEXT: movb %cl, (%eax) ; X32-BMI2-NEXT: retl %init = load <2 x i8>, ptr %src, align 1 %intermediate.val.frozen = freeze <2 x i8> %init %intermediate.val.frozen.bits = bitcast <2 x i8> %intermediate.val.frozen to i16 %byteOff.tr = trunc i64 %byteOff to i16 %byteOff.numbits.wide = shl i16 %byteOff.tr, 3 %intermediate.val.frozen.bits.positioned = lshr i16 %intermediate.val.frozen.bits, %byteOff.numbits.wide %intermediate.val.frozen.bits.positioned.extracted = trunc i16 %intermediate.val.frozen.bits.positioned to i8 %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0 store <1 x i8> %1, ptr %dst, align 1 ret void } ; no @load_2byte_chunk_of_2byte_alloca define void @load_1byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movl (%rdi), %eax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrl %cl, %eax ; X64-NO-BMI2-NEXT: movb %al, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: 
load_1byte_chunk_of_4byte_alloca: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax ; X64-BMI2-NEXT: movb %al, (%rdx) ; X64-BMI2-NEXT: retq ; ; X32-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca: ; X32-NO-BMI2: # %bb.0: ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NEXT: movl (%eax), %eax ; X32-NO-BMI2-NEXT: shll $3, %ecx ; X32-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X32-NO-BMI2-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NEXT: movb %al, (%edx) ; X32-NO-BMI2-NEXT: retl ; ; X32-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca: ; X32-BMI2: # %bb.0: ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-BMI2-NEXT: shll $3, %ecx ; X32-BMI2-NEXT: shrxl %ecx, (%edx), %ecx ; X32-BMI2-NEXT: movb %cl, (%eax) ; X32-BMI2-NEXT: retl %init = load <4 x i8>, ptr %src, align 1 %intermediate.val.frozen = freeze <4 x i8> %init %intermediate.val.frozen.bits = bitcast <4 x i8> %intermediate.val.frozen to i32 %byteOff.tr = trunc i64 %byteOff to i32 %byteOff.numbits.wide = shl i32 %byteOff.tr, 3 %intermediate.val.frozen.bits.positioned = lshr i32 %intermediate.val.frozen.bits, %byteOff.numbits.wide %intermediate.val.frozen.bits.positioned.extracted = trunc i32 %intermediate.val.frozen.bits.positioned to i8 %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0 store <1 x i8> %1, ptr %dst, align 1 ret void } define void @load_2byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movl (%rdi), %eax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrl %cl, %eax ; X64-NO-BMI2-NEXT: movw %ax, (%rdx) ; X64-NO-BMI2-NEXT: retq 
; ; X64-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax ; X64-BMI2-NEXT: movw %ax, (%rdx) ; X64-BMI2-NEXT: retq ; ; X32-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca: ; X32-NO-BMI2: # %bb.0: ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-NEXT: movl (%edx), %edx ; X32-NO-BMI2-NEXT: shll $3, %ecx ; X32-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X32-NO-BMI2-NEXT: shrl %cl, %edx ; X32-NO-BMI2-NEXT: movw %dx, (%eax) ; X32-NO-BMI2-NEXT: retl ; ; X32-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca: ; X32-BMI2: # %bb.0: ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-BMI2-NEXT: shll $3, %ecx ; X32-BMI2-NEXT: shrxl %ecx, (%edx), %ecx ; X32-BMI2-NEXT: movw %cx, (%eax) ; X32-BMI2-NEXT: retl %init = load <4 x i8>, ptr %src, align 1 %intermediate.val.frozen = freeze <4 x i8> %init %intermediate.val.frozen.bits = bitcast <4 x i8> %intermediate.val.frozen to i32 %byteOff.tr = trunc i64 %byteOff to i32 %byteOff.numbits.wide = shl i32 %byteOff.tr, 3 %intermediate.val.frozen.bits.positioned = lshr i32 %intermediate.val.frozen.bits, %byteOff.numbits.wide %intermediate.val.frozen.bits.positioned.extracted = trunc i32 %intermediate.val.frozen.bits.positioned to i16 store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2 ret void } ; no @load_4byte_chunk_of_4byte_alloca define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movq (%rdi), %rax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movb %al, (%rdx) ; X64-NO-BMI2-NEXT: 
retq ; ; X64-BMI2-LABEL: load_1byte_chunk_of_8byte_alloca: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax ; X64-BMI2-NEXT: movb %al, (%rdx) ; X64-BMI2-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca: ; X32-NO-BMI2-NO-SHLD: # %bb.0: ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %edi ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%edx) ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: retl ; ; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca: ; X32-NO-BMI2-HAVE-SHLD: # %bb.0: ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 
= xmm0[1,1,1,1] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, (%eax) ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca: ; X32-HAVE-BMI2-NO-SHLD: # %bb.0: ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_8byte_alloca: ; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <8 x i8> %init %intermediate.val.frozen.bits = bitcast <8 x i8> %intermediate.val.frozen to i64 %intermediate.val.frozen.bits.positioned = lshr i64 %intermediate.val.frozen.bits, %byteOff.numbits %intermediate.val.frozen.bits.positioned.extracted = trunc i64 %intermediate.val.frozen.bits.positioned to i8 %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0 store <1 x i8> %1, ptr %dst, align 1 ret void } define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movq (%rdi), %rax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movw %ax, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_2byte_chunk_of_8byte_alloca: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax ; X64-BMI2-NEXT: movw %ax, (%rdx) ; X64-BMI2-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca: ; X32-NO-BMI2-NO-SHLD: # %bb.0: ; 
X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movw %si, (%edx) ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: retl ; ; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca: ; X32-NO-BMI2-HAVE-SHLD: # %bb.0: ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax) ; 
X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca: ; X32-HAVE-BMI2-NO-SHLD: # %bb.0: ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_8byte_alloca: ; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi ; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <8 x i8> %init %intermediate.val.frozen.bits = bitcast <8 x i8> %intermediate.val.frozen to i64 %intermediate.val.frozen.bits.positioned = lshr i64 %intermediate.val.frozen.bits, %byteOff.numbits %intermediate.val.frozen.bits.positioned.extracted = trunc i64 %intermediate.val.frozen.bits.positioned to i16 store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2 ret void } define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movq (%rdi), %rax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NEXT: movl %eax, (%rdx) ; X64-NO-BMI2-NEXT: retq ; ; X64-BMI2-LABEL: load_4byte_chunk_of_8byte_alloca: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax ; X64-BMI2-NEXT: movl %eax, (%rdx) ; X64-BMI2-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca: ; X32-NO-BMI2-NO-SHLD: # %bb.0: ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi ; 
X32-NO-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%edx) ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: retl ; ; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca: ; X32-NO-BMI2-HAVE-SHLD: # %bb.0: ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca: ; X32-HAVE-BMI2-NO-SHLD: # %bb.0: ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_8byte_alloca: ; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 
%intermediate.val.frozen = freeze <8 x i8> %init %intermediate.val.frozen.bits = bitcast <8 x i8> %intermediate.val.frozen to i64 %intermediate.val.frozen.bits.positioned = lshr i64 %intermediate.val.frozen.bits, %byteOff.numbits %intermediate.val.frozen.bits.positioned.extracted = trunc i64 %intermediate.val.frozen.bits.positioned to i32 store i32 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 4 ret void } ; no @load_8byte_chunk_of_8byte_alloca define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movb %al, (%rdx) ; X64-NO-BMI2-NO-SHLD-NEXT: retq ; ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq 
%rax, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx) ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %al, (%rdx) ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-NO-SHLD: # %bb.0: ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax ; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al ; X32-NO-BMI2-NO-SHLD-NEXT: negb %al ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx 
; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: retl ; ; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-HAVE-SHLD: # %bb.0: ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp ; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) ; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $4, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X32-HAVE-BMI2-NO-SHLD: # %bb.0: ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; 
X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ebp # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp 
; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: ; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %edi ; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init %intermediate.val.frozen.bits = bitcast <16 x i8> %intermediate.val.frozen to i128 %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide %intermediate.val.frozen.bits.positioned.extracted = trunc i128 %intermediate.val.frozen.bits.positioned to i8 %1 = insertelement <1 x i8> poison, i8 %intermediate.val.frozen.bits.positioned.extracted, i64 0 store <1 x i8> %1, ptr %dst, align 1 ret void } define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 ; 
X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx) ; X64-NO-BMI2-NO-SHLD-NEXT: retq ; ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx) ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx 
; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-NO-SHLD: # %bb.0: ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: 
cmovel %eax, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al ; X32-NO-BMI2-NO-SHLD-NEXT: negb %al ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movw %cx, (%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: retl ; ; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-HAVE-SHLD: # %bb.0: ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax ; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%ecx) ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; 
X32-HAVE-BMI2-NO-SHLD: # %bb.0: ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl ; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: ; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx ; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %di, (%eax) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init %intermediate.val.frozen.bits = bitcast <16 x i8> %intermediate.val.frozen to i128 %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide 
%intermediate.val.frozen.bits.positioned.extracted = trunc i128 %intermediate.val.frozen.bits.positioned to i16 store i16 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 2 ret void } define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx) ; X64-NO-BMI2-NO-SHLD-NEXT: retq ; ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx) ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-NO-SHLD: # %bb.0: ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; 
X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al ; X32-NO-BMI2-NO-SHLD-NEXT: negb %al ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl ; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: retl ; ; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-HAVE-SHLD: # %bb.0: ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch ; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X32-HAVE-BMI2-NO-SHLD: # %bb.0: ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, 
%ebp, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: ; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi ; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init %intermediate.val.frozen.bits = bitcast <16 x i8> %intermediate.val.frozen to i128 %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide %intermediate.val.frozen.bits.positioned.extracted = trunc i128 %intermediate.val.frozen.bits.positioned to i32 store i32 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 4 ret void } define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-NO-SHLD: # %bb.0: ; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax ; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx) ; 
X64-NO-BMI2-NO-SHLD-NEXT: retq ; ; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: ; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi ; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8 ; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, (%rdx) ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, 
%rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-NO-SHLD: # %bb.0: ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %bh ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh ; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh ; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh ; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx ; 
X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh ; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, 
%esi ; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) ; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: retl ; ; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X32-NO-BMI2-HAVE-SHLD: # %bb.0: ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 
X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl ; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, 
%bl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) ; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X32-HAVE-BMI2-NO-SHLD: # %bb.0: ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $28, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 ; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 
X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx 
; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) ; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $28, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: retl ; ; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: ; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $28, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %edi # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, 
%bl ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $28, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init %intermediate.val.frozen.bits = bitcast <16 x i8> %intermediate.val.frozen to i128 %byteOff.numbits.wide = zext i64 %byteOff.numbits to i128 %intermediate.val.frozen.bits.positioned = lshr i128 %intermediate.val.frozen.bits, %byteOff.numbits.wide %intermediate.val.frozen.bits.positioned.extracted = trunc i128 %intermediate.val.frozen.bits.positioned to i64 store i64 %intermediate.val.frozen.bits.positioned.extracted, ptr %dst, align 8 ret void } ; no @load_16byte_chunk_of_16byte_alloca ; no @load_32byte_chunk_of_32byte_alloca ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; ALL: {{.*}} ; X32: {{.*}} ; X32-NO-SHLD: {{.*}} ; X32-SHLD: {{.*}} ; X64: {{.*}} ; X64-NO-SHLD: {{.*}} ; X64-SHLD: {{.*}}
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
About the author
Statistics
Changelog
Version tree