[llvm] f067dd8 - [LegalizeTypes] When promoting BITREVERSE/BSWAP don't take the shift amount into account when determining the shift amount VT.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 27 12:23:04 PDT 2019


Author: Craig Topper
Date: 2019-10-27T12:20:35-07:00
New Revision: f067dd839eca3103e8afc49c6e0a74d944f25fdd

URL: https://github.com/llvm/llvm-project/commit/f067dd839eca3103e8afc49c6e0a74d944f25fdd
DIFF: https://github.com/llvm/llvm-project/commit/f067dd839eca3103e8afc49c6e0a74d944f25fdd.diff

LOG: [LegalizeTypes] When promoting BITREVERSE/BSWAP don't take the shift amount into account when determining the shift amount VT.

If the target's preferred shift amount VT can't hold every possible
shift amount for the promoted VT, we should use i32. The specific shift
amount shouldn't matter. The type will be adjusted later when the
shift itself is type legalized. This avoids an assert in getNode.

Fixes PR43820.

Added: 
    llvm/test/CodeGen/X86/pr43820.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 757f3911b113..0e193ba383b5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -365,15 +365,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
                      CreateStackStoreLoad(InOp, OutVT));
 }
 
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
 // in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
-                                       const TargetLowering &TLI,
+static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
                                        SelectionDAG &DAG) {
   EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-  // If the value won't fit in the prefered type, just use something safe. It
-  // will be legalized when the shift is expanded.
-  if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+  // If any possible shift value won't fit in the preferred type, just use
+  // something safe. It will be legalized when the shift is expanded.
+  if (!ShiftVT.isVector() &&
+      ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
     ShiftVT = MVT::i32;
   return ShiftVT;
 }
@@ -385,7 +385,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
   SDLoc dl(N);
 
   unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
-  EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+  EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
   return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
                      DAG.getConstant(DiffBits, dl, ShiftVT));
 }
@@ -397,7 +397,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
   SDLoc dl(N);
 
   unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
-  EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+  EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
   return DAG.getNode(ISD::SRL, dl, NVT,
                      DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
                      DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -1058,8 +1058,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
   if (N->getOpcode() == ISD::UMULO) {
     // Unsigned overflow occurred if the high part is non-zero.
     unsigned Shift = SmallVT.getScalarSizeInBits();
-    EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
-                                              TLI, DAG);
+    EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
     SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
                              DAG.getConstant(Shift, DL, ShiftTy));
     Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,

diff  --git a/llvm/test/CodeGen/X86/pr43820.ll b/llvm/test/CodeGen/X86/pr43820.ll
new file mode 100644
index 000000000000..5bdf7872d61a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr43820.ll
@@ -0,0 +1,383 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define i1000 @square(i1000 %A) nounwind {
+; CHECK-LABEL: square:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT:    bswapq %rbx
+; CHECK-NEXT:    movabsq $1085102592571150095, %rdi # imm = 0xF0F0F0F0F0F0F0F
+; CHECK-NEXT:    movq %rbx, %rbp
+; CHECK-NEXT:    andq %rdi, %rbp
+; CHECK-NEXT:    shlq $4, %rbp
+; CHECK-NEXT:    movabsq $-1085102592571150096, %r11 # imm = 0xF0F0F0F0F0F0F0F0
+; CHECK-NEXT:    andq %r11, %rbx
+; CHECK-NEXT:    movq %r11, %rax
+; CHECK-NEXT:    shrq $4, %rbx
+; CHECK-NEXT:    orq %rbp, %rbx
+; CHECK-NEXT:    movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
+; CHECK-NEXT:    movq %rbx, %r14
+; CHECK-NEXT:    andq %r11, %r14
+; CHECK-NEXT:    movabsq $-3689348814741910324, %rbp # imm = 0xCCCCCCCCCCCCCCCC
+; CHECK-NEXT:    andq %rbp, %rbx
+; CHECK-NEXT:    movq %rbp, %r15
+; CHECK-NEXT:    shrq $2, %rbx
+; CHECK-NEXT:    leaq (%rbx,%r14,4), %r14
+; CHECK-NEXT:    movabsq $6148914691230924800, %rbx # imm = 0x5555555555000000
+; CHECK-NEXT:    andq %r14, %rbx
+; CHECK-NEXT:    movabsq $-6148914691247702016, %rbp # imm = 0xAAAAAAAAAA000000
+; CHECK-NEXT:    andq %r14, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%rbx,2), %rbx
+; CHECK-NEXT:    movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    bswapq %r10
+; CHECK-NEXT:    movq %r10, %rbx
+; CHECK-NEXT:    andq %rdi, %rbx
+; CHECK-NEXT:    shlq $4, %rbx
+; CHECK-NEXT:    andq %rax, %r10
+; CHECK-NEXT:    shrq $4, %r10
+; CHECK-NEXT:    orq %rbx, %r10
+; CHECK-NEXT:    movq %r10, %rbx
+; CHECK-NEXT:    andq %r11, %rbx
+; CHECK-NEXT:    andq %r15, %r10
+; CHECK-NEXT:    shrq $2, %r10
+; CHECK-NEXT:    leaq (%r10,%rbx,4), %rbp
+; CHECK-NEXT:    movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    movabsq $-6148914691236517206, %r13 # imm = 0xAAAAAAAAAAAAAAAA
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %rax, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %rax, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %rax, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %rax, %rbp
+; CHECK-NEXT:    movq %rax, %r14
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rax
+; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %r14, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %r14, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %r14, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %r14, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; CHECK-NEXT:    bswapq %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rdi, %r10
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    andq %r14, %rbp
+; CHECK-NEXT:    shrq $4, %rbp
+; CHECK-NEXT:    orq %r10, %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %r11, %r10
+; CHECK-NEXT:    andq %r15, %rbp
+; CHECK-NEXT:    shrq $2, %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r10
+; CHECK-NEXT:    andq %rbx, %r10
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r10,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    bswapq %r9
+; CHECK-NEXT:    movq %r9, %rbp
+; CHECK-NEXT:    andq %rdi, %rbp
+; CHECK-NEXT:    shlq $4, %rbp
+; CHECK-NEXT:    andq %r14, %r9
+; CHECK-NEXT:    shrq $4, %r9
+; CHECK-NEXT:    orq %rbp, %r9
+; CHECK-NEXT:    movq %r9, %rbp
+; CHECK-NEXT:    andq %r11, %rbp
+; CHECK-NEXT:    andq %r15, %r9
+; CHECK-NEXT:    shrq $2, %r9
+; CHECK-NEXT:    leaq (%r9,%rbp,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r9
+; CHECK-NEXT:    andq %rbx, %r9
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r9,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    bswapq %r8
+; CHECK-NEXT:    movq %r8, %rbp
+; CHECK-NEXT:    andq %rdi, %rbp
+; CHECK-NEXT:    shlq $4, %rbp
+; CHECK-NEXT:    andq %r14, %r8
+; CHECK-NEXT:    shrq $4, %r8
+; CHECK-NEXT:    orq %rbp, %r8
+; CHECK-NEXT:    movq %r8, %rbp
+; CHECK-NEXT:    andq %r11, %rbp
+; CHECK-NEXT:    andq %r15, %r8
+; CHECK-NEXT:    movq %r15, %r9
+; CHECK-NEXT:    shrq $2, %r8
+; CHECK-NEXT:    leaq (%r8,%rbp,4), %rbp
+; CHECK-NEXT:    movq %rbp, %r8
+; CHECK-NEXT:    andq %rbx, %r8
+; CHECK-NEXT:    andq %r13, %rbp
+; CHECK-NEXT:    shrq %rbp
+; CHECK-NEXT:    leaq (%rbp,%r8,2), %rbp
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    bswapq %rcx
+; CHECK-NEXT:    movq %rcx, %rbp
+; CHECK-NEXT:    andq %rdi, %rbp
+; CHECK-NEXT:    shlq $4, %rbp
+; CHECK-NEXT:    andq %r14, %rcx
+; CHECK-NEXT:    shrq $4, %rcx
+; CHECK-NEXT:    orq %rbp, %rcx
+; CHECK-NEXT:    movq %rcx, %rbp
+; CHECK-NEXT:    andq %r11, %rbp
+; CHECK-NEXT:    andq %r15, %rcx
+; CHECK-NEXT:    shrq $2, %rcx
+; CHECK-NEXT:    leaq (%rcx,%rbp,4), %rcx
+; CHECK-NEXT:    movq %rcx, %rbp
+; CHECK-NEXT:    andq %rbx, %rbp
+; CHECK-NEXT:    andq %r13, %rcx
+; CHECK-NEXT:    shrq %rcx
+; CHECK-NEXT:    leaq (%rcx,%rbp,2), %r15
+; CHECK-NEXT:    bswapq %rdx
+; CHECK-NEXT:    movq %rdx, %rbp
+; CHECK-NEXT:    andq %rdi, %rbp
+; CHECK-NEXT:    shlq $4, %rbp
+; CHECK-NEXT:    andq %r14, %rdx
+; CHECK-NEXT:    shrq $4, %rdx
+; CHECK-NEXT:    orq %rbp, %rdx
+; CHECK-NEXT:    movq %rdx, %rbp
+; CHECK-NEXT:    andq %r11, %rbp
+; CHECK-NEXT:    andq %r9, %rdx
+; CHECK-NEXT:    shrq $2, %rdx
+; CHECK-NEXT:    leaq (%rdx,%rbp,4), %rdx
+; CHECK-NEXT:    movq %rdx, %rbp
+; CHECK-NEXT:    andq %rbx, %rbp
+; CHECK-NEXT:    andq %r13, %rdx
+; CHECK-NEXT:    shrq %rdx
+; CHECK-NEXT:    leaq (%rdx,%rbp,2), %rdx
+; CHECK-NEXT:    bswapq %rsi
+; CHECK-NEXT:    andq %rsi, %rdi
+; CHECK-NEXT:    andq %r14, %rsi
+; CHECK-NEXT:    shlq $4, %rdi
+; CHECK-NEXT:    shrq $4, %rsi
+; CHECK-NEXT:    orq %rdi, %rsi
+; CHECK-NEXT:    andq %rsi, %r11
+; CHECK-NEXT:    andq %r9, %rsi
+; CHECK-NEXT:    shrq $2, %rsi
+; CHECK-NEXT:    leaq (%rsi,%r11,4), %rsi
+; CHECK-NEXT:    andq %rsi, %rbx
+; CHECK-NEXT:    andq %r13, %rsi
+; CHECK-NEXT:    shrq %rsi
+; CHECK-NEXT:    leaq (%rsi,%rbx,2), %r13
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %rax, %r11
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %rcx, %rax
+; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %rbp, %rcx
+; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %r12, %rbp
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %r14, %r12
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %rbx, %r14
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %r10, %rbx
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %r9, %r10
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %r8, %r9
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %rdi, %r8
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %rsi, %rdi
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT:    shrdq $24, %rax, %rsi
+; CHECK-NEXT:    shrdq $24, %r15, %rax
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    shrdq $24, %rdx, %r15
+; CHECK-NEXT:    shrdq $24, %r13, %rdx
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT:    movq %rdx, 112(%rax)
+; CHECK-NEXT:    movq %r15, 104(%rax)
+; CHECK-NEXT:    movq %rcx, 96(%rax)
+; CHECK-NEXT:    movq %rsi, 88(%rax)
+; CHECK-NEXT:    movq %rdi, 80(%rax)
+; CHECK-NEXT:    movq %r8, 72(%rax)
+; CHECK-NEXT:    movq %r9, 64(%rax)
+; CHECK-NEXT:    movq %r10, 56(%rax)
+; CHECK-NEXT:    movq %rbx, 48(%rax)
+; CHECK-NEXT:    movq %r14, 40(%rax)
+; CHECK-NEXT:    movq %r12, 32(%rax)
+; CHECK-NEXT:    movq %rbp, 24(%rax)
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT:    movq %rcx, 16(%rax)
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT:    movq %rcx, 8(%rax)
+; CHECK-NEXT:    movq %r11, (%rax)
+; CHECK-NEXT:    movq %r13, %rcx
+; CHECK-NEXT:    shrq $56, %r13
+; CHECK-NEXT:    movb %r13b, 124(%rax)
+; CHECK-NEXT:    shrq $24, %rcx
+; CHECK-NEXT:    movl %ecx, 120(%rax)
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+  %Z = call i1000 @llvm.bitreverse.i1000(i1000 %A)
+  ret i1000 %Z
+}
+
+declare i1000 @llvm.bitreverse.i1000(i1000)


        


More information about the llvm-commits mailing list