[llvm] r344460 - [LegalizeTypes] Prevent an assertion from PromoteIntRes_BSWAP and PromoteIntRes_BITREVERSE if the shift amount is too large for the VT returned by getShiftAmountTy

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Sat Oct 13 10:47:20 PDT 2018


Author: ctopper
Date: Sat Oct 13 10:47:20 2018
New Revision: 344460

URL: http://llvm.org/viewvc/llvm-project?rev=344460&view=rev
Log:
[LegalizeTypes] Prevent an assertion from PromoteIntRes_BSWAP and PromoteIntRes_BITREVERSE if the shift amount is too large for the VT returned by getShiftAmountTy

Summary:
getShiftAmountTy for X86 returns MVT::i8. If a BSWAP or BITREVERSE is created that requires promotion, and the difference in bit width between the original VT and the promoted VT is more than 255, then we won't be able to create the constant shift amount.

This patch adds a check that replaces the result from getShiftAmountTy with MVT::i32 if the difference won't fit. This should get legalized later when the shift is ultimately expanded, since it's clearly an illegal type that we're only promoting to make it a power-of-2 bit width. Alternatively, we could base the decision entirely on the largest shift amount the promoted VT could use.

Vectors should be immune here because getShiftAmountTy always returns the incoming VT for vectors; only the scalar shift amount type can be changed by targets.
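
For concreteness, here is a minimal standalone sketch (not part of the patch) of the arithmetic behind the i528 test cases added below. i528 is promoted to the next power-of-2 integer type, i1024, so the BSWAP/BITREVERSE result must be shifted right by 1024 - 528 = 496 bits, and 496 needs 9 bits to represent, which does not fit in the MVT::i8 that X86's getShiftAmountTy returns:

  #include <cassert>

  int main() {
    unsigned OrigBits = 528;      // bit width of the original i528 type
    unsigned PromotedBits = 1024; // i528 promotes to i1024, the next power of 2
    unsigned DiffBits = PromotedBits - OrigBits; // 496, the SRL shift amount
    // Count the bits needed to hold DiffBits, i.e. Log2_32(DiffBits) + 1.
    unsigned BitsNeeded = 0;
    for (unsigned V = DiffBits; V != 0; V >>= 1)
      ++BitsNeeded;
    assert(BitsNeeded == 9);
    // 9 > 8, so 496 cannot be materialized as an MVT::i8 constant, and the
    // new helper falls back to MVT::i32 for the shift amount type.
    return 0;
  }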

Reviewers: eli.friedman, RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D53232

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/trunk/test/CodeGen/X86/bitreverse.ll
    llvm/trunk/test/CodeGen/X86/bswap.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=344460&r1=344459&r2=344460&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Sat Oct 13 10:47:20 2018
@@ -311,6 +311,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_
                      CreateStackStoreLoad(InOp, OutVT));
 }
 
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
+static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
+                                       const TargetLowering &TLI,
+                                       SelectionDAG &DAG) {
+  EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+  // If the value won't fit in the preferred type, just use something safe. It
+  // will be legalized when the shift is expanded.
+  if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+    ShiftVT = MVT::i32;
+  return ShiftVT;
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
   SDValue Op = GetPromotedInteger(N->getOperand(0));
   EVT OVT = N->getValueType(0);
@@ -318,10 +331,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_
   SDLoc dl(N);
 
   unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
-  return DAG.getNode(
-      ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
-      DAG.getConstant(DiffBits, dl,
-                      TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+  EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+  return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+                     DAG.getConstant(DiffBits, dl, ShiftVT));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -331,10 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_
   SDLoc dl(N);
 
   unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
-  return DAG.getNode(
-      ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
-      DAG.getConstant(DiffBits, dl,
-                      TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+  EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+  return DAG.getNode(ISD::SRL, dl, NVT,
+                     DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+                     DAG.getConstant(DiffBits, dl, ShiftVT));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {

Modified: llvm/trunk/test/CodeGen/X86/bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitreverse.ll?rev=344460&r1=344459&r2=344460&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitreverse.ll Sat Oct 13 10:47:20 2018
@@ -523,3 +523,621 @@ define <2 x i16> @undef_v2i16() {
   %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
   ret <2 x i16> %b
 }
+
+; Make sure we don't assert during type legalization promoting a large
+; bitreverse due to the need for a large shift that won't fit in the i8 returned
+; from getShiftAmountTy.
+define i528 @large_promotion(i528 %A) nounwind {
+; X86-LABEL: large_promotion:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $56, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    bswapl %ebx
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    andl $252645135, %ebp # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ebp
+; X86-NEXT:    andl $-252645136, %ebx # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    andl $858993459, %ebp # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %ebx # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %ebx
+; X86-NEXT:    leal (%ebx,%ebp,4), %ebx
+; X86-NEXT:    movl %ebx, %ebp
+; X86-NEXT:    andl $1431633920, %ebp # imm = 0x55550000
+; X86-NEXT:    andl $-1431699456, %ebx # imm = 0xAAAA0000
+; X86-NEXT:    shrl %ebx
+; X86-NEXT:    leal (%ebx,%ebp,2), %ebx
+; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; X86-NEXT:    bswapl %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    andl $252645135, %ebx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ebx
+; X86-NEXT:    andl $-252645136, %edi # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    andl $858993459, %ebx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %edi # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %edi
+; X86-NEXT:    leal (%edi,%ebx,4), %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    andl $1431655765, %ebx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %edi # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %edi
+; X86-NEXT:    leal (%edi,%ebx,2), %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    bswapl %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edi
+; X86-NEXT:    andl $-252645136, %esi # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %esi
+; X86-NEXT:    orl %edi, %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    andl $858993459, %edi # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %esi # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %esi
+; X86-NEXT:    leal (%esi,%edi,4), %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    andl $1431655765, %edi # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %esi # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %esi
+; X86-NEXT:    leal (%esi,%edi,2), %ebx
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %esi
+; X86-NEXT:    andl $-252645136, %edx # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %edx
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %edx # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    leal (%edx,%esi,4), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    andl $1431655765, %esi # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %edx # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    leal (%edx,%esi,2), %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %edx
+; X86-NEXT:    andl $-252645136, %ecx # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %ecx
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %ecx # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %ecx # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %ecx
+; X86-NEXT:    leal (%ecx,%edx,2), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    andl $-252645136, %eax # imm = 0xF0F0F0F0
+; X86-NEXT:    shrl $4, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT:    andl $-858993460, %eax # imm = 0xCCCCCCCC
+; X86-NEXT:    shrl $2, %eax
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT:    andl $-1431655766, %eax # imm = 0xAAAAAAAA
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%ecx,2), %edx
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl $16, %eax, %esi
+; X86-NEXT:    shrdl $16, %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl $16, %ecx, %ebx
+; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl $16, %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl $16, %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl $16, %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl $16, %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl $16, %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl $16, %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl $16, %eax, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shrdl $16, %ecx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X86-NEXT:    shrdl $16, %ebp, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    shrdl $16, %ebx, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    shrdl $16, %eax, %ebx
+; X86-NEXT:    shrdl $16, %edi, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shrdl $16, %edx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, 60(%eax)
+; X86-NEXT:    movl %ecx, 56(%eax)
+; X86-NEXT:    movl %ebx, 52(%eax)
+; X86-NEXT:    movl %ebp, 48(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 44(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 40(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 36(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 32(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 28(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 20(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl %esi, (%eax)
+; X86-NEXT:    shrl $16, %edx
+; X86-NEXT:    movw %dx, 64(%eax)
+; X86-NEXT:    addl $56, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: large_promotion:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %rdi, %r12
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT:    bswapq %rbx
+; X64-NEXT:    movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F
+; X64-NEXT:    movq %rbx, %r10
+; X64-NEXT:    andq %r13, %r10
+; X64-NEXT:    shlq $4, %r10
+; X64-NEXT:    movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0
+; X64-NEXT:    andq %rax, %rbx
+; X64-NEXT:    shrq $4, %rbx
+; X64-NEXT:    orq %r10, %rbx
+; X64-NEXT:    movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
+; X64-NEXT:    movq %rbx, %r10
+; X64-NEXT:    andq %r11, %r10
+; X64-NEXT:    movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC
+; X64-NEXT:    andq %r14, %rbx
+; X64-NEXT:    shrq $2, %rbx
+; X64-NEXT:    leaq (%rbx,%r10,4), %r10
+; X64-NEXT:    movabsq $6148820866244280320, %rbx # imm = 0x5555000000000000
+; X64-NEXT:    andq %r10, %rbx
+; X64-NEXT:    movabsq $-6149102341220990976, %rdi # imm = 0xAAAA000000000000
+; X64-NEXT:    andq %r10, %rdi
+; X64-NEXT:    shrq %rdi
+; X64-NEXT:    leaq (%rdi,%rbx,2), %rdi
+; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    bswapq %rbp
+; X64-NEXT:    movq %rbp, %rdi
+; X64-NEXT:    andq %r13, %rdi
+; X64-NEXT:    shlq $4, %rdi
+; X64-NEXT:    andq %rax, %rbp
+; X64-NEXT:    shrq $4, %rbp
+; X64-NEXT:    orq %rdi, %rbp
+; X64-NEXT:    movq %rbp, %rdi
+; X64-NEXT:    andq %r11, %rdi
+; X64-NEXT:    andq %r14, %rbp
+; X64-NEXT:    shrq $2, %rbp
+; X64-NEXT:    leaq (%rbp,%rdi,4), %rbp
+; X64-NEXT:    movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555
+; X64-NEXT:    movq %rbp, %r10
+; X64-NEXT:    andq %rbx, %r10
+; X64-NEXT:    movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA
+; X64-NEXT:    andq %rdi, %rbp
+; X64-NEXT:    shrq %rbp
+; X64-NEXT:    leaq (%rbp,%r10,2), %rbp
+; X64-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; X64-NEXT:    bswapq %rbp
+; X64-NEXT:    movq %rbp, %r10
+; X64-NEXT:    andq %r13, %r10
+; X64-NEXT:    shlq $4, %r10
+; X64-NEXT:    andq %rax, %rbp
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    shrq $4, %rbp
+; X64-NEXT:    orq %r10, %rbp
+; X64-NEXT:    movq %rbp, %r10
+; X64-NEXT:    andq %r11, %r10
+; X64-NEXT:    andq %r14, %rbp
+; X64-NEXT:    shrq $2, %rbp
+; X64-NEXT:    leaq (%rbp,%r10,4), %rbp
+; X64-NEXT:    movq %rbp, %r10
+; X64-NEXT:    andq %rbx, %r10
+; X64-NEXT:    andq %rdi, %rbp
+; X64-NEXT:    shrq %rbp
+; X64-NEXT:    leaq (%rbp,%r10,2), %rbp
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT:    bswapq %r10
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    andq %r13, %rax
+; X64-NEXT:    shlq $4, %rax
+; X64-NEXT:    movq %r15, %rdi
+; X64-NEXT:    andq %r15, %r10
+; X64-NEXT:    shrq $4, %r10
+; X64-NEXT:    orq %rax, %r10
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    andq %r11, %rax
+; X64-NEXT:    andq %r14, %r10
+; X64-NEXT:    shrq $2, %r10
+; X64-NEXT:    leaq (%r10,%rax,4), %rax
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    andq %rbx, %r10
+; X64-NEXT:    movabsq $-6148914691236517206, %r15 # imm = 0xAAAAAAAAAAAAAAAA
+; X64-NEXT:    andq %r15, %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    leaq (%rax,%r10,2), %r10
+; X64-NEXT:    bswapq %r9
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    andq %r13, %rax
+; X64-NEXT:    shlq $4, %rax
+; X64-NEXT:    andq %rdi, %r9
+; X64-NEXT:    shrq $4, %r9
+; X64-NEXT:    orq %rax, %r9
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    andq %r11, %rax
+; X64-NEXT:    andq %r14, %r9
+; X64-NEXT:    shrq $2, %r9
+; X64-NEXT:    leaq (%r9,%rax,4), %rax
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    andq %rbx, %r9
+; X64-NEXT:    andq %r15, %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    leaq (%rax,%r9,2), %r9
+; X64-NEXT:    bswapq %r8
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    andq %r13, %rax
+; X64-NEXT:    shlq $4, %rax
+; X64-NEXT:    andq %rdi, %r8
+; X64-NEXT:    shrq $4, %r8
+; X64-NEXT:    orq %rax, %r8
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    andq %r11, %rax
+; X64-NEXT:    andq %r14, %r8
+; X64-NEXT:    shrq $2, %r8
+; X64-NEXT:    leaq (%r8,%rax,4), %rax
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    andq %rbx, %r8
+; X64-NEXT:    andq %r15, %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    leaq (%rax,%r8,2), %r8
+; X64-NEXT:    bswapq %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    andq %r13, %rax
+; X64-NEXT:    shlq $4, %rax
+; X64-NEXT:    andq %rdi, %rcx
+; X64-NEXT:    shrq $4, %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    andq %r11, %rax
+; X64-NEXT:    andq %r14, %rcx
+; X64-NEXT:    shrq $2, %rcx
+; X64-NEXT:    leaq (%rcx,%rax,4), %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    andq %rbx, %rcx
+; X64-NEXT:    andq %r15, %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    leaq (%rax,%rcx,2), %rcx
+; X64-NEXT:    bswapq %rdx
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    andq %r13, %rax
+; X64-NEXT:    shlq $4, %rax
+; X64-NEXT:    andq %rdi, %rdx
+; X64-NEXT:    shrq $4, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    andq %r11, %rax
+; X64-NEXT:    andq %r14, %rdx
+; X64-NEXT:    shrq $2, %rdx
+; X64-NEXT:    leaq (%rdx,%rax,4), %rax
+; X64-NEXT:    movq %rax, %rdx
+; X64-NEXT:    andq %rbx, %rdx
+; X64-NEXT:    andq %r15, %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-NEXT:    bswapq %rsi
+; X64-NEXT:    andq %rsi, %r13
+; X64-NEXT:    andq %rdi, %rsi
+; X64-NEXT:    shlq $4, %r13
+; X64-NEXT:    shrq $4, %rsi
+; X64-NEXT:    orq %r13, %rsi
+; X64-NEXT:    andq %rsi, %r11
+; X64-NEXT:    andq %r14, %rsi
+; X64-NEXT:    shrq $2, %rsi
+; X64-NEXT:    leaq (%rsi,%r11,4), %rdx
+; X64-NEXT:    andq %rdx, %rbx
+; X64-NEXT:    andq %r15, %rdx
+; X64-NEXT:    shrq %rdx
+; X64-NEXT:    leaq (%rdx,%rbx,2), %rdx
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; X64-NEXT:    shrdq $48, %rdi, %rsi
+; X64-NEXT:    shrdq $48, %rbp, %rdi
+; X64-NEXT:    shrdq $48, %r10, %rbp
+; X64-NEXT:    shrdq $48, %r9, %r10
+; X64-NEXT:    shrdq $48, %r8, %r9
+; X64-NEXT:    shrdq $48, %rcx, %r8
+; X64-NEXT:    shrdq $48, %rax, %rcx
+; X64-NEXT:    shrdq $48, %rdx, %rax
+; X64-NEXT:    movq %rax, 56(%r12)
+; X64-NEXT:    movq %rcx, 48(%r12)
+; X64-NEXT:    movq %r8, 40(%r12)
+; X64-NEXT:    movq %r9, 32(%r12)
+; X64-NEXT:    movq %r10, 24(%r12)
+; X64-NEXT:    movq %rbp, 16(%r12)
+; X64-NEXT:    movq %rdi, 8(%r12)
+; X64-NEXT:    movq %rsi, (%r12)
+; X64-NEXT:    shrq $48, %rdx
+; X64-NEXT:    movw %dx, 64(%r12)
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %Z = call i528 @llvm.bitreverse.i528(i528 %A)
+  ret i528 %Z
+}
+declare i528 @llvm.bitreverse.i528(i528)

Modified: llvm/trunk/test/CodeGen/X86/bswap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bswap.ll?rev=344460&r1=344459&r2=344460&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bswap.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bswap.ll Sat Oct 13 10:47:20 2018
@@ -206,3 +206,153 @@ define i64 @finally_useful_bswap() {
   ret i64 %swapped
 }
 
+; Make sure we don't assert during type legalization promoting a large
+; bswap due to the need for a large shift that won't fit in the i8 returned
+; from getShiftAmountTy.
+define i528 @large_promotion(i528 %A) nounwind {
+; CHECK-LABEL: large_promotion:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %ebx
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    subl $44, %esp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    bswapl %ecx
+; CHECK-NEXT:    shrdl $16, %ecx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    bswapl %edx
+; CHECK-NEXT:    shrdl $16, %edx, %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    bswapl %esi
+; CHECK-NEXT:    shrdl $16, %esi, %edx
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    bswapl %edi
+; CHECK-NEXT:    shrdl $16, %edi, %esi
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    bswapl %ebx
+; CHECK-NEXT:    shrdl $16, %ebx, %edi
+; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    bswapl %ebp
+; CHECK-NEXT:    shrdl $16, %ebp, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    bswapl %ecx
+; CHECK-NEXT:    shrdl $16, %ecx, %ebp
+; CHECK-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    shrdl $16, %eax, %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    bswapl %ecx
+; CHECK-NEXT:    shrdl $16, %ecx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    shrdl $16, %eax, %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    bswapl %ebp
+; CHECK-NEXT:    shrdl $16, %ebp, %eax
+; CHECK-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    bswapl %ebx
+; CHECK-NEXT:    shrdl $16, %ebx, %ebp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    bswapl %esi
+; CHECK-NEXT:    shrdl $16, %esi, %ebx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    bswapl %edx
+; CHECK-NEXT:    shrdl $16, %edx, %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    bswapl %ecx
+; CHECK-NEXT:    shrdl $16, %ecx, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    bswapl %edi
+; CHECK-NEXT:    shrdl $16, %edi, %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl %ecx, 60(%eax)
+; CHECK-NEXT:    movl %edx, 56(%eax)
+; CHECK-NEXT:    movl %esi, 52(%eax)
+; CHECK-NEXT:    movl %ebx, 48(%eax)
+; CHECK-NEXT:    movl %ebp, 44(%eax)
+; CHECK-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 40(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 36(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 32(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 28(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 24(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 20(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 16(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 12(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 8(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, 4(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, (%eax)
+; CHECK-NEXT:    shrl $16, %edi
+; CHECK-NEXT:    movw %di, 64(%eax)
+; CHECK-NEXT:    addl $44, %esp
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl $4
+;
+; CHECK64-LABEL: large_promotion:
+; CHECK64:       # %bb.0:
+; CHECK64-NEXT:    pushq %rbx
+; CHECK64-NEXT:    movq %rdi, %rax
+; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
+; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; CHECK64-NEXT:    bswapq %r10
+; CHECK64-NEXT:    bswapq %rdi
+; CHECK64-NEXT:    shrdq $48, %rdi, %r10
+; CHECK64-NEXT:    bswapq %r11
+; CHECK64-NEXT:    shrdq $48, %r11, %rdi
+; CHECK64-NEXT:    bswapq %rbx
+; CHECK64-NEXT:    shrdq $48, %rbx, %r11
+; CHECK64-NEXT:    bswapq %r9
+; CHECK64-NEXT:    shrdq $48, %r9, %rbx
+; CHECK64-NEXT:    bswapq %r8
+; CHECK64-NEXT:    shrdq $48, %r8, %r9
+; CHECK64-NEXT:    bswapq %rcx
+; CHECK64-NEXT:    shrdq $48, %rcx, %r8
+; CHECK64-NEXT:    bswapq %rdx
+; CHECK64-NEXT:    shrdq $48, %rdx, %rcx
+; CHECK64-NEXT:    bswapq %rsi
+; CHECK64-NEXT:    shrdq $48, %rsi, %rdx
+; CHECK64-NEXT:    shrq $48, %rsi
+; CHECK64-NEXT:    movq %rdx, 56(%rax)
+; CHECK64-NEXT:    movq %rcx, 48(%rax)
+; CHECK64-NEXT:    movq %r8, 40(%rax)
+; CHECK64-NEXT:    movq %r9, 32(%rax)
+; CHECK64-NEXT:    movq %rbx, 24(%rax)
+; CHECK64-NEXT:    movq %r11, 16(%rax)
+; CHECK64-NEXT:    movq %rdi, 8(%rax)
+; CHECK64-NEXT:    movq %r10, (%rax)
+; CHECK64-NEXT:    movw %si, 64(%rax)
+; CHECK64-NEXT:    popq %rbx
+; CHECK64-NEXT:    retq
+  %Z = call i528 @llvm.bswap.i528(i528 %A)
+  ret i528 %Z
+}
+declare i528 @llvm.bswap.i528(i528)
