[llvm] b3b4727 - [X86] Replace (most) X86ISD::SHLD/SHRD usage with ISD::FSHL/FSHR generic opcodes (PR39467)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 11 04:33:27 PDT 2020
Author: Simon Pilgrim
Date: 2020-03-11T11:17:49Z
New Revision: b3b4727a3e7e170189e58ee8a6409112839a87b0
URL: https://github.com/llvm/llvm-project/commit/b3b4727a3e7e170189e58ee8a6409112839a87b0
DIFF: https://github.com/llvm/llvm-project/commit/b3b4727a3e7e170189e58ee8a6409112839a87b0.diff
LOG: [X86] Replace (most) X86ISD::SHLD/SHRD usage with ISD::FSHL/FSHR generic opcodes (PR39467)
For the i32 and i64 cases, X86ISD::SHLD/SHRD are close enough to ISD::FSHL/FSHR that we can use them directly; we just need to account for the operand commutation for SHRD.
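As a rough reference for that correspondence (an illustrative C++ sketch, not code from this patch; the helper names are made up):

#include <cstdint>

// ISD::FSHL(Hi, Lo, Amt) on i32: shift the 64-bit concatenation Hi:Lo left by
// Amt (mod 32) and keep the high 32 bits - the same thing SHLD computes.
uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
  Amt &= 31;
  if (Amt == 0)
    return Hi;
  return (Hi << Amt) | (Lo >> (32 - Amt)); // SHLD Hi, Lo, Amt
}

// ISD::FSHR(Hi, Lo, Amt) on i32: shift Hi:Lo right by Amt (mod 32) and keep
// the low 32 bits. SHRD's destination is the low half, hence the commutation.
uint32_t fshr32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
  Amt &= 31;
  if (Amt == 0)
    return Lo;
  return (Lo >> Amt) | (Hi << (32 - Amt)); // SHRD Lo, Hi, Amt
}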
The i16 SHLD/SHRD case is annoying as the shift amount is modulo-32 (vs the funnel shift's modulo-16), so I've added X86ISD::FSHL/FSHR equivalents, which match the generic implementation in all other respects.
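To make the modulo difference concrete (again only an illustrative sketch, assuming the reference semantics above): a generic i16 funnel shift takes its amount modulo 16, while SHLDW/SHRDW only reduce it modulo 32, so the i16 path masks the amount into [0, 15] before emitting the target node.

#include <cstdint>

// Generic i16 fshl: Amt = 17 behaves like Amt = 1. SHLDW would not reduce 17
// modulo 16, so it must never see such an amount.
uint16_t fshl16(uint16_t Hi, uint16_t Lo, uint32_t Amt) {
  Amt &= 15; // the "Amt & 15" inserted by the i16 lowering path
  if (Amt == 0)
    return Hi;
  return uint16_t((Hi << Amt) | (Lo >> (16 - Amt)));
}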
Something I'm slightly concerned about is that ISD::FSHL/FSHR legality is controlled by the Subtarget.isSHLDSlow() feature flag - we don't normally use non-ISA features for this, but it allows the DAG combines to continue to operate after legalization in many more cases.
The X86 *bits.ll changes are all affected by the same issue - we now have a "FSHR(-1,-1,amt) -> ROTR(-1,amt) -> (-1)" simplification that reduces the dependencies enough to perturb the branch fall-through code.
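The fold itself is straightforward (illustrative sketch only): a funnel shift whose two operands are equal is just a rotate, and rotating an all-ones value leaves it unchanged, so the shrdl of two -1 registers disappears.

#include <cstdint>

// FSHR(X, X, Amt) == ROTR(X, Amt); with X == -1 every rotation is still -1,
// so the whole computation folds to a constant and its dependencies vanish.
uint32_t rotrAllOnes(uint32_t Amt) {
  uint32_t X = 0xFFFFFFFFu;
  Amt &= 31;
  if (Amt == 0)
    return X;
  return (X >> Amt) | (X << (32 - Amt)); // always 0xFFFFFFFF
}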
Differential Revision: https://reviews.llvm.org/D75748
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/lib/Target/X86/X86InstrCompiler.td
llvm/lib/Target/X86/X86InstrInfo.td
llvm/lib/Target/X86/X86InstrShiftRotate.td
llvm/test/CodeGen/X86/clear-highbits.ll
llvm/test/CodeGen/X86/clear-lowbits.ll
llvm/test/CodeGen/X86/extract-bits.ll
llvm/test/CodeGen/X86/extract-lowbits.ll
llvm/test/CodeGen/X86/fshl.ll
llvm/test/CodeGen/X86/fshr.ll
llvm/test/CodeGen/X86/shift-combine.ll
llvm/test/CodeGen/X86/shift-parts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a56dd0427127..4c86c87fb33b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -207,10 +207,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+ // For slow shld targets we only lower for code size.
+ LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
+
setOperationAction(ShiftOp , MVT::i16 , Custom);
- setOperationAction(ShiftOp , MVT::i32 , Custom);
+ setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
if (Subtarget.is64Bit())
- setOperationAction(ShiftOp , MVT::i64 , Custom);
+ setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
}
if (!Subtarget.useSoftFloat()) {
@@ -18860,16 +18863,15 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue();
- if (IsFSHR)
- std::swap(Op0, Op1);
-
// i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
- if (VT == MVT::i16)
+ if (VT == MVT::i16) {
Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
DAG.getConstant(15, DL, Amt.getValueType()));
+ unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
+ return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
+ }
- unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
- return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+ return Op;
}
// Try to use a packed vector operation to handle i64 on 32-bit targets when
@@ -29963,8 +29965,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
NODE_NAME_CASE(BSF)
NODE_NAME_CASE(BSR)
- NODE_NAME_CASE(SHLD)
- NODE_NAME_CASE(SHRD)
+ NODE_NAME_CASE(FSHL)
+ NODE_NAME_CASE(FSHR)
NODE_NAME_CASE(FAND)
NODE_NAME_CASE(FANDN)
NODE_NAME_CASE(FOR)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d102685e1cbe..0da22acc99de 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -33,10 +33,12 @@ namespace llvm {
/// Bit scan reverse.
BSR,
- /// Double shift instructions. These correspond to
- /// X86::SHLDxx and X86::SHRDxx instructions.
- SHLD,
- SHRD,
+ /// X86 funnel/double shift i16 instructions. These correspond to
+  /// X86::SHLDW and X86::SHRDW instructions which have different amt
+ /// modulo rules to generic funnel shifts.
+ /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
+ FSHL,
+ FSHR,
/// Bitwise logical AND of floating point values. This corresponds
/// to X86::ANDPS or X86::ANDPD.
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index fe1efd2fc097..0ad53c42550e 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1782,21 +1782,24 @@ multiclass MaskedRotateAmountPats<SDNode frag, string name> {
defm : MaskedRotateAmountPats<rotl, "ROL">;
defm : MaskedRotateAmountPats<rotr, "ROR">;
-// Double shift amount is implicitly masked.
-multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
- // (shift x (and y, 31)) ==> (shift x, y)
- def : Pat<(frag GR16:$src1, GR16:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
- def : Pat<(frag GR32:$src1, GR32:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;
-
- // (shift x (and y, 63)) ==> (shift x, y)
- def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
-}
-
-defm : MaskedDoubleShiftAmountPats<X86shld, "SHLD">;
-defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;
+// Double "funnel" shift amount is implicitly masked.
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32)
+def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+// (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
let Predicates = [HasBMI2] in {
let AddedComplexity = 1 in {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 8629cf728b45..173b5ef95238 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -143,8 +143,8 @@ def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
-def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
-def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>;
+def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 9d974b716dda..ddc273b1706d 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -661,32 +661,32 @@ let Uses = [CL], SchedRW = [WriteSHDrrcl] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2, CL))]>,
TB, OpSize16;
def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1, CL))]>,
TB, OpSize16;
def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>,
+ [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2, CL))]>,
TB, OpSize32;
def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>,
+ [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1, CL))]>,
TB, OpSize32;
def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2, CL))]>,
TB;
def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, CL))]>,
TB;
} // SchedRW
@@ -695,42 +695,42 @@ def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2,
(i8 imm:$src3)))]>,
TB, OpSize16;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1,
(i8 imm:$src3)))]>,
TB, OpSize16;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2,
(i8 imm:$src3)))]>,
TB, OpSize32;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1,
(i8 imm:$src3)))]>,
TB, OpSize32;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2,
(i8 imm:$src3)))]>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1,
(i8 imm:$src3)))]>,
TB;
} // SchedRW
@@ -739,70 +739,70 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize16;
+ [(store (X86fshl (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize16;
def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize16;
+ [(store (X86fshr GR16:$src2, (loadi16 addr:$dst), CL),
+ addr:$dst)]>, TB, OpSize16;
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ [(store (fshl (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)]>, TB, OpSize32;
def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB, OpSize32;
+ [(store (fshr GR32:$src2, (loadi32 addr:$dst), CL),
+ addr:$dst)]>, TB, OpSize32;
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ [(store (fshl (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL),
+ addr:$dst)]>, TB;
} // SchedRW
let SchedRW = [WriteSHDmri] in {
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (X86fshl (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize16;
def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (X86fshr GR16:$src2, (loadi16 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize16;
def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshl (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize32;
def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshr GR32:$src2, (loadi32 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize32;
def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshl (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB;
def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshr GR64:$src2, (loadi64 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB;
} // SchedRW
diff --git a/llvm/test/CodeGen/X86/clear-highbits.ll b/llvm/test/CodeGen/X86/clear-highbits.ll
index 10e96c1b1ca3..be34c5de550a 100644
--- a/llvm/test/CodeGen/X86/clear-highbits.ll
+++ b/llvm/test/CodeGen/X86/clear-highbits.ll
@@ -513,33 +513,36 @@ define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind
define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c0:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB13_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %esi
+; X86-FALLBACK0-NEXT: shrl %cl, %esi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB13_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB13_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %esi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB13_3
+; X86-FALLBACK0-NEXT: .LBB13_1:
+; X86-FALLBACK0-NEXT: movl %esi, %eax
+; X86-FALLBACK0-NEXT: .LBB13_3:
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c0:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %esi
-; X86-FALLBACK1-NEXT: shrl %cl, %esi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK1-NEXT: cmovel %esi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %esi, %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK1-NEXT: popl %esi
@@ -549,14 +552,13 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %esi
-; X86-FALLBACK2-NEXT: shrl %cl, %esi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK2-NEXT: cmovel %esi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %esi, %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK2-NEXT: popl %esi
@@ -568,11 +570,10 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
@@ -600,33 +601,36 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c1_indexzext:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB14_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %esi
+; X86-FALLBACK0-NEXT: shrl %cl, %esi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB14_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB14_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %esi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB14_3
+; X86-FALLBACK0-NEXT: .LBB14_1:
+; X86-FALLBACK0-NEXT: movl %esi, %eax
+; X86-FALLBACK0-NEXT: .LBB14_3:
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c1_indexzext:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %esi
-; X86-FALLBACK1-NEXT: shrl %cl, %esi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK1-NEXT: cmovel %esi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %esi, %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK1-NEXT: popl %esi
@@ -636,14 +640,13 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %esi
-; X86-FALLBACK2-NEXT: shrl %cl, %esi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK2-NEXT: cmovel %esi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %esi, %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK2-NEXT: popl %esi
@@ -655,11 +658,10 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
@@ -689,22 +691,26 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c2_load:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %edi
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB15_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %edi
+; X86-FALLBACK0-NEXT: shrl %cl, %edi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB15_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB15_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %edi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB15_3
+; X86-FALLBACK0-NEXT: .LBB15_1:
+; X86-FALLBACK0-NEXT: movl %edi, %eax
+; X86-FALLBACK0-NEXT: .LBB15_3:
; X86-FALLBACK0-NEXT: andl (%esi), %eax
; X86-FALLBACK0-NEXT: andl 4(%esi), %edx
; X86-FALLBACK0-NEXT: popl %esi
+; X86-FALLBACK0-NEXT: popl %edi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c2_load:
@@ -713,14 +719,13 @@ define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %edi
-; X86-FALLBACK1-NEXT: shrl %cl, %edi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK1-NEXT: cmovel %edi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %edi, %eax
; X86-FALLBACK1-NEXT: andl (%esi), %eax
; X86-FALLBACK1-NEXT: andl 4(%esi), %edx
; X86-FALLBACK1-NEXT: popl %esi
@@ -733,14 +738,13 @@ define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %edi
-; X86-FALLBACK2-NEXT: shrl %cl, %edi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK2-NEXT: cmovel %edi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %edi, %eax
; X86-FALLBACK2-NEXT: andl (%esi), %eax
; X86-FALLBACK2-NEXT: andl 4(%esi), %edx
; X86-FALLBACK2-NEXT: popl %esi
@@ -749,21 +753,20 @@ define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
;
; X86-BMI2-LABEL: clear_highbits64_c2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %eax
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
+; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %edi, %eax
-; X86-BMI2-NEXT: cmovel %edi, %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: andl 4(%esi), %edx
+; X86-BMI2-NEXT: testb $32, %bl
+; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c2_load:
@@ -789,22 +792,26 @@ define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c3_load_indexzext:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %edi
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB16_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %edi
+; X86-FALLBACK0-NEXT: shrl %cl, %edi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB16_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB16_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %edi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB16_3
+; X86-FALLBACK0-NEXT: .LBB16_1:
+; X86-FALLBACK0-NEXT: movl %edi, %eax
+; X86-FALLBACK0-NEXT: .LBB16_3:
; X86-FALLBACK0-NEXT: andl (%esi), %eax
; X86-FALLBACK0-NEXT: andl 4(%esi), %edx
; X86-FALLBACK0-NEXT: popl %esi
+; X86-FALLBACK0-NEXT: popl %edi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c3_load_indexzext:
@@ -813,14 +820,13 @@ define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwin
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %edi
-; X86-FALLBACK1-NEXT: shrl %cl, %edi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK1-NEXT: cmovel %edi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %edi, %eax
; X86-FALLBACK1-NEXT: andl (%esi), %eax
; X86-FALLBACK1-NEXT: andl 4(%esi), %edx
; X86-FALLBACK1-NEXT: popl %esi
@@ -833,14 +839,13 @@ define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwin
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %edi
-; X86-FALLBACK2-NEXT: shrl %cl, %edi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %edi, %eax
-; X86-FALLBACK2-NEXT: cmovel %edi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %edi, %eax
; X86-FALLBACK2-NEXT: andl (%esi), %eax
; X86-FALLBACK2-NEXT: andl 4(%esi), %edx
; X86-FALLBACK2-NEXT: popl %esi
@@ -849,21 +854,20 @@ define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwin
;
; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %eax
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
+; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %edi, %eax
-; X86-BMI2-NEXT: cmovel %edi, %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: andl 4(%esi), %edx
+; X86-BMI2-NEXT: testb $32, %bl
+; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
@@ -891,33 +895,36 @@ define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwin
define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c4_commutative:
; X86-FALLBACK0: # %bb.0:
+; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
-; X86-FALLBACK0-NEXT: movl $-1, %edx
-; X86-FALLBACK0-NEXT: shrl %cl, %edx
-; X86-FALLBACK0-NEXT: shrdl %cl, %eax, %eax
-; X86-FALLBACK0-NEXT: testb $32, %cl
-; X86-FALLBACK0-NEXT: je .LBB17_2
-; X86-FALLBACK0-NEXT: # %bb.1:
-; X86-FALLBACK0-NEXT: movl %edx, %eax
+; X86-FALLBACK0-NEXT: movl $-1, %esi
+; X86-FALLBACK0-NEXT: shrl %cl, %esi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
-; X86-FALLBACK0-NEXT: .LBB17_2:
+; X86-FALLBACK0-NEXT: testb $32, %cl
+; X86-FALLBACK0-NEXT: jne .LBB17_1
+; X86-FALLBACK0-NEXT: # %bb.2:
+; X86-FALLBACK0-NEXT: movl %esi, %edx
+; X86-FALLBACK0-NEXT: jmp .LBB17_3
+; X86-FALLBACK0-NEXT: .LBB17_1:
+; X86-FALLBACK0-NEXT: movl %esi, %eax
+; X86-FALLBACK0-NEXT: .LBB17_3:
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c4_commutative:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: movl $-1, %esi
-; X86-FALLBACK1-NEXT: shrl %cl, %esi
-; X86-FALLBACK1-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK1-NEXT: movl $-1, %eax
+; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
-; X86-FALLBACK1-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK1-NEXT: cmovel %esi, %edx
+; X86-FALLBACK1-NEXT: cmovel %eax, %edx
+; X86-FALLBACK1-NEXT: cmovel %esi, %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK1-NEXT: popl %esi
@@ -927,14 +934,13 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: movl $-1, %esi
-; X86-FALLBACK2-NEXT: shrl %cl, %esi
-; X86-FALLBACK2-NEXT: shrdl %cl, %eax, %eax
+; X86-FALLBACK2-NEXT: movl $-1, %eax
+; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
-; X86-FALLBACK2-NEXT: cmovnel %esi, %eax
-; X86-FALLBACK2-NEXT: cmovel %esi, %edx
+; X86-FALLBACK2-NEXT: cmovel %eax, %edx
+; X86-FALLBACK2-NEXT: cmovel %esi, %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK2-NEXT: popl %esi
@@ -946,11 +952,10 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: cmovel %esi, %edx
+; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
@@ -1064,7 +1069,6 @@ define i64 @oneuse64(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK0-NEXT: movl $-1, %esi
; X86-FALLBACK0-NEXT: movl $-1, %edi
; X86-FALLBACK0-NEXT: shrl %cl, %edi
-; X86-FALLBACK0-NEXT: shrdl %cl, %esi, %esi
; X86-FALLBACK0-NEXT: testb $32, %cl
; X86-FALLBACK0-NEXT: je .LBB19_2
; X86-FALLBACK0-NEXT: # %bb.1:
@@ -1094,7 +1098,6 @@ define i64 @oneuse64(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK1-NEXT: movl $-1, %esi
; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: shrl %cl, %eax
-; X86-FALLBACK1-NEXT: shrdl %cl, %esi, %esi
; X86-FALLBACK1-NEXT: xorl %edi, %edi
; X86-FALLBACK1-NEXT: testb $32, %cl
; X86-FALLBACK1-NEXT: cmovnel %eax, %esi
@@ -1122,7 +1125,6 @@ define i64 @oneuse64(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK2-NEXT: movl $-1, %esi
; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: shrl %cl, %eax
-; X86-FALLBACK2-NEXT: shrdl %cl, %esi, %esi
; X86-FALLBACK2-NEXT: xorl %edi, %edi
; X86-FALLBACK2-NEXT: testb $32, %cl
; X86-FALLBACK2-NEXT: cmovnel %eax, %esi
@@ -1146,14 +1148,13 @@ define i64 @oneuse64(i64 %val, i64 %numhighbits) nounwind {
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-1, %esi
-; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax
-; X86-BMI2-NEXT: shrdl %cl, %esi, %esi
+; X86-BMI2-NEXT: shrxl %eax, %esi, %ecx
; X86-BMI2-NEXT: xorl %edi, %edi
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: cmovnel %eax, %esi
-; X86-BMI2-NEXT: cmovel %eax, %edi
+; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: cmovnel %ecx, %esi
+; X86-BMI2-NEXT: cmovel %ecx, %edi
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
diff --git a/llvm/test/CodeGen/X86/clear-lowbits.ll b/llvm/test/CodeGen/X86/clear-lowbits.ll
index be251eb99f2e..a8f2ecde03d9 100644
--- a/llvm/test/CodeGen/X86/clear-lowbits.ll
+++ b/llvm/test/CodeGen/X86/clear-lowbits.ll
@@ -502,15 +502,14 @@ define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB13_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB13_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c0:
@@ -518,15 +517,14 @@ define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB13_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB13_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c0:
@@ -555,15 +553,14 @@ define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB14_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB14_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext:
@@ -571,15 +568,14 @@ define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB14_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB14_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
@@ -612,35 +608,33 @@ define i64 @clear_lowbits64_c2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB15_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB15_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB15_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB15_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c2_load:
@@ -672,35 +666,33 @@ define i64 @clear_lowbits64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB16_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB16_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB16_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB16_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext:
@@ -732,15 +724,14 @@ define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB17_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB17_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_c4_commutative:
@@ -748,15 +739,14 @@ define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB17_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB17_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
@@ -1325,15 +1315,14 @@ define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB31_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB31_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic0:
@@ -1342,15 +1331,14 @@ define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB31_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB31_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
@@ -1383,15 +1371,14 @@ define i64 @clear_lowbits64_ic1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB32_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB32_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
@@ -1400,15 +1387,14 @@ define i64 @clear_lowbits64_ic1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB32_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB32_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
@@ -1445,36 +1431,34 @@ define i64 @clear_lowbits64_ic2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB33_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB33_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb $64, %cl
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %bl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB33_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB33_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
@@ -1510,36 +1494,34 @@ define i64 @clear_lowbits64_ic3_load_indexzext(i64* %w, i8 %numlowbits) nounwind
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB34_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB34_2:
-; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: andl (%esi), %eax
+; X86-NOBMI2-NEXT: andl 4(%esi), %edx
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb $64, %cl
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %bl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %edx
-; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB34_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB34_2:
-; X86-BMI2-NEXT: andl 4(%esi), %edx
-; X86-BMI2-NEXT: andl (%esi), %eax
-; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI2-NEXT: andl 4(%ecx), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
@@ -1575,15 +1557,14 @@ define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
-; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx
; X86-NOBMI2-NEXT: testb $32, %cl
; X86-NOBMI2-NEXT: je .LBB35_2
; X86-NOBMI2-NEXT: # %bb.1:
; X86-NOBMI2-NEXT: movl %eax, %edx
; X86-NOBMI2-NEXT: xorl %eax, %eax
; X86-NOBMI2-NEXT: .LBB35_2:
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative:
@@ -1592,15 +1573,14 @@ define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
-; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: je .LBB35_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB35_2:
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
@@ -1712,22 +1692,24 @@ define i64 @oneuse64(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-NEXT: pushl %eax
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %esi
-; X86-NOBMI2-NEXT: movl $-1, %edi
-; X86-NOBMI2-NEXT: shll %cl, %edi
-; X86-NOBMI2-NEXT: shldl %cl, %esi, %esi
-; X86-NOBMI2-NEXT: testb $32, %cl
-; X86-NOBMI2-NEXT: je .LBB37_2
-; X86-NOBMI2-NEXT: # %bb.1:
-; X86-NOBMI2-NEXT: movl %edi, %esi
+; X86-NOBMI2-NEXT: movl $-1, %eax
+; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: xorl %edi, %edi
-; X86-NOBMI2-NEXT: .LBB37_2:
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: jne .LBB37_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edi
+; X86-NOBMI2-NEXT: jmp .LBB37_3
+; X86-NOBMI2-NEXT: .LBB37_1:
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB37_3:
; X86-NOBMI2-NEXT: subl $8, %esp
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: pushl %edi
; X86-NOBMI2-NEXT: calll use64
; X86-NOBMI2-NEXT: addl $16, %esp
-; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movl %edi, %eax
; X86-NOBMI2-NEXT: movl %esi, %edx
; X86-NOBMI2-NEXT: addl $4, %esp
@@ -1742,21 +1724,23 @@ define i64 @oneuse64(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI2-NEXT: pushl %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %esi
-; X86-BMI2-NEXT: shlxl %ecx, %esi, %edi
-; X86-BMI2-NEXT: shldl %cl, %esi, %esi
-; X86-BMI2-NEXT: testb $32, %cl
-; X86-BMI2-NEXT: je .LBB37_2
-; X86-BMI2-NEXT: # %bb.1:
-; X86-BMI2-NEXT: movl %edi, %esi
+; X86-BMI2-NEXT: shlxl %ecx, %esi, %eax
; X86-BMI2-NEXT: xorl %edi, %edi
-; X86-BMI2-NEXT: .LBB37_2:
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: jne .LBB37_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: jmp .LBB37_3
+; X86-BMI2-NEXT: .LBB37_1:
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB37_3:
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: calll use64
; X86-BMI2-NEXT: addl $16, %esp
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %edi, %eax
; X86-BMI2-NEXT: movl %esi, %edx
; X86-BMI2-NEXT: addl $4, %esp
diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index edb603f87d0e..9825b828eddf 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -2661,6 +2661,7 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b0:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
@@ -2670,6 +2671,7 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB25_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -2677,22 +2679,24 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB25_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB25_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB25_4:
+; X86-NOBMI-NEXT: jne .LBB25_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB25_5
+; X86-NOBMI-NEXT: .LBB25_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB25_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b0:
@@ -2717,7 +2721,6 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB25_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -2734,34 +2737,30 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-BMI1BMI2-LABEL: bextr64_b0:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB25_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB25_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB25_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB25_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
@@ -2800,6 +2799,7 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b1_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
@@ -2809,6 +2809,7 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB26_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -2816,22 +2817,24 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB26_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB26_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB26_4:
+; X86-NOBMI-NEXT: jne .LBB26_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB26_5
+; X86-NOBMI-NEXT: .LBB26_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB26_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b1_indexzext:
@@ -2856,7 +2859,6 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB26_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -2873,34 +2875,30 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
; X86-BMI1BMI2-LABEL: bextr64_b1_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB26_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB26_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB26_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB26_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
@@ -2943,6 +2941,7 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b2_load:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
@@ -2953,6 +2952,7 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB27_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -2960,22 +2960,24 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB27_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB27_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB27_4:
+; X86-NOBMI-NEXT: jne .LBB27_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB27_5
+; X86-NOBMI-NEXT: .LBB27_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB27_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b2_load:
@@ -3001,7 +3003,6 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB27_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -3018,35 +3019,31 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-BMI1BMI2-LABEL: bextr64_b2_load:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %edi, %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi
+; X86-BMI1BMI2-NEXT: movl (%edx), %eax
+; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx
+; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB27_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB27_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB27_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB27_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
@@ -3087,6 +3084,7 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
@@ -3097,6 +3095,7 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB28_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -3104,22 +3103,24 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB28_2:
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB28_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %eax, %edx
-; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB28_4:
+; X86-NOBMI-NEXT: jne .LBB28_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: jmp .LBB28_5
+; X86-NOBMI-NEXT: .LBB28_3:
+; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: .LBB28_5:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext:
@@ -3145,7 +3146,6 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB28_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -3162,35 +3162,31 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
; X86-BMI1BMI2-LABEL: bextr64_b3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %edi, %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi
+; X86-BMI1BMI2-NEXT: movl (%edx), %eax
+; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx
+; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB28_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB28_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB28_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB28_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
@@ -3235,6 +3231,7 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr64_b4_commutative:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
@@ -3244,6 +3241,7 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: shrl %cl, %edx
; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax
+; X86-NOBMI-NEXT: xorl %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB29_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -3251,22 +3249,24 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: .LBB29_2:
; X86-NOBMI-NEXT: movl $-1, %edi
-; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movb %ch, %cl
-; X86-NOBMI-NEXT: shll %cl, %esi
-; X86-NOBMI-NEXT: shldl %cl, %edi, %edi
+; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %ch
-; X86-NOBMI-NEXT: je .LBB29_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %esi, %edi
-; X86-NOBMI-NEXT: xorl %esi, %esi
-; X86-NOBMI-NEXT: .LBB29_4:
+; X86-NOBMI-NEXT: jne .LBB29_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %ebx, %esi
+; X86-NOBMI-NEXT: jmp .LBB29_5
+; X86-NOBMI-NEXT: .LBB29_3:
+; X86-NOBMI-NEXT: movl %ebx, %edi
+; X86-NOBMI-NEXT: .LBB29_5:
; X86-NOBMI-NEXT: notl %edi
; X86-NOBMI-NEXT: andl %edi, %edx
; X86-NOBMI-NEXT: notl %esi
; X86-NOBMI-NEXT: andl %esi, %eax
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
+; X86-NOBMI-NEXT: popl %ebx
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bextr64_b4_commutative:
@@ -3291,7 +3291,6 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl %eax, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %al
; X86-BMI1NOTBM-NEXT: je .LBB29_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -3308,34 +3307,30 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
; X86-BMI1BMI2-LABEL: bextr64_b4_commutative:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB29_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB29_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %edi
-; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB29_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edi
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %ecx, %esi
+; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI1BMI2-NEXT: .LBB29_4:
-; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx
+; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
@@ -3379,42 +3374,44 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %esi, %ebp
-; X86-NOBMI-NEXT: movl %eax, %ecx
+; X86-NOBMI-NEXT: movb %al, %cl
; X86-NOBMI-NEXT: shrl %cl, %ebp
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %ebx
+; X86-NOBMI-NEXT: shrdl %cl, %esi, %edx
+; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %al
; X86-NOBMI-NEXT: je .LBB30_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %ebp, %ebx
+; X86-NOBMI-NEXT: movl %ebp, %edx
; X86-NOBMI-NEXT: xorl %ebp, %ebp
; X86-NOBMI-NEXT: .LBB30_2:
-; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
-; X86-NOBMI-NEXT: movl %edx, %ecx
-; X86-NOBMI-NEXT: shll %cl, %edi
-; X86-NOBMI-NEXT: shldl %cl, %esi, %esi
-; X86-NOBMI-NEXT: testb $32, %dl
-; X86-NOBMI-NEXT: je .LBB30_4
-; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %edi, %esi
-; X86-NOBMI-NEXT: xorl %edi, %edi
-; X86-NOBMI-NEXT: .LBB30_4:
-; X86-NOBMI-NEXT: notl %esi
-; X86-NOBMI-NEXT: andl %ebp, %esi
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: movb %ch, %cl
+; X86-NOBMI-NEXT: shll %cl, %esi
+; X86-NOBMI-NEXT: testb $32, %ch
+; X86-NOBMI-NEXT: jne .LBB30_3
+; X86-NOBMI-NEXT: # %bb.4:
+; X86-NOBMI-NEXT: movl %esi, %ebx
+; X86-NOBMI-NEXT: jmp .LBB30_5
+; X86-NOBMI-NEXT: .LBB30_3:
+; X86-NOBMI-NEXT: movl %esi, %edi
+; X86-NOBMI-NEXT: .LBB30_5:
; X86-NOBMI-NEXT: notl %edi
-; X86-NOBMI-NEXT: andl %ebx, %edi
+; X86-NOBMI-NEXT: andl %ebp, %edi
+; X86-NOBMI-NEXT: notl %ebx
+; X86-NOBMI-NEXT: andl %edx, %ebx
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NOBMI-NEXT: pushl %eax
; X86-NOBMI-NEXT: calll use64
; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %edi, %eax
-; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: movl %edi, %edx
; X86-NOBMI-NEXT: addl $12, %esp
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
@@ -3447,7 +3444,6 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl %edx, %ecx
; X86-BMI1NOTBM-NEXT: shll %cl, %ebp
-; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %dl
; X86-BMI1NOTBM-NEXT: je .LBB30_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -3477,34 +3473,32 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: subl $12, %esp
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movl %eax, %ecx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrxl %eax, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %al
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx
+; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB30_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %edi
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %edx, %eax
+; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB30_2:
-; X86-BMI1BMI2-NEXT: movl $-1, %ebp
-; X86-BMI1BMI2-NEXT: shlxl %edx, %ebp, %ebx
-; X86-BMI1BMI2-NEXT: movl %edx, %ecx
-; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %ebp
-; X86-BMI1BMI2-NEXT: testb $32, %dl
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI1BMI2-NEXT: movl $-1, %esi
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %edi
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB30_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %edi, %esi
+; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB30_4:
-; X86-BMI1BMI2-NEXT: andnl %esi, %ebp, %esi
-; X86-BMI1BMI2-NEXT: andnl %edi, %ebx, %edi
+; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %esi
+; X86-BMI1BMI2-NEXT: andnl %eax, %edi, %edi
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-BMI1BMI2-NEXT: pushl %eax
+; X86-BMI1BMI2-NEXT: pushl %ebp
+; X86-BMI1BMI2-NEXT: pushl %ecx
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
; X86-BMI1BMI2-NEXT: movl %edi, %eax
@@ -4888,7 +4882,6 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB41_4
; X86-NOBMI-NEXT: # %bb.3:
@@ -4935,7 +4928,6 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB41_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -4976,24 +4968,23 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB41_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB41_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB41_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
@@ -5097,7 +5088,6 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB42_4
; X86-NOBMI-NEXT: # %bb.3:
@@ -5144,7 +5134,6 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB42_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -5185,24 +5174,23 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB42_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB42_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB42_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
@@ -5310,7 +5298,6 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB43_4
; X86-NOBMI-NEXT: # %bb.3:
@@ -5358,7 +5345,6 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB43_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -5400,24 +5386,23 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB43_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB43_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB43_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
@@ -5523,7 +5508,6 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB44_4
; X86-NOBMI-NEXT: # %bb.3:
@@ -5571,7 +5555,6 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB44_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -5613,24 +5596,23 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB44_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB44_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB44_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
@@ -5738,7 +5720,6 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB45_4
; X86-NOBMI-NEXT: # %bb.3:
@@ -5785,7 +5766,6 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB45_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -5826,24 +5806,23 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB45_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %ebx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %ebp
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB45_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB45_4:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: pushl %ebp
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl %ebx, %esi
-; X86-BMI1BMI2-NEXT: andl %ebp, %edi
+; X86-BMI1BMI2-NEXT: andl %ebp, %esi
+; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: movl %edi, %edx
; X86-BMI1BMI2-NEXT: addl $12, %esp
@@ -5947,7 +5926,6 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: shrl %cl, %ebp
-; X86-NOBMI-NEXT: shrdl %cl, %ebx, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB46_4
; X86-NOBMI-NEXT: # %bb.3:
@@ -5999,7 +5977,6 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB46_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
@@ -6045,12 +6022,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB46_2:
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: movl $-1, %ebp
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %ebx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB46_4
; X86-BMI1BMI2-NEXT: # %bb.3:
; X86-BMI1BMI2-NEXT: movl %ebx, %ebp
@@ -6175,14 +6151,12 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-NOBMI-NEXT: .LBB47_2:
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB47_4
; X86-NOBMI-NEXT: # %bb.3:
-; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: .LBB47_4:
; X86-NOBMI-NEXT: andl %edx, %eax
; X86-NOBMI-NEXT: popl %esi
@@ -6204,14 +6178,12 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-BMI1NOTBM-NEXT: .LBB47_2:
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB47_4
; X86-BMI1NOTBM-NEXT: # %bb.3:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: .LBB47_4:
; X86-BMI1NOTBM-NEXT: andl %edx, %eax
; X86-BMI1NOTBM-NEXT: popl %esi
@@ -6219,7 +6191,6 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
;
; X86-BMI1BMI2-LABEL: bextr64_32_c0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -6231,16 +6202,13 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
; X86-BMI1BMI2-NEXT: .LBB47_2:
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB47_4
; X86-BMI1BMI2-NEXT: # %bb.3:
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB47_4:
; X86-BMI1BMI2-NEXT: andl %edx, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bextr64_32_c0:
diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index c7fa8617e072..e93638132e7d 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -1356,58 +1356,56 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b0:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB20_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB20_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB20_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: jmp .LBB20_3
+; X86-NOBMI-NEXT: .LBB20_1:
+; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: .LBB20_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b0:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB20_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %edx
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB20_2:
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1NOTBM-NEXT: popl %esi
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %dl
; X86-BMI1BMI2-NEXT: je .LBB20_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %ecx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB20_2:
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b0:
@@ -1439,58 +1437,56 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB21_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB21_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB21_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: jmp .LBB21_3
+; X86-NOBMI-NEXT: .LBB21_1:
+; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: .LBB21_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB21_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %edx
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB21_2:
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1NOTBM-NEXT: popl %esi
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %dl
; X86-BMI1BMI2-NEXT: je .LBB21_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %ecx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB21_2:
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
@@ -1525,63 +1521,65 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b2_load:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB22_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %edi
+; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB22_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB22_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: jmp .LBB22_3
+; X86-NOBMI-NEXT: .LBB22_1:
+; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: .LBB22_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: andl (%esi), %eax
+; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: popl %esi
+; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b2_load:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %esi
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB22_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %edx
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %esi
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB22_2:
-; X86-BMI1NOTBM-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1NOTBM-NEXT: andnl (%eax), %esi, %eax
+; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b2_load:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB22_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %edx
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %edx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB22_2:
-; X86-BMI1BMI2-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1BMI2-NEXT: andnl (%eax), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b2_load:
@@ -1614,63 +1612,65 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB23_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %edi
+; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB23_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB23_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: jmp .LBB23_3
+; X86-NOBMI-NEXT: .LBB23_1:
+; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: .LBB23_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: andl (%esi), %eax
+; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: popl %esi
+; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %esi
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB23_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %edx
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %esi
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB23_2:
-; X86-BMI1NOTBM-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1NOTBM-NEXT: andnl (%eax), %esi, %eax
+; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: pushl %ebx
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %edx
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB23_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %edx
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %edx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB23_2:
-; X86-BMI1BMI2-NEXT: andnl 4(%eax), %edx, %edx
-; X86-BMI1BMI2-NEXT: andnl (%eax), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
@@ -1706,58 +1706,56 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b4_commutative:
; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
-; X86-NOBMI-NEXT: movl $-1, %eax
-; X86-NOBMI-NEXT: shll %cl, %eax
-; X86-NOBMI-NEXT: shldl %cl, %edx, %edx
-; X86-NOBMI-NEXT: testb $32, %cl
-; X86-NOBMI-NEXT: je .LBB24_2
-; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %eax, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
-; X86-NOBMI-NEXT: .LBB24_2:
+; X86-NOBMI-NEXT: testb $32, %cl
+; X86-NOBMI-NEXT: jne .LBB24_1
+; X86-NOBMI-NEXT: # %bb.2:
+; X86-NOBMI-NEXT: movl %esi, %eax
+; X86-NOBMI-NEXT: jmp .LBB24_3
+; X86-NOBMI-NEXT: .LBB24_1:
+; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: .LBB24_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
-; X86-BMI1NOTBM-NEXT: movl $-1, %esi
-; X86-BMI1NOTBM-NEXT: shll %cl, %esi
-; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT: shll %cl, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB24_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
+; X86-BMI1NOTBM-NEXT: movl %eax, %edx
+; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB24_2:
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1NOTBM-NEXT: popl %esi
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax
+; X86-BMI1BMI2-NEXT: testb $32, %dl
; X86-BMI1BMI2-NEXT: je .LBB24_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: xorl %esi, %esi
+; X86-BMI1BMI2-NEXT: movl %eax, %ecx
+; X86-BMI1BMI2-NEXT: xorl %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB24_2:
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b4_commutative:
@@ -2628,7 +2626,6 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB34_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -2659,7 +2656,6 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB34_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
@@ -2685,26 +2681,25 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %edi
+; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB34_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edi, %esi
-; X86-BMI1BMI2-NEXT: xorl %edi, %edi
+; X86-BMI1BMI2-NEXT: movl %esi, %edi
+; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB34_2:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
@@ -2785,7 +2780,6 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB35_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -2816,7 +2810,6 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB35_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
@@ -2842,26 +2835,25 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %edi
+; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB35_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edi, %esi
-; X86-BMI1BMI2-NEXT: xorl %edi, %edi
+; X86-BMI1BMI2-NEXT: movl %esi, %edi
+; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB35_2:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
@@ -2944,24 +2936,23 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB36_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: .LBB36_2:
-; X86-NOBMI-NEXT: movl (%edx), %esi
-; X86-NOBMI-NEXT: andl %eax, %esi
-; X86-NOBMI-NEXT: movl 4(%edx), %edi
-; X86-NOBMI-NEXT: andl %ebx, %edi
+; X86-NOBMI-NEXT: movl 4(%edx), %esi
+; X86-NOBMI-NEXT: andl %ebx, %esi
+; X86-NOBMI-NEXT: movl (%edx), %edi
+; X86-NOBMI-NEXT: andl %eax, %edi
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %eax
; X86-NOBMI-NEXT: calll use64
; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
@@ -2978,24 +2969,23 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB36_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: .LBB36_2:
-; X86-BMI1NOTBM-NEXT: movl (%edx), %esi
-; X86-BMI1NOTBM-NEXT: andl %eax, %esi
-; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi
-; X86-BMI1NOTBM-NEXT: andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi
+; X86-BMI1NOTBM-NEXT: andl %ebx, %esi
+; X86-BMI1NOTBM-NEXT: movl (%edx), %edi
+; X86-BMI1NOTBM-NEXT: andl %eax, %edi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %eax
; X86-BMI1NOTBM-NEXT: calll use64
; X86-BMI1NOTBM-NEXT: addl $16, %esp
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: movl %edi, %edx
+; X86-BMI1NOTBM-NEXT: movl %edi, %eax
+; X86-BMI1NOTBM-NEXT: movl %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
@@ -3006,29 +2996,28 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb $64, %bl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB36_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %ebx, %eax
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %edx, %ecx
+; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB36_2:
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: andl %eax, %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: andl %ebx, %edi
+; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
+; X86-BMI1BMI2-NEXT: andl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl (%eax), %edi
+; X86-BMI1BMI2-NEXT: andl %ecx, %edi
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %eax
+; X86-BMI1BMI2-NEXT: pushl %edx
+; X86-BMI1BMI2-NEXT: pushl %ecx
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
@@ -3098,24 +3087,23 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB37_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: .LBB37_2:
-; X86-NOBMI-NEXT: movl (%edx), %esi
-; X86-NOBMI-NEXT: andl %eax, %esi
-; X86-NOBMI-NEXT: movl 4(%edx), %edi
-; X86-NOBMI-NEXT: andl %ebx, %edi
+; X86-NOBMI-NEXT: movl 4(%edx), %esi
+; X86-NOBMI-NEXT: andl %ebx, %esi
+; X86-NOBMI-NEXT: movl (%edx), %edi
+; X86-NOBMI-NEXT: andl %eax, %edi
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %eax
; X86-NOBMI-NEXT: calll use64
; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: movl %edi, %edx
+; X86-NOBMI-NEXT: movl %edi, %eax
+; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
@@ -3132,24 +3120,23 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB37_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: .LBB37_2:
-; X86-BMI1NOTBM-NEXT: movl (%edx), %esi
-; X86-BMI1NOTBM-NEXT: andl %eax, %esi
-; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi
-; X86-BMI1NOTBM-NEXT: andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi
+; X86-BMI1NOTBM-NEXT: andl %ebx, %esi
+; X86-BMI1NOTBM-NEXT: movl (%edx), %edi
+; X86-BMI1NOTBM-NEXT: andl %eax, %edi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %eax
; X86-BMI1NOTBM-NEXT: calll use64
; X86-BMI1NOTBM-NEXT: addl $16, %esp
-; X86-BMI1NOTBM-NEXT: movl %esi, %eax
-; X86-BMI1NOTBM-NEXT: movl %edi, %edx
+; X86-BMI1NOTBM-NEXT: movl %edi, %eax
+; X86-BMI1NOTBM-NEXT: movl %esi, %edx
; X86-BMI1NOTBM-NEXT: popl %esi
; X86-BMI1NOTBM-NEXT: popl %edi
; X86-BMI1NOTBM-NEXT: popl %ebx
@@ -3160,29 +3147,28 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb $64, %bl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT: movl $-1, %ecx
+; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx
+; X86-BMI1BMI2-NEXT: testb $32, %bl
; X86-BMI1BMI2-NEXT: je .LBB37_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %ebx, %eax
-; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT: movl %edx, %ecx
+; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB37_2:
-; X86-BMI1BMI2-NEXT: movl (%edx), %esi
-; X86-BMI1BMI2-NEXT: andl %eax, %esi
-; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
-; X86-BMI1BMI2-NEXT: andl %ebx, %edi
+; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
+; X86-BMI1BMI2-NEXT: andl %edx, %esi
+; X86-BMI1BMI2-NEXT: movl (%eax), %edi
+; X86-BMI1BMI2-NEXT: andl %ecx, %edi
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %eax
+; X86-BMI1BMI2-NEXT: pushl %edx
+; X86-BMI1BMI2-NEXT: pushl %ecx
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
; X86-BMI1BMI2-NEXT: popl %ebx
@@ -3253,7 +3239,6 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
-; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB38_2
; X86-NOBMI-NEXT: # %bb.1:
@@ -3284,7 +3269,6 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB38_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
@@ -3310,26 +3294,25 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movb $64, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %esi
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
-; X86-BMI1BMI2-NEXT: testb $32, %cl
+; X86-BMI1BMI2-NEXT: movb $64, %al
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: movl $-1, %edi
+; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi
+; X86-BMI1BMI2-NEXT: testb $32, %al
; X86-BMI1BMI2-NEXT: je .LBB38_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %edi, %esi
-; X86-BMI1BMI2-NEXT: xorl %edi, %edi
+; X86-BMI1BMI2-NEXT: movl %esi, %edi
+; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB38_2:
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
-; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: movl %edi, %edx
+; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %edi, %eax
+; X86-BMI1BMI2-NEXT: movl %esi, %edx
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
; X86-BMI1BMI2-NEXT: popl %edi
@@ -3407,14 +3390,12 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
-; X86-NOBMI-NEXT: shrdl %cl, %edx, %edx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB39_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %edx, %eax
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: .LBB39_2:
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
@@ -3423,14 +3404,12 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movb $64, %cl
; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: jne .LBB39_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %edx, %eax
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: .LBB39_2:
; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: retl
@@ -3439,13 +3418,11 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb $64, %cl
; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %edx
; X86-BMI1BMI2-NEXT: movl $-1, %eax
-; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB39_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT: .LBB39_2:
; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index 465dea578267..335e64f99c10 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -587,14 +587,9 @@ define i32 @combine_fshl_load_i32(i32* %p) nounwind {
define i64 @combine_fshl_load_i64(i64* %p) nounwind {
; X86-FAST-LABEL: combine_fshl_load_i64:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: movl 12(%ecx), %eax
-; X86-FAST-NEXT: movl 16(%ecx), %esi
-; X86-FAST-NEXT: movl 20(%ecx), %edx
-; X86-FAST-NEXT: shldl $24, %esi, %edx
-; X86-FAST-NEXT: shrdl $8, %esi, %eax
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: movl 13(%ecx), %eax
+; X86-FAST-NEXT: movl 17(%ecx), %edx
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: combine_fshl_load_i64:
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 644e33fe198c..2238ff4a3101 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -582,16 +582,9 @@ define i32 @combine_fshr_load_i32(i32* %p) nounwind {
define i64 @combine_fshr_load_i64(i64* %p) nounwind {
; X86-FAST-LABEL: combine_fshr_load_i64:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movzbl 11(%eax), %ecx
-; X86-FAST-NEXT: movl 12(%eax), %esi
-; X86-FAST-NEXT: movl 16(%eax), %edx
-; X86-FAST-NEXT: shldl $8, %esi, %edx
-; X86-FAST-NEXT: movl %esi, %eax
-; X86-FAST-NEXT: shll $8, %eax
-; X86-FAST-NEXT: orl %ecx, %eax
-; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl 11(%ecx), %eax
+; X86-FAST-NEXT: movl 15(%ecx), %edx
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: combine_fshr_load_i64:
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 59fe62c0e4b4..f5673c7e8bd3 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -290,7 +290,6 @@ define i64 @ashr_add_shl_mismatch_shifts2(i64 %r) nounwind {
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: shrdl $8, %edx, %eax
; X32-NEXT: shrl $8, %edx
; X32-NEXT: incl %edx
; X32-NEXT: shrdl $8, %edx, %eax
diff --git a/llvm/test/CodeGen/X86/shift-parts.ll b/llvm/test/CodeGen/X86/shift-parts.ll
index 2dc35f6bfef0..da00f377020d 100644
--- a/llvm/test/CodeGen/X86/shift-parts.ll
+++ b/llvm/test/CodeGen/X86/shift-parts.ll
@@ -10,15 +10,14 @@ define i32 @int87(i32 %uint64p_8, i1 %cond) nounwind {
; CHECK-LABEL: int87:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rax
-; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rdx
-; CHECK-NEXT: movzbl %sil, %ecx
-; CHECK-NEXT: shll $6, %ecx
+; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rcx
+; CHECK-NEXT: movzbl %sil, %edx
+; CHECK-NEXT: shll $6, %edx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %for.cond
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq %rdx, %rsi
-; CHECK-NEXT: shrdq %cl, %rax, %rsi
-; CHECK-NEXT: testb $64, %cl
+; CHECK-NEXT: testb $64, %dl
+; CHECK-NEXT: movq %rcx, %rsi
; CHECK-NEXT: cmovneq %rax, %rsi
; CHECK-NEXT: orl $0, %esi
; CHECK-NEXT: je .LBB0_1