[llvm] 3250317 - [SelectionDAG] Optimize expansion for rotates/funnel shifts
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 2 04:39:00 PDT 2021
Author: Simon Pilgrim
Date: 2021-11-02T11:38:25Z
New Revision: 325031786e08cb65116fdd6f60f3342932f5f4d9
URL: https://github.com/llvm/llvm-project/commit/325031786e08cb65116fdd6f60f3342932f5f4d9
DIFF: https://github.com/llvm/llvm-project/commit/325031786e08cb65116fdd6f60f3342932f5f4d9.diff
LOG: [SelectionDAG] Optimize expansion for rotates/funnel shifts
If the type of a funnel shift needs to be expanded, expand it to two funnel shifts instead of regular shifts. For constant shifts, this doesn't make much difference, but for variable shifts it allows a more optimal lowering.
Also use the optimized funnel shift lowering for rotates.
Alive2: https://alive2.llvm.org/ce/z/TvHDB- / https://alive2.llvm.org/ce/z/yzPept
(Branched from D108058, as completing this should help unlock some other WIP patches).
Original Patch: @efriedma (Eli Friedman)
Differential Revision: https://reviews.llvm.org/D112443
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/test/CodeGen/AArch64/funnel-shift.ll
llvm/test/CodeGen/ARM/funnel-shift-rot.ll
llvm/test/CodeGen/ARM/funnel-shift.ll
llvm/test/CodeGen/Mips/funnel-shift-rot.ll
llvm/test/CodeGen/Mips/funnel-shift.ll
llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
llvm/test/CodeGen/PowerPC/funnel-shift.ll
llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
llvm/test/CodeGen/RISCV/rv32zbt.ll
llvm/test/CodeGen/RISCV/shifts.ll
llvm/test/CodeGen/X86/fshl.ll
llvm/test/CodeGen/X86/fshr.ll
llvm/test/CodeGen/X86/funnel-shift-rot.ll
llvm/test/CodeGen/X86/funnel-shift.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 29812ef8f1a07..0f2c5c17c4e55 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4377,18 +4377,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- // Lower the rotate to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ // Delegate to funnel-shift expansion.
+ SDLoc DL(N);
+ unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR;
+ SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0),
+ N->getOperand(0), N->getOperand(1));
SplitInteger(Res, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
- // Lower the funnel shift to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandFunnelShift(N, Res, DAG);
- SplitInteger(Res, Lo, Hi);
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Values numbered from least significant to most significant.
+ SDValue In1, In2, In3, In4;
+ GetExpandedInteger(N->getOperand(0), In3, In4);
+ GetExpandedInteger(N->getOperand(1), In1, In2);
+ EVT HalfVT = In1.getValueType();
+
+ SDLoc DL(N);
+ unsigned Opc = N->getOpcode();
+ SDValue ShAmt = N->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT = getSetCCResultType(ShAmtVT);
+
+ // If the shift amount is at least half the bitwidth, swap the inputs.
+ unsigned HalfVTBits = HalfVT.getScalarSizeInBits();
+ SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt,
+ DAG.getConstant(HalfVTBits, DL, ShAmtVT));
+ SDValue Cond =
+ DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT),
+ Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ);
+
+ // Expand to a pair of funnel shifts.
+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT);
+
+ SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2);
+ SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3);
+ SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4);
+ Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt);
+ Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index c199ad0f76c4d..51dc7ce2d061d 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -46,29 +46,19 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK-LABEL: fshl_i128:
; CHECK: // %bb.0:
+; CHECK-NEXT: tst x4, #0x40
; CHECK-NEXT: mvn w8, w4
-; CHECK-NEXT: extr x9, x3, x2, #1
-; CHECK-NEXT: lsr x10, x3, #1
-; CHECK-NEXT: and x12, x8, #0x7f
-; CHECK-NEXT: lsl x11, x10, #1
-; CHECK-NEXT: tst x12, #0x40
-; CHECK-NEXT: lsl x11, x11, x4
+; CHECK-NEXT: csel x9, x2, x3, ne
+; CHECK-NEXT: csel x10, x3, x0, ne
+; CHECK-NEXT: lsr x9, x9, #1
+; CHECK-NEXT: lsl x11, x10, x4
+; CHECK-NEXT: csel x12, x0, x1, ne
+; CHECK-NEXT: lsr x10, x10, #1
; CHECK-NEXT: lsr x9, x9, x8
-; CHECK-NEXT: orr x9, x11, x9
-; CHECK-NEXT: lsr x11, x0, #1
-; CHECK-NEXT: lsr x10, x10, x8
-; CHECK-NEXT: lsl x12, x1, x4
-; CHECK-NEXT: lsr x8, x11, x8
-; CHECK-NEXT: and x11, x4, #0x7f
-; CHECK-NEXT: csel x9, x10, x9, ne
-; CHECK-NEXT: csel x10, xzr, x10, ne
-; CHECK-NEXT: orr x8, x12, x8
-; CHECK-NEXT: lsl x12, x0, x4
-; CHECK-NEXT: tst x11, #0x40
-; CHECK-NEXT: csel x8, x12, x8, ne
-; CHECK-NEXT: csel x11, xzr, x12, ne
-; CHECK-NEXT: orr x1, x8, x10
+; CHECK-NEXT: lsl x12, x12, x4
+; CHECK-NEXT: lsr x8, x10, x8
; CHECK-NEXT: orr x0, x11, x9
+; CHECK-NEXT: orr x1, x12, x8
; CHECK-NEXT: ret
%f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
ret i128 %f
diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
index 55157875d355f..de5bd2a7040b9 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
@@ -67,61 +67,24 @@ define i32 @rotl_i32(i32 %x, i32 %z) {
}
define i64 @rotl_i64(i64 %x, i64 %z) {
-; SCALAR-LABEL: rotl_i64:
-; SCALAR: @ %bb.0:
-; SCALAR-NEXT: .save {r4, r5, r11, lr}
-; SCALAR-NEXT: push {r4, r5, r11, lr}
-; SCALAR-NEXT: rsb r3, r2, #0
-; SCALAR-NEXT: and r4, r2, #63
-; SCALAR-NEXT: and lr, r3, #63
-; SCALAR-NEXT: rsb r3, lr, #32
-; SCALAR-NEXT: lsl r2, r0, r4
-; SCALAR-NEXT: lsr r12, r0, lr
-; SCALAR-NEXT: orr r3, r12, r1, lsl r3
-; SCALAR-NEXT: subs r12, lr, #32
-; SCALAR-NEXT: lsrpl r3, r1, r12
-; SCALAR-NEXT: subs r5, r4, #32
-; SCALAR-NEXT: movwpl r2, #0
-; SCALAR-NEXT: cmp r5, #0
-; SCALAR-NEXT: orr r2, r2, r3
-; SCALAR-NEXT: rsb r3, r4, #32
-; SCALAR-NEXT: lsr r3, r0, r3
-; SCALAR-NEXT: orr r3, r3, r1, lsl r4
-; SCALAR-NEXT: lslpl r3, r0, r5
-; SCALAR-NEXT: lsr r0, r1, lr
-; SCALAR-NEXT: cmp r12, #0
-; SCALAR-NEXT: movwpl r0, #0
-; SCALAR-NEXT: orr r1, r3, r0
-; SCALAR-NEXT: mov r0, r2
-; SCALAR-NEXT: pop {r4, r5, r11, pc}
-;
-; NEON-LABEL: rotl_i64:
-; NEON: @ %bb.0:
-; NEON-NEXT: .save {r4, r5, r11, lr}
-; NEON-NEXT: push {r4, r5, r11, lr}
-; NEON-NEXT: and r12, r2, #63
-; NEON-NEXT: rsb r2, r2, #0
-; NEON-NEXT: rsb r3, r12, #32
-; NEON-NEXT: and r4, r2, #63
-; NEON-NEXT: subs lr, r12, #32
-; NEON-NEXT: lsr r3, r0, r3
-; NEON-NEXT: lsr r2, r1, r4
-; NEON-NEXT: orr r3, r3, r1, lsl r12
-; NEON-NEXT: lslpl r3, r0, lr
-; NEON-NEXT: subs r5, r4, #32
-; NEON-NEXT: movwpl r2, #0
-; NEON-NEXT: cmp r5, #0
-; NEON-NEXT: orr r2, r3, r2
-; NEON-NEXT: lsr r3, r0, r4
-; NEON-NEXT: rsb r4, r4, #32
-; NEON-NEXT: lsl r0, r0, r12
-; NEON-NEXT: orr r3, r3, r1, lsl r4
-; NEON-NEXT: lsrpl r3, r1, r5
-; NEON-NEXT: cmp lr, #0
-; NEON-NEXT: movwpl r0, #0
-; NEON-NEXT: mov r1, r2
-; NEON-NEXT: orr r0, r0, r3
-; NEON-NEXT: pop {r4, r5, r11, pc}
+; CHECK-LABEL: rotl_i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: ands r3, r2, #32
+; CHECK-NEXT: and r12, r2, #31
+; CHECK-NEXT: mov r3, r0
+; CHECK-NEXT: mov r4, #31
+; CHECK-NEXT: movne r3, r1
+; CHECK-NEXT: movne r1, r0
+; CHECK-NEXT: bic r2, r4, r2
+; CHECK-NEXT: lsl lr, r3, r12
+; CHECK-NEXT: lsr r0, r1, #1
+; CHECK-NEXT: lsl r1, r1, r12
+; CHECK-NEXT: lsr r3, r3, #1
+; CHECK-NEXT: orr r0, lr, r0, lsr r2
+; CHECK-NEXT: orr r1, r1, r3, lsr r2
+; CHECK-NEXT: pop {r4, pc}
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
ret i64 %f
}
@@ -243,31 +206,21 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r11, lr}
-; CHECK-NEXT: push {r4, r5, r11, lr}
-; CHECK-NEXT: and lr, r2, #63
-; CHECK-NEXT: rsb r2, r2, #0
-; CHECK-NEXT: rsb r3, lr, #32
-; CHECK-NEXT: and r4, r2, #63
-; CHECK-NEXT: lsr r12, r0, lr
-; CHECK-NEXT: orr r3, r12, r1, lsl r3
-; CHECK-NEXT: subs r12, lr, #32
-; CHECK-NEXT: lsl r2, r0, r4
-; CHECK-NEXT: lsrpl r3, r1, r12
-; CHECK-NEXT: subs r5, r4, #32
-; CHECK-NEXT: movwpl r2, #0
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: orr r2, r3, r2
-; CHECK-NEXT: rsb r3, r4, #32
-; CHECK-NEXT: lsr r3, r0, r3
-; CHECK-NEXT: orr r3, r3, r1, lsl r4
-; CHECK-NEXT: lslpl r3, r0, r5
-; CHECK-NEXT: lsr r0, r1, lr
-; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: movwpl r0, #0
-; CHECK-NEXT: orr r1, r0, r3
-; CHECK-NEXT: mov r0, r2
-; CHECK-NEXT: pop {r4, r5, r11, pc}
+; CHECK-NEXT: ands r3, r2, #32
+; CHECK-NEXT: mov r3, r1
+; CHECK-NEXT: moveq r3, r0
+; CHECK-NEXT: moveq r0, r1
+; CHECK-NEXT: mov r1, #31
+; CHECK-NEXT: lsl r12, r0, #1
+; CHECK-NEXT: bic r1, r1, r2
+; CHECK-NEXT: and r2, r2, #31
+; CHECK-NEXT: lsl r12, r12, r1
+; CHECK-NEXT: orr r12, r12, r3, lsr r2
+; CHECK-NEXT: lsl r3, r3, #1
+; CHECK-NEXT: lsl r1, r3, r1
+; CHECK-NEXT: orr r1, r1, r0, lsr r2
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: bx lr
%f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
ret i64 %f
}
diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll
index 54c93b493c981..25e210d819147 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift.ll
@@ -45,46 +45,69 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
-; CHECK-LABEL: fshl_i37:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: ldr r0, [sp, #24]
-; CHECK-NEXT: mov r6, r3
-; CHECK-NEXT: ldr r1, [sp, #28]
-; CHECK-NEXT: mov r7, r2
-; CHECK-NEXT: mov r2, #37
-; CHECK-NEXT: mov r3, #0
-; CHECK-NEXT: bl __aeabi_uldivmod
-; CHECK-NEXT: mov r0, #63
-; CHECK-NEXT: bic r1, r0, r2
-; CHECK-NEXT: lsl r0, r6, #27
-; CHECK-NEXT: lsl r3, r7, #27
-; CHECK-NEXT: orr r0, r0, r7, lsr #5
-; CHECK-NEXT: and r2, r2, #63
-; CHECK-NEXT: lsrs r7, r0, #1
-; CHECK-NEXT: rrx r0, r3
-; CHECK-NEXT: rsb r3, r1, #32
-; CHECK-NEXT: lsr r0, r0, r1
-; CHECK-NEXT: lsl r6, r4, r2
-; CHECK-NEXT: orr r0, r0, r7, lsl r3
-; CHECK-NEXT: subs r3, r1, #32
-; CHECK-NEXT: lsr r1, r7, r1
-; CHECK-NEXT: lsrpl r0, r7, r3
-; CHECK-NEXT: subs r5, r2, #32
-; CHECK-NEXT: movwpl r6, #0
-; CHECK-NEXT: orr r0, r6, r0
-; CHECK-NEXT: rsb r6, r2, #32
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: lsr r6, r4, r6
-; CHECK-NEXT: orr r2, r6, r8, lsl r2
-; CHECK-NEXT: lslpl r2, r4, r5
-; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: movwpl r1, #0
-; CHECK-NEXT: orr r1, r2, r1
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; SCALAR-LABEL: fshl_i37:
+; SCALAR: @ %bb.0:
+; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
+; SCALAR-NEXT: mov r4, r1
+; SCALAR-NEXT: mov r8, r0
+; SCALAR-NEXT: ldr r0, [sp, #24]
+; SCALAR-NEXT: mov r5, r3
+; SCALAR-NEXT: ldr r1, [sp, #28]
+; SCALAR-NEXT: mov r6, r2
+; SCALAR-NEXT: mov r2, #37
+; SCALAR-NEXT: mov r3, #0
+; SCALAR-NEXT: bl __aeabi_uldivmod
+; SCALAR-NEXT: lsl r1, r5, #27
+; SCALAR-NEXT: ands r12, r2, #32
+; SCALAR-NEXT: orr r1, r1, r6, lsr #5
+; SCALAR-NEXT: mov r3, r8
+; SCALAR-NEXT: and r5, r2, #31
+; SCALAR-NEXT: mov r0, #31
+; SCALAR-NEXT: movne r3, r1
+; SCALAR-NEXT: cmp r12, #0
+; SCALAR-NEXT: bic r2, r0, r2
+; SCALAR-NEXT: lslne r1, r6, #27
+; SCALAR-NEXT: movne r4, r8
+; SCALAR-NEXT: lsl r7, r3, r5
+; SCALAR-NEXT: lsr r0, r1, #1
+; SCALAR-NEXT: lsl r1, r4, r5
+; SCALAR-NEXT: lsr r3, r3, #1
+; SCALAR-NEXT: orr r0, r7, r0, lsr r2
+; SCALAR-NEXT: orr r1, r1, r3, lsr r2
+; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; NEON-LABEL: fshl_i37:
+; NEON: @ %bb.0:
+; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; NEON-NEXT: mov r4, r1
+; NEON-NEXT: mov r5, r0
+; NEON-NEXT: ldr r0, [sp, #24]
+; NEON-NEXT: mov r7, r3
+; NEON-NEXT: ldr r1, [sp, #28]
+; NEON-NEXT: mov r6, r2
+; NEON-NEXT: mov r2, #37
+; NEON-NEXT: mov r3, #0
+; NEON-NEXT: bl __aeabi_uldivmod
+; NEON-NEXT: mov r0, #31
+; NEON-NEXT: bic r1, r0, r2
+; NEON-NEXT: lsl r0, r7, #27
+; NEON-NEXT: ands r12, r2, #32
+; NEON-NEXT: orr r0, r0, r6, lsr #5
+; NEON-NEXT: mov r7, r5
+; NEON-NEXT: and r2, r2, #31
+; NEON-NEXT: movne r7, r0
+; NEON-NEXT: lslne r0, r6, #27
+; NEON-NEXT: cmp r12, #0
+; NEON-NEXT: lsl r3, r7, r2
+; NEON-NEXT: lsr r0, r0, #1
+; NEON-NEXT: movne r4, r5
+; NEON-NEXT: orr r0, r3, r0, lsr r1
+; NEON-NEXT: lsr r3, r7, #1
+; NEON-NEXT: lsl r2, r4, r2
+; NEON-NEXT: orr r1, r2, r3, lsr r1
+; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
%f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}
@@ -157,8 +180,8 @@ define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK: @ %bb.0:
-; CHECK-NEXT: lsr r1, r2, #23
-; CHECK-NEXT: orr r2, r1, r3, lsl #9
+; CHECK-NEXT: lsl r1, r3, #9
+; CHECK-NEXT: orr r2, r1, r2, lsr #23
; CHECK-NEXT: lsl r0, r0, #9
; CHECK-NEXT: orr r1, r0, r3, lsr #23
; CHECK-NEXT: mov r0, r2
@@ -212,46 +235,36 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: mov r9, r0
-; CHECK-NEXT: ldr r0, [sp, #32]
-; CHECK-NEXT: mov r6, r3
-; CHECK-NEXT: ldr r1, [sp, #36]
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: ldr r0, [sp, #24]
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: ldr r1, [sp, #28]
; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: mov r2, #37
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: lsl r3, r5, #27
; CHECK-NEXT: add r0, r2, #27
-; CHECK-NEXT: lsl r6, r6, #27
-; CHECK-NEXT: and r1, r0, #63
-; CHECK-NEXT: lsl r2, r7, #27
-; CHECK-NEXT: orr r7, r6, r7, lsr #5
-; CHECK-NEXT: mov r6, #63
-; CHECK-NEXT: rsb r3, r1, #32
-; CHECK-NEXT: lsr r2, r2, r1
-; CHECK-NEXT: subs r12, r1, #32
-; CHECK-NEXT: bic r6, r6, r0
-; CHECK-NEXT: orr r2, r2, r7, lsl r3
-; CHECK-NEXT: lsl r5, r9, #1
-; CHECK-NEXT: lsrpl r2, r7, r12
-; CHECK-NEXT: lsl r0, r5, r6
-; CHECK-NEXT: subs r4, r6, #32
-; CHECK-NEXT: lsl r3, r8, #1
-; CHECK-NEXT: movwpl r0, #0
-; CHECK-NEXT: orr r3, r3, r9, lsr #31
-; CHECK-NEXT: orr r0, r0, r2
-; CHECK-NEXT: rsb r2, r6, #32
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: lsr r1, r7, r1
-; CHECK-NEXT: lsr r2, r5, r2
-; CHECK-NEXT: orr r2, r2, r3, lsl r6
-; CHECK-NEXT: lslpl r2, r5, r4
+; CHECK-NEXT: orr r3, r3, r7, lsr #5
+; CHECK-NEXT: mov r1, #31
+; CHECK-NEXT: ands r12, r0, #32
+; CHECK-NEXT: mov r5, r6
+; CHECK-NEXT: moveq r5, r3
+; CHECK-NEXT: bic r1, r1, r0
+; CHECK-NEXT: lsl r2, r5, #1
+; CHECK-NEXT: lsleq r3, r7, #27
; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: movwpl r1, #0
-; CHECK-NEXT: orr r1, r2, r1
-; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-NEXT: and r7, r0, #31
+; CHECK-NEXT: lsl r2, r2, r1
+; CHECK-NEXT: moveq r4, r6
+; CHECK-NEXT: orr r0, r2, r3, lsr r7
+; CHECK-NEXT: lsl r2, r4, #1
+; CHECK-NEXT: lsl r1, r2, r1
+; CHECK-NEXT: orr r1, r1, r5, lsr r7
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}
diff --git a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll
index 49532f246838a..e17980e98e9b5 100644
--- a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll
@@ -76,59 +76,43 @@ define i32 @rotl_i32(i32 %x, i32 %z) {
define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK-BE-LABEL: rotl_i64:
; CHECK-BE: # %bb.0:
-; CHECK-BE-NEXT: negu $1, $7
-; CHECK-BE-NEXT: andi $3, $1, 63
-; CHECK-BE-NEXT: srlv $6, $4, $3
-; CHECK-BE-NEXT: andi $1, $1, 32
-; CHECK-BE-NEXT: andi $2, $7, 63
-; CHECK-BE-NEXT: move $8, $6
-; CHECK-BE-NEXT: movn $8, $zero, $1
-; CHECK-BE-NEXT: sllv $9, $4, $2
-; CHECK-BE-NEXT: srl $10, $5, 1
-; CHECK-BE-NEXT: not $11, $2
-; CHECK-BE-NEXT: srlv $10, $10, $11
-; CHECK-BE-NEXT: or $9, $9, $10
-; CHECK-BE-NEXT: sllv $10, $5, $2
-; CHECK-BE-NEXT: andi $7, $7, 32
-; CHECK-BE-NEXT: movn $9, $10, $7
-; CHECK-BE-NEXT: or $2, $9, $8
-; CHECK-BE-NEXT: srlv $5, $5, $3
-; CHECK-BE-NEXT: not $3, $3
-; CHECK-BE-NEXT: sll $4, $4, 1
-; CHECK-BE-NEXT: sllv $3, $4, $3
-; CHECK-BE-NEXT: or $3, $3, $5
-; CHECK-BE-NEXT: movn $3, $6, $1
-; CHECK-BE-NEXT: movn $10, $zero, $7
+; CHECK-BE-NEXT: srl $1, $7, 5
+; CHECK-BE-NEXT: andi $1, $1, 1
+; CHECK-BE-NEXT: move $3, $4
+; CHECK-BE-NEXT: movn $3, $5, $1
+; CHECK-BE-NEXT: andi $6, $7, 31
+; CHECK-BE-NEXT: sllv $2, $3, $6
+; CHECK-BE-NEXT: movn $5, $4, $1
+; CHECK-BE-NEXT: srl $1, $5, 1
+; CHECK-BE-NEXT: not $4, $7
+; CHECK-BE-NEXT: andi $4, $4, 31
+; CHECK-BE-NEXT: srlv $1, $1, $4
+; CHECK-BE-NEXT: or $2, $2, $1
+; CHECK-BE-NEXT: sllv $1, $5, $6
+; CHECK-BE-NEXT: srl $3, $3, 1
+; CHECK-BE-NEXT: srlv $3, $3, $4
; CHECK-BE-NEXT: jr $ra
-; CHECK-BE-NEXT: or $3, $10, $3
+; CHECK-BE-NEXT: or $3, $1, $3
;
; CHECK-LE-LABEL: rotl_i64:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: negu $1, $6
-; CHECK-LE-NEXT: andi $2, $1, 63
-; CHECK-LE-NEXT: srlv $7, $5, $2
-; CHECK-LE-NEXT: andi $1, $1, 32
-; CHECK-LE-NEXT: andi $3, $6, 63
-; CHECK-LE-NEXT: move $8, $7
-; CHECK-LE-NEXT: movn $8, $zero, $1
-; CHECK-LE-NEXT: sllv $9, $5, $3
-; CHECK-LE-NEXT: srl $10, $4, 1
-; CHECK-LE-NEXT: not $11, $3
-; CHECK-LE-NEXT: srlv $10, $10, $11
-; CHECK-LE-NEXT: or $9, $9, $10
-; CHECK-LE-NEXT: sllv $10, $4, $3
-; CHECK-LE-NEXT: andi $6, $6, 32
-; CHECK-LE-NEXT: movn $9, $10, $6
-; CHECK-LE-NEXT: or $3, $9, $8
-; CHECK-LE-NEXT: srlv $4, $4, $2
-; CHECK-LE-NEXT: not $2, $2
-; CHECK-LE-NEXT: sll $5, $5, 1
-; CHECK-LE-NEXT: sllv $2, $5, $2
-; CHECK-LE-NEXT: or $2, $2, $4
-; CHECK-LE-NEXT: movn $2, $7, $1
-; CHECK-LE-NEXT: movn $10, $zero, $6
+; CHECK-LE-NEXT: srl $1, $6, 5
+; CHECK-LE-NEXT: andi $1, $1, 1
+; CHECK-LE-NEXT: move $3, $4
+; CHECK-LE-NEXT: movn $3, $5, $1
+; CHECK-LE-NEXT: andi $7, $6, 31
+; CHECK-LE-NEXT: sllv $2, $3, $7
+; CHECK-LE-NEXT: movn $5, $4, $1
+; CHECK-LE-NEXT: srl $1, $5, 1
+; CHECK-LE-NEXT: not $4, $6
+; CHECK-LE-NEXT: andi $4, $4, 31
+; CHECK-LE-NEXT: srlv $1, $1, $4
+; CHECK-LE-NEXT: or $2, $2, $1
+; CHECK-LE-NEXT: sllv $1, $5, $7
+; CHECK-LE-NEXT: srl $3, $3, 1
+; CHECK-LE-NEXT: srlv $3, $3, $4
; CHECK-LE-NEXT: jr $ra
-; CHECK-LE-NEXT: or $2, $10, $2
+; CHECK-LE-NEXT: or $3, $1, $3
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
ret i64 %f
}
@@ -254,59 +238,41 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-BE-LABEL: rotr_i64:
; CHECK-BE: # %bb.0:
-; CHECK-BE-NEXT: negu $1, $7
-; CHECK-BE-NEXT: andi $2, $1, 63
-; CHECK-BE-NEXT: sllv $6, $5, $2
-; CHECK-BE-NEXT: andi $1, $1, 32
-; CHECK-BE-NEXT: andi $3, $7, 63
-; CHECK-BE-NEXT: move $8, $6
-; CHECK-BE-NEXT: movn $8, $zero, $1
-; CHECK-BE-NEXT: srlv $9, $5, $3
-; CHECK-BE-NEXT: sll $10, $4, 1
-; CHECK-BE-NEXT: not $11, $3
-; CHECK-BE-NEXT: sllv $10, $10, $11
-; CHECK-BE-NEXT: or $9, $10, $9
-; CHECK-BE-NEXT: srlv $10, $4, $3
-; CHECK-BE-NEXT: andi $7, $7, 32
-; CHECK-BE-NEXT: movn $9, $10, $7
-; CHECK-BE-NEXT: or $3, $9, $8
-; CHECK-BE-NEXT: sllv $4, $4, $2
-; CHECK-BE-NEXT: not $2, $2
-; CHECK-BE-NEXT: srl $5, $5, 1
-; CHECK-BE-NEXT: srlv $2, $5, $2
-; CHECK-BE-NEXT: or $2, $4, $2
-; CHECK-BE-NEXT: movn $2, $6, $1
-; CHECK-BE-NEXT: movn $10, $zero, $7
+; CHECK-BE-NEXT: andi $1, $7, 32
+; CHECK-BE-NEXT: move $3, $5
+; CHECK-BE-NEXT: movz $3, $4, $1
+; CHECK-BE-NEXT: andi $6, $7, 31
+; CHECK-BE-NEXT: srlv $2, $3, $6
+; CHECK-BE-NEXT: movz $4, $5, $1
+; CHECK-BE-NEXT: sll $1, $4, 1
+; CHECK-BE-NEXT: not $5, $7
+; CHECK-BE-NEXT: andi $5, $5, 31
+; CHECK-BE-NEXT: sllv $1, $1, $5
+; CHECK-BE-NEXT: or $2, $1, $2
+; CHECK-BE-NEXT: srlv $1, $4, $6
+; CHECK-BE-NEXT: sll $3, $3, 1
+; CHECK-BE-NEXT: sllv $3, $3, $5
; CHECK-BE-NEXT: jr $ra
-; CHECK-BE-NEXT: or $2, $10, $2
+; CHECK-BE-NEXT: or $3, $3, $1
;
; CHECK-LE-LABEL: rotr_i64:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: negu $1, $6
-; CHECK-LE-NEXT: andi $3, $1, 63
-; CHECK-LE-NEXT: sllv $7, $4, $3
-; CHECK-LE-NEXT: andi $1, $1, 32
-; CHECK-LE-NEXT: andi $2, $6, 63
-; CHECK-LE-NEXT: move $8, $7
-; CHECK-LE-NEXT: movn $8, $zero, $1
-; CHECK-LE-NEXT: srlv $9, $4, $2
-; CHECK-LE-NEXT: sll $10, $5, 1
-; CHECK-LE-NEXT: not $11, $2
-; CHECK-LE-NEXT: sllv $10, $10, $11
-; CHECK-LE-NEXT: or $9, $10, $9
-; CHECK-LE-NEXT: srlv $10, $5, $2
-; CHECK-LE-NEXT: andi $6, $6, 32
-; CHECK-LE-NEXT: movn $9, $10, $6
-; CHECK-LE-NEXT: or $2, $9, $8
-; CHECK-LE-NEXT: sllv $5, $5, $3
-; CHECK-LE-NEXT: not $3, $3
-; CHECK-LE-NEXT: srl $4, $4, 1
-; CHECK-LE-NEXT: srlv $3, $4, $3
-; CHECK-LE-NEXT: or $3, $5, $3
-; CHECK-LE-NEXT: movn $3, $7, $1
-; CHECK-LE-NEXT: movn $10, $zero, $6
+; CHECK-LE-NEXT: andi $1, $6, 32
+; CHECK-LE-NEXT: move $3, $5
+; CHECK-LE-NEXT: movz $3, $4, $1
+; CHECK-LE-NEXT: andi $7, $6, 31
+; CHECK-LE-NEXT: srlv $2, $3, $7
+; CHECK-LE-NEXT: movz $4, $5, $1
+; CHECK-LE-NEXT: sll $1, $4, 1
+; CHECK-LE-NEXT: not $5, $6
+; CHECK-LE-NEXT: andi $5, $5, 31
+; CHECK-LE-NEXT: sllv $1, $1, $5
+; CHECK-LE-NEXT: or $2, $1, $2
+; CHECK-LE-NEXT: srlv $1, $4, $7
+; CHECK-LE-NEXT: sll $3, $3, 1
+; CHECK-LE-NEXT: sllv $3, $3, $5
; CHECK-LE-NEXT: jr $ra
-; CHECK-LE-NEXT: or $3, $10, $3
+; CHECK-LE-NEXT: or $3, $3, $1
%f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
ret i64 %f
}
diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll
index 99029b7b9410c..d4f47318ebb18 100644
--- a/llvm/test/CodeGen/Mips/funnel-shift.ll
+++ b/llvm/test/CodeGen/Mips/funnel-shift.ll
@@ -72,37 +72,25 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: jal __umoddi3
; CHECK-BE-NEXT: addiu $7, $zero, 37
; CHECK-BE-NEXT: not $1, $3
-; CHECK-BE-NEXT: andi $2, $3, 63
-; CHECK-BE-NEXT: not $4, $2
-; CHECK-BE-NEXT: srl $5, $18, 1
-; CHECK-BE-NEXT: sllv $6, $19, $2
-; CHECK-BE-NEXT: srlv $4, $5, $4
-; CHECK-BE-NEXT: andi $5, $1, 63
-; CHECK-BE-NEXT: srl $7, $16, 5
-; CHECK-BE-NEXT: sll $8, $17, 27
-; CHECK-BE-NEXT: or $7, $8, $7
-; CHECK-BE-NEXT: srl $8, $7, 1
-; CHECK-BE-NEXT: srlv $9, $8, $5
-; CHECK-BE-NEXT: andi $1, $1, 32
-; CHECK-BE-NEXT: move $10, $9
-; CHECK-BE-NEXT: movn $10, $zero, $1
-; CHECK-BE-NEXT: or $4, $6, $4
-; CHECK-BE-NEXT: sllv $6, $18, $2
-; CHECK-BE-NEXT: andi $3, $3, 32
-; CHECK-BE-NEXT: movn $4, $6, $3
-; CHECK-BE-NEXT: sll $7, $7, 31
-; CHECK-BE-NEXT: sll $2, $16, 27
-; CHECK-BE-NEXT: srl $11, $2, 1
-; CHECK-BE-NEXT: or $2, $4, $10
-; CHECK-BE-NEXT: movn $6, $zero, $3
-; CHECK-BE-NEXT: or $3, $11, $7
-; CHECK-BE-NEXT: srlv $3, $3, $5
-; CHECK-BE-NEXT: not $4, $5
-; CHECK-BE-NEXT: sll $5, $8, 1
-; CHECK-BE-NEXT: sllv $4, $5, $4
-; CHECK-BE-NEXT: or $3, $4, $3
-; CHECK-BE-NEXT: movn $3, $9, $1
-; CHECK-BE-NEXT: or $3, $6, $3
+; CHECK-BE-NEXT: srl $2, $3, 5
+; CHECK-BE-NEXT: andi $4, $2, 1
+; CHECK-BE-NEXT: movn $19, $18, $4
+; CHECK-BE-NEXT: andi $3, $3, 31
+; CHECK-BE-NEXT: sllv $2, $19, $3
+; CHECK-BE-NEXT: andi $1, $1, 31
+; CHECK-BE-NEXT: srl $5, $16, 5
+; CHECK-BE-NEXT: sll $6, $17, 27
+; CHECK-BE-NEXT: or $5, $6, $5
+; CHECK-BE-NEXT: movn $18, $5, $4
+; CHECK-BE-NEXT: srl $6, $18, 1
+; CHECK-BE-NEXT: srlv $6, $6, $1
+; CHECK-BE-NEXT: or $2, $2, $6
+; CHECK-BE-NEXT: sllv $3, $18, $3
+; CHECK-BE-NEXT: sll $6, $16, 27
+; CHECK-BE-NEXT: movn $5, $6, $4
+; CHECK-BE-NEXT: srl $4, $5, 1
+; CHECK-BE-NEXT: srlv $1, $4, $1
+; CHECK-BE-NEXT: or $3, $3, $1
; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload
; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload
; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload
@@ -134,38 +122,27 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LE-NEXT: addiu $6, $zero, 37
; CHECK-LE-NEXT: jal __umoddi3
; CHECK-LE-NEXT: addiu $7, $zero, 0
-; CHECK-LE-NEXT: not $1, $2
-; CHECK-LE-NEXT: andi $3, $2, 63
-; CHECK-LE-NEXT: not $4, $3
-; CHECK-LE-NEXT: srl $5, $19, 1
-; CHECK-LE-NEXT: sllv $6, $18, $3
-; CHECK-LE-NEXT: srlv $4, $5, $4
-; CHECK-LE-NEXT: andi $5, $1, 63
-; CHECK-LE-NEXT: srl $7, $17, 5
-; CHECK-LE-NEXT: sll $8, $16, 27
-; CHECK-LE-NEXT: or $7, $8, $7
-; CHECK-LE-NEXT: srl $8, $7, 1
-; CHECK-LE-NEXT: srlv $9, $8, $5
-; CHECK-LE-NEXT: andi $1, $1, 32
-; CHECK-LE-NEXT: move $10, $9
-; CHECK-LE-NEXT: movn $10, $zero, $1
-; CHECK-LE-NEXT: or $4, $6, $4
-; CHECK-LE-NEXT: sllv $6, $19, $3
-; CHECK-LE-NEXT: andi $2, $2, 32
-; CHECK-LE-NEXT: movn $4, $6, $2
-; CHECK-LE-NEXT: sll $7, $7, 31
-; CHECK-LE-NEXT: sll $3, $17, 27
-; CHECK-LE-NEXT: srl $11, $3, 1
-; CHECK-LE-NEXT: or $3, $4, $10
-; CHECK-LE-NEXT: movn $6, $zero, $2
-; CHECK-LE-NEXT: or $2, $11, $7
-; CHECK-LE-NEXT: srlv $2, $2, $5
-; CHECK-LE-NEXT: not $4, $5
-; CHECK-LE-NEXT: sll $5, $8, 1
-; CHECK-LE-NEXT: sllv $4, $5, $4
-; CHECK-LE-NEXT: or $2, $4, $2
-; CHECK-LE-NEXT: movn $2, $9, $1
+; CHECK-LE-NEXT: srl $1, $2, 5
+; CHECK-LE-NEXT: andi $1, $1, 1
+; CHECK-LE-NEXT: srl $3, $17, 5
+; CHECK-LE-NEXT: sll $4, $16, 27
+; CHECK-LE-NEXT: or $3, $4, $3
+; CHECK-LE-NEXT: move $4, $19
+; CHECK-LE-NEXT: movn $4, $3, $1
+; CHECK-LE-NEXT: andi $5, $2, 31
+; CHECK-LE-NEXT: sllv $6, $4, $5
+; CHECK-LE-NEXT: not $2, $2
+; CHECK-LE-NEXT: andi $7, $2, 31
+; CHECK-LE-NEXT: sll $2, $17, 27
+; CHECK-LE-NEXT: movn $3, $2, $1
+; CHECK-LE-NEXT: srl $2, $3, 1
+; CHECK-LE-NEXT: srlv $2, $2, $7
; CHECK-LE-NEXT: or $2, $6, $2
+; CHECK-LE-NEXT: movn $18, $19, $1
+; CHECK-LE-NEXT: sllv $1, $18, $5
+; CHECK-LE-NEXT: srl $3, $4, 1
+; CHECK-LE-NEXT: srlv $3, $3, $7
+; CHECK-LE-NEXT: or $3, $1, $3
; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload
; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload
; CHECK-LE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload
@@ -250,15 +227,15 @@ define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-BE-NEXT: srl $1, $6, 23
; CHECK-BE-NEXT: sll $2, $5, 9
; CHECK-BE-NEXT: or $2, $2, $1
-; CHECK-BE-NEXT: sll $1, $6, 9
-; CHECK-BE-NEXT: srl $3, $7, 23
+; CHECK-BE-NEXT: srl $1, $7, 23
+; CHECK-BE-NEXT: sll $3, $6, 9
; CHECK-BE-NEXT: jr $ra
; CHECK-BE-NEXT: or $3, $3, $1
;
; CHECK-LE-LABEL: fshl_i64_const_overshift:
; CHECK-LE: # %bb.0:
-; CHECK-LE-NEXT: sll $1, $7, 9
-; CHECK-LE-NEXT: srl $2, $6, 23
+; CHECK-LE-NEXT: srl $1, $6, 23
+; CHECK-LE-NEXT: sll $2, $7, 9
; CHECK-LE-NEXT: or $2, $2, $1
; CHECK-LE-NEXT: srl $1, $7, 23
; CHECK-LE-NEXT: sll $3, $4, 9
@@ -338,40 +315,25 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: jal __umoddi3
; CHECK-BE-NEXT: addiu $7, $zero, 37
; CHECK-BE-NEXT: addiu $1, $3, 27
-; CHECK-BE-NEXT: andi $2, $1, 63
-; CHECK-BE-NEXT: not $3, $2
-; CHECK-BE-NEXT: srl $4, $16, 5
-; CHECK-BE-NEXT: sll $5, $17, 27
-; CHECK-BE-NEXT: or $4, $5, $4
-; CHECK-BE-NEXT: sll $5, $4, 1
-; CHECK-BE-NEXT: sll $6, $16, 27
-; CHECK-BE-NEXT: srlv $6, $6, $2
-; CHECK-BE-NEXT: sllv $3, $5, $3
-; CHECK-BE-NEXT: not $5, $1
-; CHECK-BE-NEXT: andi $7, $5, 63
-; CHECK-BE-NEXT: sll $8, $18, 1
-; CHECK-BE-NEXT: sllv $8, $8, $7
-; CHECK-BE-NEXT: andi $5, $5, 32
-; CHECK-BE-NEXT: move $9, $8
-; CHECK-BE-NEXT: movn $9, $zero, $5
-; CHECK-BE-NEXT: or $3, $3, $6
-; CHECK-BE-NEXT: srlv $2, $4, $2
-; CHECK-BE-NEXT: andi $1, $1, 32
-; CHECK-BE-NEXT: movn $3, $2, $1
-; CHECK-BE-NEXT: srl $4, $18, 31
+; CHECK-BE-NEXT: andi $3, $1, 32
+; CHECK-BE-NEXT: srl $2, $16, 5
+; CHECK-BE-NEXT: sll $4, $17, 27
+; CHECK-BE-NEXT: or $4, $4, $2
+; CHECK-BE-NEXT: movz $19, $18, $3
+; CHECK-BE-NEXT: movz $18, $4, $3
+; CHECK-BE-NEXT: andi $5, $1, 31
+; CHECK-BE-NEXT: srlv $2, $18, $5
+; CHECK-BE-NEXT: not $1, $1
+; CHECK-BE-NEXT: andi $1, $1, 31
; CHECK-BE-NEXT: sll $6, $19, 1
-; CHECK-BE-NEXT: or $4, $6, $4
-; CHECK-BE-NEXT: or $3, $9, $3
-; CHECK-BE-NEXT: movn $2, $zero, $1
-; CHECK-BE-NEXT: sllv $1, $4, $7
-; CHECK-BE-NEXT: not $4, $7
-; CHECK-BE-NEXT: lui $6, 32767
-; CHECK-BE-NEXT: ori $6, $6, 65535
-; CHECK-BE-NEXT: and $6, $18, $6
-; CHECK-BE-NEXT: srlv $4, $6, $4
-; CHECK-BE-NEXT: or $1, $1, $4
-; CHECK-BE-NEXT: movn $1, $8, $5
-; CHECK-BE-NEXT: or $2, $1, $2
+; CHECK-BE-NEXT: sllv $6, $6, $1
+; CHECK-BE-NEXT: or $2, $6, $2
+; CHECK-BE-NEXT: sll $6, $16, 27
+; CHECK-BE-NEXT: movz $4, $6, $3
+; CHECK-BE-NEXT: srlv $3, $4, $5
+; CHECK-BE-NEXT: sll $4, $18, 1
+; CHECK-BE-NEXT: sllv $1, $4, $1
+; CHECK-BE-NEXT: or $3, $1, $3
; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload
; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload
; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload
@@ -404,39 +366,25 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LE-NEXT: jal __umoddi3
; CHECK-LE-NEXT: addiu $7, $zero, 0
; CHECK-LE-NEXT: addiu $1, $2, 27
-; CHECK-LE-NEXT: andi $2, $1, 63
-; CHECK-LE-NEXT: not $3, $2
-; CHECK-LE-NEXT: srl $4, $17, 5
-; CHECK-LE-NEXT: sll $5, $16, 27
-; CHECK-LE-NEXT: or $4, $5, $4
-; CHECK-LE-NEXT: sll $5, $4, 1
-; CHECK-LE-NEXT: sll $6, $17, 27
-; CHECK-LE-NEXT: srlv $6, $6, $2
-; CHECK-LE-NEXT: sllv $3, $5, $3
-; CHECK-LE-NEXT: not $5, $1
-; CHECK-LE-NEXT: andi $7, $5, 63
-; CHECK-LE-NEXT: sll $8, $19, 1
-; CHECK-LE-NEXT: sllv $8, $8, $7
-; CHECK-LE-NEXT: andi $5, $5, 32
-; CHECK-LE-NEXT: move $9, $8
-; CHECK-LE-NEXT: movn $9, $zero, $5
-; CHECK-LE-NEXT: or $3, $3, $6
-; CHECK-LE-NEXT: srlv $4, $4, $2
-; CHECK-LE-NEXT: andi $1, $1, 32
-; CHECK-LE-NEXT: movn $3, $4, $1
-; CHECK-LE-NEXT: srl $2, $19, 31
-; CHECK-LE-NEXT: sll $6, $18, 1
-; CHECK-LE-NEXT: or $6, $6, $2
-; CHECK-LE-NEXT: or $2, $9, $3
-; CHECK-LE-NEXT: movn $4, $zero, $1
-; CHECK-LE-NEXT: sllv $1, $6, $7
-; CHECK-LE-NEXT: not $3, $7
-; CHECK-LE-NEXT: lui $6, 32767
-; CHECK-LE-NEXT: ori $6, $6, 65535
-; CHECK-LE-NEXT: and $6, $19, $6
-; CHECK-LE-NEXT: srlv $3, $6, $3
-; CHECK-LE-NEXT: or $1, $1, $3
-; CHECK-LE-NEXT: movn $1, $8, $5
+; CHECK-LE-NEXT: andi $3, $1, 32
+; CHECK-LE-NEXT: srl $2, $17, 5
+; CHECK-LE-NEXT: sll $4, $16, 27
+; CHECK-LE-NEXT: or $2, $4, $2
+; CHECK-LE-NEXT: sll $4, $17, 27
+; CHECK-LE-NEXT: move $5, $19
+; CHECK-LE-NEXT: movz $5, $2, $3
+; CHECK-LE-NEXT: movz $2, $4, $3
+; CHECK-LE-NEXT: andi $4, $1, 31
+; CHECK-LE-NEXT: srlv $2, $2, $4
+; CHECK-LE-NEXT: not $1, $1
+; CHECK-LE-NEXT: andi $1, $1, 31
+; CHECK-LE-NEXT: sll $6, $5, 1
+; CHECK-LE-NEXT: sllv $6, $6, $1
+; CHECK-LE-NEXT: or $2, $6, $2
+; CHECK-LE-NEXT: srlv $4, $5, $4
+; CHECK-LE-NEXT: movz $18, $19, $3
+; CHECK-LE-NEXT: sll $3, $18, 1
+; CHECK-LE-NEXT: sllv $1, $3, $1
; CHECK-LE-NEXT: or $3, $1, $4
; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload
; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
index c44304d1c8ed5..0a622fd68d6b3 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -87,52 +87,44 @@ define i32 @rotl_i32(i32 %x, i32 %z) {
define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotl_i64:
; CHECK32_32: # %bb.0:
-; CHECK32_32-NEXT: clrlwi 5, 6, 26
-; CHECK32_32-NEXT: subfic 8, 5, 32
-; CHECK32_32-NEXT: neg 6, 6
-; CHECK32_32-NEXT: slw 7, 3, 5
-; CHECK32_32-NEXT: addi 9, 5, -32
-; CHECK32_32-NEXT: srw 8, 4, 8
-; CHECK32_32-NEXT: clrlwi 6, 6, 26
-; CHECK32_32-NEXT: slw 9, 4, 9
-; CHECK32_32-NEXT: or 7, 7, 8
-; CHECK32_32-NEXT: subfic 8, 6, 32
-; CHECK32_32-NEXT: or 7, 7, 9
-; CHECK32_32-NEXT: addi 9, 6, -32
-; CHECK32_32-NEXT: slw 8, 3, 8
-; CHECK32_32-NEXT: srw 9, 3, 9
-; CHECK32_32-NEXT: srw 3, 3, 6
-; CHECK32_32-NEXT: srw 6, 4, 6
-; CHECK32_32-NEXT: or 6, 6, 8
-; CHECK32_32-NEXT: or 6, 6, 9
-; CHECK32_32-NEXT: slw 4, 4, 5
-; CHECK32_32-NEXT: or 3, 7, 3
-; CHECK32_32-NEXT: or 4, 4, 6
+; CHECK32_32-NEXT: andi. 5, 6, 32
+; CHECK32_32-NEXT: clrlwi 5, 6, 27
+; CHECK32_32-NEXT: subfic 6, 5, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB4_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 7, 3, 0
+; CHECK32_32-NEXT: ori 3, 4, 0
+; CHECK32_32-NEXT: b .LBB4_3
+; CHECK32_32-NEXT: .LBB4_2:
+; CHECK32_32-NEXT: addi 7, 4, 0
+; CHECK32_32-NEXT: .LBB4_3:
+; CHECK32_32-NEXT: srw 4, 7, 6
+; CHECK32_32-NEXT: slw 8, 3, 5
+; CHECK32_32-NEXT: srw 6, 3, 6
+; CHECK32_32-NEXT: slw 5, 7, 5
+; CHECK32_32-NEXT: or 3, 8, 4
+; CHECK32_32-NEXT: or 4, 5, 6
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotl_i64:
; CHECK32_64: # %bb.0:
-; CHECK32_64-NEXT: clrlwi 5, 6, 26
-; CHECK32_64-NEXT: neg 6, 6
-; CHECK32_64-NEXT: subfic 8, 5, 32
-; CHECK32_64-NEXT: slw 7, 3, 5
-; CHECK32_64-NEXT: clrlwi 6, 6, 26
-; CHECK32_64-NEXT: srw 8, 4, 8
-; CHECK32_64-NEXT: addi 9, 5, -32
-; CHECK32_64-NEXT: or 7, 7, 8
-; CHECK32_64-NEXT: subfic 8, 6, 32
-; CHECK32_64-NEXT: slw 5, 4, 5
-; CHECK32_64-NEXT: slw 9, 4, 9
-; CHECK32_64-NEXT: srw 10, 3, 6
-; CHECK32_64-NEXT: srw 4, 4, 6
-; CHECK32_64-NEXT: addi 6, 6, -32
-; CHECK32_64-NEXT: slw 8, 3, 8
-; CHECK32_64-NEXT: srw 3, 3, 6
-; CHECK32_64-NEXT: or 4, 4, 8
-; CHECK32_64-NEXT: or 6, 7, 9
-; CHECK32_64-NEXT: or 4, 4, 3
-; CHECK32_64-NEXT: or 3, 6, 10
-; CHECK32_64-NEXT: or 4, 5, 4
+; CHECK32_64-NEXT: andi. 5, 6, 32
+; CHECK32_64-NEXT: clrlwi 5, 6, 27
+; CHECK32_64-NEXT: bc 12, 2, .LBB4_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 7, 3, 0
+; CHECK32_64-NEXT: ori 3, 4, 0
+; CHECK32_64-NEXT: b .LBB4_3
+; CHECK32_64-NEXT: .LBB4_2:
+; CHECK32_64-NEXT: addi 7, 4, 0
+; CHECK32_64-NEXT: .LBB4_3:
+; CHECK32_64-NEXT: subfic 6, 5, 32
+; CHECK32_64-NEXT: srw 4, 7, 6
+; CHECK32_64-NEXT: slw 8, 3, 5
+; CHECK32_64-NEXT: srw 6, 3, 6
+; CHECK32_64-NEXT: slw 5, 7, 5
+; CHECK32_64-NEXT: or 3, 8, 4
+; CHECK32_64-NEXT: or 4, 5, 6
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotl_i64:
@@ -256,52 +248,44 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotr_i64:
; CHECK32_32: # %bb.0:
-; CHECK32_32-NEXT: clrlwi 5, 6, 26
-; CHECK32_32-NEXT: subfic 8, 5, 32
-; CHECK32_32-NEXT: neg 6, 6
-; CHECK32_32-NEXT: srw 7, 4, 5
-; CHECK32_32-NEXT: addi 9, 5, -32
-; CHECK32_32-NEXT: slw 8, 3, 8
-; CHECK32_32-NEXT: clrlwi 6, 6, 26
-; CHECK32_32-NEXT: srw 9, 3, 9
-; CHECK32_32-NEXT: or 7, 7, 8
-; CHECK32_32-NEXT: subfic 8, 6, 32
-; CHECK32_32-NEXT: or 7, 7, 9
-; CHECK32_32-NEXT: addi 9, 6, -32
-; CHECK32_32-NEXT: srw 8, 4, 8
-; CHECK32_32-NEXT: slw 9, 4, 9
-; CHECK32_32-NEXT: slw 4, 4, 6
-; CHECK32_32-NEXT: slw 6, 3, 6
-; CHECK32_32-NEXT: or 6, 6, 8
-; CHECK32_32-NEXT: or 6, 6, 9
-; CHECK32_32-NEXT: srw 3, 3, 5
-; CHECK32_32-NEXT: or 4, 7, 4
-; CHECK32_32-NEXT: or 3, 3, 6
+; CHECK32_32-NEXT: andi. 5, 6, 32
+; CHECK32_32-NEXT: clrlwi 5, 6, 27
+; CHECK32_32-NEXT: subfic 6, 5, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB11_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 7, 4, 0
+; CHECK32_32-NEXT: b .LBB11_3
+; CHECK32_32-NEXT: .LBB11_2:
+; CHECK32_32-NEXT: addi 7, 3, 0
+; CHECK32_32-NEXT: addi 3, 4, 0
+; CHECK32_32-NEXT: .LBB11_3:
+; CHECK32_32-NEXT: srw 4, 7, 5
+; CHECK32_32-NEXT: slw 8, 3, 6
+; CHECK32_32-NEXT: srw 5, 3, 5
+; CHECK32_32-NEXT: slw 6, 7, 6
+; CHECK32_32-NEXT: or 3, 8, 4
+; CHECK32_32-NEXT: or 4, 6, 5
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: rotr_i64:
; CHECK32_64: # %bb.0:
-; CHECK32_64-NEXT: clrlwi 5, 6, 26
-; CHECK32_64-NEXT: neg 6, 6
-; CHECK32_64-NEXT: subfic 8, 5, 32
-; CHECK32_64-NEXT: srw 7, 4, 5
-; CHECK32_64-NEXT: clrlwi 6, 6, 26
-; CHECK32_64-NEXT: slw 8, 3, 8
-; CHECK32_64-NEXT: addi 9, 5, -32
-; CHECK32_64-NEXT: or 7, 7, 8
-; CHECK32_64-NEXT: subfic 8, 6, 32
+; CHECK32_64-NEXT: andi. 5, 6, 32
+; CHECK32_64-NEXT: clrlwi 5, 6, 27
+; CHECK32_64-NEXT: bc 12, 2, .LBB11_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 7, 4, 0
+; CHECK32_64-NEXT: b .LBB11_3
+; CHECK32_64-NEXT: .LBB11_2:
+; CHECK32_64-NEXT: addi 7, 3, 0
+; CHECK32_64-NEXT: addi 3, 4, 0
+; CHECK32_64-NEXT: .LBB11_3:
+; CHECK32_64-NEXT: subfic 6, 5, 32
+; CHECK32_64-NEXT: srw 4, 7, 5
+; CHECK32_64-NEXT: slw 8, 3, 6
; CHECK32_64-NEXT: srw 5, 3, 5
-; CHECK32_64-NEXT: srw 9, 3, 9
-; CHECK32_64-NEXT: slw 10, 4, 6
-; CHECK32_64-NEXT: slw 3, 3, 6
-; CHECK32_64-NEXT: addi 6, 6, -32
-; CHECK32_64-NEXT: srw 8, 4, 8
-; CHECK32_64-NEXT: slw 4, 4, 6
-; CHECK32_64-NEXT: or 3, 3, 8
-; CHECK32_64-NEXT: or 6, 7, 9
-; CHECK32_64-NEXT: or 3, 3, 4
-; CHECK32_64-NEXT: or 4, 6, 10
-; CHECK32_64-NEXT: or 3, 5, 3
+; CHECK32_64-NEXT: slw 6, 7, 6
+; CHECK32_64-NEXT: or 3, 8, 4
+; CHECK32_64-NEXT: or 4, 6, 5
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: rotr_i64:
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index c33904082f238..62b68e0b2cadd 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -43,58 +43,47 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshl_i64:
; CHECK32_32: # %bb.0:
-; CHECK32_32-NEXT: clrlwi 7, 8, 26
-; CHECK32_32-NEXT: not 8, 8
-; CHECK32_32-NEXT: rotlwi 6, 6, 31
-; CHECK32_32-NEXT: subfic 10, 7, 32
-; CHECK32_32-NEXT: srwi 9, 5, 1
+; CHECK32_32-NEXT: andi. 7, 8, 32
+; CHECK32_32-NEXT: clrlwi 7, 8, 27
+; CHECK32_32-NEXT: subfic 8, 7, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB1_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 9, 5, 0
+; CHECK32_32-NEXT: ori 3, 4, 0
+; CHECK32_32-NEXT: ori 4, 6, 0
+; CHECK32_32-NEXT: b .LBB1_3
+; CHECK32_32-NEXT: .LBB1_2:
+; CHECK32_32-NEXT: addi 9, 4, 0
+; CHECK32_32-NEXT: addi 4, 5, 0
+; CHECK32_32-NEXT: .LBB1_3:
+; CHECK32_32-NEXT: srw 5, 9, 8
; CHECK32_32-NEXT: slw 3, 3, 7
-; CHECK32_32-NEXT: clrlwi 8, 8, 26
-; CHECK32_32-NEXT: rlwimi 6, 5, 31, 0, 0
-; CHECK32_32-NEXT: srw 5, 4, 10
-; CHECK32_32-NEXT: srw 10, 9, 8
-; CHECK32_32-NEXT: srw 6, 6, 8
+; CHECK32_32-NEXT: srw 4, 4, 8
+; CHECK32_32-NEXT: slw 6, 9, 7
; CHECK32_32-NEXT: or 3, 3, 5
-; CHECK32_32-NEXT: subfic 5, 8, 32
-; CHECK32_32-NEXT: addi 8, 8, -32
-; CHECK32_32-NEXT: slw 5, 9, 5
-; CHECK32_32-NEXT: srw 8, 9, 8
-; CHECK32_32-NEXT: addi 9, 7, -32
-; CHECK32_32-NEXT: slw 9, 4, 9
-; CHECK32_32-NEXT: or 5, 6, 5
-; CHECK32_32-NEXT: or 3, 3, 9
-; CHECK32_32-NEXT: or 5, 5, 8
-; CHECK32_32-NEXT: slw 4, 4, 7
-; CHECK32_32-NEXT: or 3, 3, 10
-; CHECK32_32-NEXT: or 4, 4, 5
+; CHECK32_32-NEXT: or 4, 6, 4
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: fshl_i64:
; CHECK32_64: # %bb.0:
-; CHECK32_64-NEXT: clrlwi 7, 8, 26
-; CHECK32_64-NEXT: not 8, 8
-; CHECK32_64-NEXT: subfic 9, 7, 32
-; CHECK32_64-NEXT: rotlwi 6, 6, 31
+; CHECK32_64-NEXT: andi. 7, 8, 32
+; CHECK32_64-NEXT: clrlwi 7, 8, 27
+; CHECK32_64-NEXT: bc 12, 2, .LBB1_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 9, 5, 0
+; CHECK32_64-NEXT: ori 3, 4, 0
+; CHECK32_64-NEXT: ori 5, 6, 0
+; CHECK32_64-NEXT: b .LBB1_3
+; CHECK32_64-NEXT: .LBB1_2:
+; CHECK32_64-NEXT: addi 9, 4, 0
+; CHECK32_64-NEXT: .LBB1_3:
+; CHECK32_64-NEXT: subfic 8, 7, 32
+; CHECK32_64-NEXT: srw 4, 9, 8
; CHECK32_64-NEXT: slw 3, 3, 7
-; CHECK32_64-NEXT: clrlwi 8, 8, 26
-; CHECK32_64-NEXT: srw 9, 4, 9
-; CHECK32_64-NEXT: rlwimi 6, 5, 31, 0, 0
-; CHECK32_64-NEXT: srwi 5, 5, 1
-; CHECK32_64-NEXT: addi 10, 7, -32
-; CHECK32_64-NEXT: or 3, 3, 9
-; CHECK32_64-NEXT: subfic 9, 8, 32
-; CHECK32_64-NEXT: slw 7, 4, 7
-; CHECK32_64-NEXT: slw 4, 4, 10
-; CHECK32_64-NEXT: srw 10, 5, 8
-; CHECK32_64-NEXT: srw 6, 6, 8
-; CHECK32_64-NEXT: addi 8, 8, -32
-; CHECK32_64-NEXT: slw 9, 5, 9
; CHECK32_64-NEXT: srw 5, 5, 8
-; CHECK32_64-NEXT: or 6, 6, 9
+; CHECK32_64-NEXT: slw 6, 9, 7
; CHECK32_64-NEXT: or 3, 3, 4
; CHECK32_64-NEXT: or 4, 6, 5
-; CHECK32_64-NEXT: or 3, 3, 10
-; CHECK32_64-NEXT: or 4, 7, 4
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: fshl_i64:
@@ -112,387 +101,128 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK32_32-LABEL: fshl_i128:
; CHECK32_32: # %bb.0:
-; CHECK32_32-NEXT: stwu 1, -64(1)
-; CHECK32_32-NEXT: lwz 0, 84(1)
-; CHECK32_32-NEXT: rotlwi 12, 8, 31
-; CHECK32_32-NEXT: srwi 11, 7, 1
-; CHECK32_32-NEXT: rlwimi 12, 7, 31, 0, 0
-; CHECK32_32-NEXT: andi. 7, 0, 127
-; CHECK32_32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: rotlwi 10, 10, 31
-; CHECK32_32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: rotlwi 30, 9, 31
-; CHECK32_32-NEXT: subfic 27, 7, 32
-; CHECK32_32-NEXT: stw 22, 24(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: rlwimi 10, 9, 31, 0, 0
-; CHECK32_32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: rlwimi 30, 8, 31, 0, 0
-; CHECK32_32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: not 8, 0
-; CHECK32_32-NEXT: subfic 9, 7, 96
-; CHECK32_32-NEXT: addi 0, 7, -64
-; CHECK32_32-NEXT: slw 28, 3, 7
-; CHECK32_32-NEXT: subfic 25, 7, 64
-; CHECK32_32-NEXT: srw 22, 4, 27
-; CHECK32_32-NEXT: stw 20, 16(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: srw 9, 6, 9
-; CHECK32_32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: slw 23, 5, 0
-; CHECK32_32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: addi 29, 7, -96
-; CHECK32_32-NEXT: srw 20, 5, 25
-; CHECK32_32-NEXT: or 28, 28, 22
-; CHECK32_32-NEXT: srw 22, 6, 25
-; CHECK32_32-NEXT: subfic 25, 25, 32
-; CHECK32_32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: lwz 11, 20(1)
+; CHECK32_32-NEXT: andi. 12, 11, 64
; CHECK32_32-NEXT: mcrf 1, 0
-; CHECK32_32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: addi 26, 7, -32
-; CHECK32_32-NEXT: andi. 8, 8, 127
-; CHECK32_32-NEXT: slw 24, 5, 7
-; CHECK32_32-NEXT: slw 29, 6, 29
-; CHECK32_32-NEXT: or 9, 23, 9
-; CHECK32_32-NEXT: slw 25, 5, 25
-; CHECK32_32-NEXT: srw 5, 5, 27
-; CHECK32_32-NEXT: srw 27, 6, 27
-; CHECK32_32-NEXT: stw 21, 20(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: slw 21, 4, 26
-; CHECK32_32-NEXT: subfic 23, 8, 32
-; CHECK32_32-NEXT: or 27, 24, 27
-; CHECK32_32-NEXT: subfic 24, 8, 96
-; CHECK32_32-NEXT: or 9, 9, 29
-; CHECK32_32-NEXT: addi 29, 8, -64
-; CHECK32_32-NEXT: or 25, 22, 25
-; CHECK32_32-NEXT: stw 19, 12(1) # 4-byte Folded Spill
-; CHECK32_32-NEXT: srw 19, 12, 8
-; CHECK32_32-NEXT: or 28, 28, 21
-; CHECK32_32-NEXT: slw 21, 11, 23
-; CHECK32_32-NEXT: slw 24, 11, 24
-; CHECK32_32-NEXT: srw 22, 12, 29
-; CHECK32_32-NEXT: slw 26, 6, 26
-; CHECK32_32-NEXT: or 5, 25, 5
-; CHECK32_32-NEXT: addi 25, 8, -96
-; CHECK32_32-NEXT: or 21, 19, 21
-; CHECK32_32-NEXT: srw 19, 10, 8
-; CHECK32_32-NEXT: or 24, 22, 24
-; CHECK32_32-NEXT: slw 22, 30, 23
-; CHECK32_32-NEXT: or 27, 27, 26
-; CHECK32_32-NEXT: addi 26, 8, -32
-; CHECK32_32-NEXT: srw 25, 11, 25
-; CHECK32_32-NEXT: or 22, 19, 22
-; CHECK32_32-NEXT: or 28, 28, 20
-; CHECK32_32-NEXT: srw 20, 11, 26
-; CHECK32_32-NEXT: or 25, 24, 25
-; CHECK32_32-NEXT: subfic 24, 8, 64
-; CHECK32_32-NEXT: srw 26, 30, 26
-; CHECK32_32-NEXT: or 26, 22, 26
-; CHECK32_32-NEXT: subfic 22, 24, 32
-; CHECK32_32-NEXT: slw 23, 12, 23
-; CHECK32_32-NEXT: srw 22, 12, 22
-; CHECK32_32-NEXT: slw 12, 12, 24
-; CHECK32_32-NEXT: slw 24, 11, 24
-; CHECK32_32-NEXT: cmplwi 5, 7, 64
-; CHECK32_32-NEXT: or 24, 24, 22
-; CHECK32_32-NEXT: slw 22, 6, 0
-; CHECK32_32-NEXT: slw 6, 6, 7
-; CHECK32_32-NEXT: slw 7, 4, 7
-; CHECK32_32-NEXT: srw 29, 11, 29
-; CHECK32_32-NEXT: srw 11, 11, 8
-; CHECK32_32-NEXT: cmplwi 6, 8, 64
-; CHECK32_32-NEXT: srw 8, 30, 8
-; CHECK32_32-NEXT: or 5, 7, 5
-; CHECK32_32-NEXT: or 7, 26, 12
-; CHECK32_32-NEXT: or 12, 24, 23
-; CHECK32_32-NEXT: bc 12, 20, .LBB2_1
-; CHECK32_32-NEXT: b .LBB2_2
-; CHECK32_32-NEXT: .LBB2_1:
-; CHECK32_32-NEXT: addi 9, 28, 0
+; CHECK32_32-NEXT: andi. 12, 11, 32
+; CHECK32_32-NEXT: clrlwi 11, 11, 27
+; CHECK32_32-NEXT: bc 12, 6, .LBB2_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 4, 6, 0
+; CHECK32_32-NEXT: ori 12, 7, 0
+; CHECK32_32-NEXT: ori 3, 5, 0
+; CHECK32_32-NEXT: ori 5, 8, 0
+; CHECK32_32-NEXT: ori 6, 9, 0
+; CHECK32_32-NEXT: ori 7, 10, 0
+; CHECK32_32-NEXT: b .LBB2_3
; CHECK32_32-NEXT: .LBB2_2:
-; CHECK32_32-NEXT: li 28, 0
-; CHECK32_32-NEXT: bc 12, 20, .LBB2_4
-; CHECK32_32-NEXT: # %bb.3:
-; CHECK32_32-NEXT: ori 5, 22, 0
-; CHECK32_32-NEXT: b .LBB2_4
-; CHECK32_32-NEXT: .LBB2_4:
-; CHECK32_32-NEXT: bc 12, 24, .LBB2_6
-; CHECK32_32-NEXT: # %bb.5:
-; CHECK32_32-NEXT: ori 7, 25, 0
+; CHECK32_32-NEXT: addi 12, 5, 0
+; CHECK32_32-NEXT: addi 5, 6, 0
+; CHECK32_32-NEXT: addi 6, 7, 0
+; CHECK32_32-NEXT: addi 7, 8, 0
+; CHECK32_32-NEXT: .LBB2_3:
+; CHECK32_32-NEXT: subfic 8, 11, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB2_5
+; CHECK32_32-NEXT: # %bb.4:
+; CHECK32_32-NEXT: ori 9, 12, 0
+; CHECK32_32-NEXT: ori 3, 4, 0
+; CHECK32_32-NEXT: ori 4, 5, 0
+; CHECK32_32-NEXT: ori 5, 6, 0
+; CHECK32_32-NEXT: ori 6, 7, 0
; CHECK32_32-NEXT: b .LBB2_6
+; CHECK32_32-NEXT: .LBB2_5:
+; CHECK32_32-NEXT: addi 9, 4, 0
+; CHECK32_32-NEXT: addi 4, 12, 0
; CHECK32_32-NEXT: .LBB2_6:
-; CHECK32_32-NEXT: or 8, 8, 12
-; CHECK32_32-NEXT: or 21, 21, 20
-; CHECK32_32-NEXT: bc 12, 20, .LBB2_8
-; CHECK32_32-NEXT: # %bb.7:
-; CHECK32_32-NEXT: ori 6, 28, 0
-; CHECK32_32-NEXT: b .LBB2_8
-; CHECK32_32-NEXT: .LBB2_8:
-; CHECK32_32-NEXT: bc 12, 6, .LBB2_10
-; CHECK32_32-NEXT: # %bb.9:
-; CHECK32_32-NEXT: ori 4, 5, 0
-; CHECK32_32-NEXT: b .LBB2_10
-; CHECK32_32-NEXT: .LBB2_10:
-; CHECK32_32-NEXT: bc 12, 2, .LBB2_12
-; CHECK32_32-NEXT: # %bb.11:
-; CHECK32_32-NEXT: ori 5, 7, 0
-; CHECK32_32-NEXT: b .LBB2_13
-; CHECK32_32-NEXT: .LBB2_12:
-; CHECK32_32-NEXT: addi 5, 10, 0
-; CHECK32_32-NEXT: .LBB2_13:
-; CHECK32_32-NEXT: bc 12, 24, .LBB2_15
-; CHECK32_32-NEXT: # %bb.14:
-; CHECK32_32-NEXT: ori 7, 29, 0
-; CHECK32_32-NEXT: ori 11, 28, 0
-; CHECK32_32-NEXT: ori 0, 28, 0
-; CHECK32_32-NEXT: b .LBB2_16
-; CHECK32_32-NEXT: .LBB2_15:
-; CHECK32_32-NEXT: addi 7, 8, 0
-; CHECK32_32-NEXT: addi 0, 21, 0
-; CHECK32_32-NEXT: .LBB2_16:
-; CHECK32_32-NEXT: bc 12, 6, .LBB2_18
-; CHECK32_32-NEXT: # %bb.17:
-; CHECK32_32-NEXT: ori 3, 9, 0
-; CHECK32_32-NEXT: b .LBB2_18
-; CHECK32_32-NEXT: .LBB2_18:
-; CHECK32_32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: or 6, 6, 5
-; CHECK32_32-NEXT: bc 12, 20, .LBB2_20
-; CHECK32_32-NEXT: # %bb.19:
-; CHECK32_32-NEXT: ori 5, 28, 0
-; CHECK32_32-NEXT: b .LBB2_21
-; CHECK32_32-NEXT: .LBB2_20:
-; CHECK32_32-NEXT: addi 5, 27, 0
-; CHECK32_32-NEXT: .LBB2_21:
-; CHECK32_32-NEXT: bc 12, 2, .LBB2_22
-; CHECK32_32-NEXT: b .LBB2_23
-; CHECK32_32-NEXT: .LBB2_22:
-; CHECK32_32-NEXT: addi 7, 30, 0
-; CHECK32_32-NEXT: .LBB2_23:
-; CHECK32_32-NEXT: or 3, 3, 11
-; CHECK32_32-NEXT: or 4, 4, 0
-; CHECK32_32-NEXT: or 5, 5, 7
-; CHECK32_32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 22, 24(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 21, 20(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 20, 16(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: lwz 19, 12(1) # 4-byte Folded Reload
-; CHECK32_32-NEXT: addi 1, 1, 64
+; CHECK32_32-NEXT: srw 7, 9, 8
+; CHECK32_32-NEXT: slw 3, 3, 11
+; CHECK32_32-NEXT: srw 10, 4, 8
+; CHECK32_32-NEXT: slw 9, 9, 11
+; CHECK32_32-NEXT: srw 12, 5, 8
+; CHECK32_32-NEXT: slw 0, 4, 11
+; CHECK32_32-NEXT: srw 6, 6, 8
+; CHECK32_32-NEXT: slw 8, 5, 11
+; CHECK32_32-NEXT: or 3, 3, 7
+; CHECK32_32-NEXT: or 4, 9, 10
+; CHECK32_32-NEXT: or 5, 0, 12
+; CHECK32_32-NEXT: or 6, 8, 6
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: fshl_i128:
; CHECK32_64: # %bb.0:
-; CHECK32_64-NEXT: stwu 1, -64(1)
-; CHECK32_64-NEXT: lwz 12, 84(1)
-; CHECK32_64-NEXT: rotlwi 11, 8, 31
-; CHECK32_64-NEXT: rotlwi 10, 10, 31
-; CHECK32_64-NEXT: rlwimi 10, 9, 31, 0, 0
-; CHECK32_64-NEXT: rlwimi 11, 7, 31, 0, 0
-; CHECK32_64-NEXT: stw 30, 56(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: rotlwi 30, 9, 31
-; CHECK32_64-NEXT: stw 27, 44(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: not 9, 12
-; CHECK32_64-NEXT: rlwimi 30, 8, 31, 0, 0
-; CHECK32_64-NEXT: andi. 8, 12, 127
-; CHECK32_64-NEXT: stw 22, 24(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: stwu 1, -16(1)
+; CHECK32_64-NEXT: lwz 11, 36(1)
+; CHECK32_64-NEXT: andi. 12, 11, 64
+; CHECK32_64-NEXT: stw 30, 8(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mcrf 1, 0
-; CHECK32_64-NEXT: subfic 12, 8, 96
-; CHECK32_64-NEXT: addi 0, 8, -64
-; CHECK32_64-NEXT: subfic 27, 8, 32
-; CHECK32_64-NEXT: stw 23, 28(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: andi. 9, 9, 127
-; CHECK32_64-NEXT: srw 12, 6, 12
-; CHECK32_64-NEXT: stw 25, 36(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: subfic 25, 8, 64
-; CHECK32_64-NEXT: slw 23, 5, 0
-; CHECK32_64-NEXT: stw 26, 40(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: addi 26, 8, -32
-; CHECK32_64-NEXT: srw 22, 4, 27
-; CHECK32_64-NEXT: srwi 7, 7, 1
-; CHECK32_64-NEXT: or 12, 23, 12
-; CHECK32_64-NEXT: stw 28, 48(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: slw 28, 3, 8
-; CHECK32_64-NEXT: srw 23, 6, 25
-; CHECK32_64-NEXT: stw 18, 8(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: subfic 18, 9, 32
-; CHECK32_64-NEXT: or 28, 28, 22
-; CHECK32_64-NEXT: srw 22, 5, 27
-; CHECK32_64-NEXT: srw 27, 6, 27
-; CHECK32_64-NEXT: stw 20, 16(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: srw 20, 5, 25
-; CHECK32_64-NEXT: subfic 25, 25, 32
-; CHECK32_64-NEXT: stw 21, 20(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: slw 21, 4, 26
-; CHECK32_64-NEXT: slw 26, 6, 26
-; CHECK32_64-NEXT: or 28, 28, 21
-; CHECK32_64-NEXT: slw 21, 7, 18
-; CHECK32_64-NEXT: stw 24, 32(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: slw 24, 5, 8
-; CHECK32_64-NEXT: slw 5, 5, 25
-; CHECK32_64-NEXT: stw 29, 52(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: addi 29, 8, -96
-; CHECK32_64-NEXT: subfic 25, 9, 96
-; CHECK32_64-NEXT: slw 29, 6, 29
-; CHECK32_64-NEXT: or 27, 24, 27
-; CHECK32_64-NEXT: stw 19, 12(1) # 4-byte Folded Spill
-; CHECK32_64-NEXT: srw 19, 11, 9
-; CHECK32_64-NEXT: addi 24, 9, -64
-; CHECK32_64-NEXT: or 12, 12, 29
-; CHECK32_64-NEXT: srw 29, 10, 9
-; CHECK32_64-NEXT: slw 25, 7, 25
-; CHECK32_64-NEXT: or 21, 19, 21
-; CHECK32_64-NEXT: srw 19, 11, 24
-; CHECK32_64-NEXT: or 5, 23, 5
-; CHECK32_64-NEXT: slw 23, 30, 18
-; CHECK32_64-NEXT: or 27, 27, 26
-; CHECK32_64-NEXT: addi 26, 9, -96
-; CHECK32_64-NEXT: or 25, 19, 25
-; CHECK32_64-NEXT: lwz 19, 12(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: or 29, 29, 23
-; CHECK32_64-NEXT: addi 23, 9, -32
-; CHECK32_64-NEXT: srw 26, 7, 26
-; CHECK32_64-NEXT: or 28, 28, 20
-; CHECK32_64-NEXT: srw 20, 7, 23
-; CHECK32_64-NEXT: or 26, 25, 26
-; CHECK32_64-NEXT: subfic 25, 9, 64
-; CHECK32_64-NEXT: srw 23, 30, 23
-; CHECK32_64-NEXT: or 29, 29, 23
-; CHECK32_64-NEXT: subfic 23, 25, 32
-; CHECK32_64-NEXT: or 5, 5, 22
-; CHECK32_64-NEXT: slw 22, 11, 18
-; CHECK32_64-NEXT: lwz 18, 8(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: srw 23, 11, 23
-; CHECK32_64-NEXT: slw 11, 11, 25
-; CHECK32_64-NEXT: slw 25, 7, 25
-; CHECK32_64-NEXT: cmplwi 5, 8, 64
-; CHECK32_64-NEXT: bc 12, 20, .LBB2_1
-; CHECK32_64-NEXT: b .LBB2_2
-; CHECK32_64-NEXT: .LBB2_1:
-; CHECK32_64-NEXT: addi 12, 28, 0
+; CHECK32_64-NEXT: clrlwi 12, 11, 27
+; CHECK32_64-NEXT: andi. 11, 11, 32
+; CHECK32_64-NEXT: bc 12, 6, .LBB2_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 4, 6, 0
+; CHECK32_64-NEXT: ori 30, 7, 0
+; CHECK32_64-NEXT: ori 3, 5, 0
+; CHECK32_64-NEXT: ori 7, 9, 0
+; CHECK32_64-NEXT: b .LBB2_3
; CHECK32_64-NEXT: .LBB2_2:
-; CHECK32_64-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: or 25, 25, 23
-; CHECK32_64-NEXT: bc 12, 6, .LBB2_4
-; CHECK32_64-NEXT: # %bb.3:
-; CHECK32_64-NEXT: ori 3, 12, 0
-; CHECK32_64-NEXT: b .LBB2_4
-; CHECK32_64-NEXT: .LBB2_4:
-; CHECK32_64-NEXT: slw 23, 6, 0
-; CHECK32_64-NEXT: slw 6, 6, 8
-; CHECK32_64-NEXT: slw 8, 4, 8
-; CHECK32_64-NEXT: cmplwi 6, 9, 64
-; CHECK32_64-NEXT: or 5, 8, 5
-; CHECK32_64-NEXT: bc 12, 20, .LBB2_6
-; CHECK32_64-NEXT: # %bb.5:
-; CHECK32_64-NEXT: ori 5, 23, 0
+; CHECK32_64-NEXT: addi 30, 5, 0
+; CHECK32_64-NEXT: .LBB2_3:
+; CHECK32_64-NEXT: bc 12, 2, .LBB2_5
+; CHECK32_64-NEXT: # %bb.4:
+; CHECK32_64-NEXT: ori 5, 30, 0
+; CHECK32_64-NEXT: ori 3, 4, 0
; CHECK32_64-NEXT: b .LBB2_6
+; CHECK32_64-NEXT: .LBB2_5:
+; CHECK32_64-NEXT: addi 5, 4, 0
; CHECK32_64-NEXT: .LBB2_6:
-; CHECK32_64-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: li 8, 0
-; CHECK32_64-NEXT: srw 24, 7, 24
; CHECK32_64-NEXT: bc 12, 6, .LBB2_8
; CHECK32_64-NEXT: # %bb.7:
-; CHECK32_64-NEXT: ori 4, 5, 0
-; CHECK32_64-NEXT: b .LBB2_8
+; CHECK32_64-NEXT: ori 4, 8, 0
+; CHECK32_64-NEXT: ori 8, 10, 0
+; CHECK32_64-NEXT: b .LBB2_9
; CHECK32_64-NEXT: .LBB2_8:
-; CHECK32_64-NEXT: bc 12, 20, .LBB2_10
-; CHECK32_64-NEXT: # %bb.9:
-; CHECK32_64-NEXT: ori 6, 8, 0
-; CHECK32_64-NEXT: b .LBB2_10
-; CHECK32_64-NEXT: .LBB2_10:
-; CHECK32_64-NEXT: srw 7, 7, 9
-; CHECK32_64-NEXT: srw 9, 30, 9
-; CHECK32_64-NEXT: bc 12, 24, .LBB2_12
-; CHECK32_64-NEXT: # %bb.11:
+; CHECK32_64-NEXT: addi 4, 6, 0
+; CHECK32_64-NEXT: .LBB2_9:
+; CHECK32_64-NEXT: subfic 11, 12, 32
+; CHECK32_64-NEXT: bc 12, 2, .LBB2_11
+; CHECK32_64-NEXT: # %bb.10:
+; CHECK32_64-NEXT: ori 0, 4, 0
+; CHECK32_64-NEXT: ori 4, 7, 0
; CHECK32_64-NEXT: ori 7, 8, 0
; CHECK32_64-NEXT: b .LBB2_12
+; CHECK32_64-NEXT: .LBB2_11:
+; CHECK32_64-NEXT: addi 0, 30, 0
; CHECK32_64-NEXT: .LBB2_12:
-; CHECK32_64-NEXT: or 0, 25, 22
-; CHECK32_64-NEXT: or 11, 29, 11
-; CHECK32_64-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: bc 12, 24, .LBB2_14
-; CHECK32_64-NEXT: # %bb.13:
-; CHECK32_64-NEXT: ori 5, 26, 0
-; CHECK32_64-NEXT: b .LBB2_15
-; CHECK32_64-NEXT: .LBB2_14:
-; CHECK32_64-NEXT: addi 5, 11, 0
-; CHECK32_64-NEXT: .LBB2_15:
-; CHECK32_64-NEXT: or 9, 9, 0
-; CHECK32_64-NEXT: or 21, 21, 20
-; CHECK32_64-NEXT: bc 12, 2, .LBB2_16
-; CHECK32_64-NEXT: b .LBB2_17
-; CHECK32_64-NEXT: .LBB2_16:
-; CHECK32_64-NEXT: addi 5, 10, 0
-; CHECK32_64-NEXT: .LBB2_17:
-; CHECK32_64-NEXT: bc 12, 24, .LBB2_19
-; CHECK32_64-NEXT: # %bb.18:
-; CHECK32_64-NEXT: ori 0, 8, 0
-; CHECK32_64-NEXT: b .LBB2_20
-; CHECK32_64-NEXT: .LBB2_19:
-; CHECK32_64-NEXT: addi 0, 21, 0
-; CHECK32_64-NEXT: .LBB2_20:
-; CHECK32_64-NEXT: bc 12, 20, .LBB2_21
-; CHECK32_64-NEXT: b .LBB2_22
-; CHECK32_64-NEXT: .LBB2_21:
-; CHECK32_64-NEXT: addi 8, 27, 0
-; CHECK32_64-NEXT: .LBB2_22:
-; CHECK32_64-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: or 3, 3, 7
-; CHECK32_64-NEXT: bc 12, 24, .LBB2_24
-; CHECK32_64-NEXT: # %bb.23:
-; CHECK32_64-NEXT: ori 7, 24, 0
-; CHECK32_64-NEXT: b .LBB2_25
-; CHECK32_64-NEXT: .LBB2_24:
-; CHECK32_64-NEXT: addi 7, 9, 0
-; CHECK32_64-NEXT: .LBB2_25:
-; CHECK32_64-NEXT: or 4, 4, 0
-; CHECK32_64-NEXT: bc 12, 2, .LBB2_26
-; CHECK32_64-NEXT: b .LBB2_27
-; CHECK32_64-NEXT: .LBB2_26:
-; CHECK32_64-NEXT: addi 7, 30, 0
-; CHECK32_64-NEXT: .LBB2_27:
-; CHECK32_64-NEXT: or 6, 6, 5
-; CHECK32_64-NEXT: or 5, 8, 7
-; CHECK32_64-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: lwz 22, 24(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: lwz 21, 20(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: lwz 20, 16(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: addi 1, 1, 64
+; CHECK32_64-NEXT: srw 6, 5, 11
+; CHECK32_64-NEXT: lwz 30, 8(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: slw 3, 3, 12
+; CHECK32_64-NEXT: srw 9, 0, 11
+; CHECK32_64-NEXT: slw 5, 5, 12
+; CHECK32_64-NEXT: srw 10, 4, 11
+; CHECK32_64-NEXT: slw 0, 0, 12
+; CHECK32_64-NEXT: srw 7, 7, 11
+; CHECK32_64-NEXT: slw 8, 4, 12
+; CHECK32_64-NEXT: or 3, 3, 6
+; CHECK32_64-NEXT: or 4, 5, 9
+; CHECK32_64-NEXT: or 5, 0, 10
+; CHECK32_64-NEXT: or 6, 8, 7
+; CHECK32_64-NEXT: addi 1, 1, 16
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: fshl_i128:
; CHECK64: # %bb.0:
-; CHECK64-NEXT: clrlwi 8, 7, 25
-; CHECK64-NEXT: rotldi 5, 5, 63
-; CHECK64-NEXT: not 7, 7
-; CHECK64-NEXT: rldicl 9, 6, 63, 1
-; CHECK64-NEXT: subfic 10, 8, 64
-; CHECK64-NEXT: addi 11, 8, -64
-; CHECK64-NEXT: rldimi 5, 6, 63, 0
-; CHECK64-NEXT: clrlwi 6, 7, 25
-; CHECK64-NEXT: srd 7, 3, 10
-; CHECK64-NEXT: sld 10, 3, 11
-; CHECK64-NEXT: subfic 11, 6, 64
-; CHECK64-NEXT: addi 12, 6, -64
-; CHECK64-NEXT: sld 4, 4, 8
-; CHECK64-NEXT: srd 5, 5, 6
-; CHECK64-NEXT: sld 11, 9, 11
-; CHECK64-NEXT: or 4, 4, 7
-; CHECK64-NEXT: or 5, 5, 11
-; CHECK64-NEXT: srd 7, 9, 12
-; CHECK64-NEXT: or 4, 4, 10
-; CHECK64-NEXT: srd 6, 9, 6
-; CHECK64-NEXT: or 5, 5, 7
-; CHECK64-NEXT: sld 3, 3, 8
-; CHECK64-NEXT: or 4, 4, 6
-; CHECK64-NEXT: or 3, 3, 5
+; CHECK64-NEXT: andi. 8, 7, 64
+; CHECK64-NEXT: clrlwi 7, 7, 26
+; CHECK64-NEXT: iseleq 5, 6, 5
+; CHECK64-NEXT: subfic 8, 7, 64
+; CHECK64-NEXT: iseleq 6, 3, 6
+; CHECK64-NEXT: iseleq 3, 4, 3
+; CHECK64-NEXT: srd 4, 5, 8
+; CHECK64-NEXT: sld 5, 6, 7
+; CHECK64-NEXT: srd 6, 6, 8
+; CHECK64-NEXT: sld 7, 3, 7
+; CHECK64-NEXT: or 3, 5, 4
+; CHECK64-NEXT: or 4, 7, 6
; CHECK64-NEXT: blr
%f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
ret i128 %f
@@ -525,31 +255,29 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
; CHECK32_32-NEXT: bl __umoddi3
-; CHECK32_32-NEXT: clrlwi 6, 4, 26
-; CHECK32_32-NEXT: srwi 3, 30, 6
-; CHECK32_32-NEXT: not 4, 4
-; CHECK32_32-NEXT: subfic 8, 6, 32
-; CHECK32_32-NEXT: slwi 5, 30, 26
-; CHECK32_32-NEXT: rlwimi 3, 29, 26, 1, 5
-; CHECK32_32-NEXT: slw 7, 27, 6
-; CHECK32_32-NEXT: clrlwi 4, 4, 26
-; CHECK32_32-NEXT: srw 8, 28, 8
-; CHECK32_32-NEXT: srw 9, 3, 4
-; CHECK32_32-NEXT: srw 5, 5, 4
-; CHECK32_32-NEXT: or 7, 7, 8
-; CHECK32_32-NEXT: subfic 8, 4, 32
-; CHECK32_32-NEXT: addi 4, 4, -32
-; CHECK32_32-NEXT: slw 8, 3, 8
-; CHECK32_32-NEXT: srw 4, 3, 4
-; CHECK32_32-NEXT: addi 3, 6, -32
-; CHECK32_32-NEXT: slw 3, 28, 3
-; CHECK32_32-NEXT: or 5, 5, 8
-; CHECK32_32-NEXT: or 3, 7, 3
-; CHECK32_32-NEXT: or 4, 5, 4
-; CHECK32_32-NEXT: slw 5, 28, 6
-; CHECK32_32-NEXT: or 3, 3, 9
-; CHECK32_32-NEXT: or 4, 5, 4
+; CHECK32_32-NEXT: rotlwi 3, 30, 27
+; CHECK32_32-NEXT: slwi 5, 30, 27
+; CHECK32_32-NEXT: andi. 6, 4, 32
+; CHECK32_32-NEXT: rlwimi 3, 29, 27, 0, 4
+; CHECK32_32-NEXT: clrlwi 4, 4, 27
+; CHECK32_32-NEXT: subfic 6, 4, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB3_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 7, 3, 0
+; CHECK32_32-NEXT: ori 8, 28, 0
+; CHECK32_32-NEXT: ori 3, 5, 0
+; CHECK32_32-NEXT: b .LBB3_3
+; CHECK32_32-NEXT: .LBB3_2:
+; CHECK32_32-NEXT: addi 7, 28, 0
+; CHECK32_32-NEXT: addi 8, 27, 0
+; CHECK32_32-NEXT: .LBB3_3:
; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: srw 5, 7, 6
+; CHECK32_32-NEXT: slw 8, 8, 4
+; CHECK32_32-NEXT: srw 6, 3, 6
+; CHECK32_32-NEXT: slw 4, 7, 4
+; CHECK32_32-NEXT: or 3, 8, 5
+; CHECK32_32-NEXT: or 4, 4, 6
; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
@@ -582,35 +310,42 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_64-NEXT: mr 30, 6
; CHECK32_64-NEXT: li 6, 37
; CHECK32_64-NEXT: bl __umoddi3
-; CHECK32_64-NEXT: clrlwi 6, 4, 26
-; CHECK32_64-NEXT: not 4, 4
-; CHECK32_64-NEXT: subfic 8, 6, 32
-; CHECK32_64-NEXT: srwi 3, 30, 6
-; CHECK32_64-NEXT: slw 7, 27, 6
-; CHECK32_64-NEXT: clrlwi 4, 4, 26
+; CHECK32_64-NEXT: rotlwi 3, 30, 27
+; CHECK32_64-NEXT: andi. 5, 4, 32
+; CHECK32_64-NEXT: bc 12, 2, .LBB3_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 8, 28, 0
+; CHECK32_64-NEXT: b .LBB3_3
+; CHECK32_64-NEXT: .LBB3_2:
+; CHECK32_64-NEXT: addi 8, 27, 0
+; CHECK32_64-NEXT: .LBB3_3:
; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: srw 8, 28, 8
-; CHECK32_64-NEXT: rlwimi 3, 29, 26, 1, 5
-; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: slwi 5, 30, 26
-; CHECK32_64-NEXT: or 7, 7, 8
-; CHECK32_64-NEXT: subfic 8, 4, 32
+; CHECK32_64-NEXT: rlwimi 3, 29, 27, 0, 4
+; CHECK32_64-NEXT: clrlwi 4, 4, 27
+; CHECK32_64-NEXT: bc 12, 2, .LBB3_5
+; CHECK32_64-NEXT: # %bb.4:
+; CHECK32_64-NEXT: ori 7, 3, 0
+; CHECK32_64-NEXT: b .LBB3_6
+; CHECK32_64-NEXT: .LBB3_5:
+; CHECK32_64-NEXT: addi 7, 28, 0
+; CHECK32_64-NEXT: .LBB3_6:
+; CHECK32_64-NEXT: slwi 5, 30, 27
; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: addi 9, 6, -32
-; CHECK32_64-NEXT: srw 10, 3, 4
-; CHECK32_64-NEXT: srw 5, 5, 4
-; CHECK32_64-NEXT: addi 4, 4, -32
-; CHECK32_64-NEXT: slw 8, 3, 8
-; CHECK32_64-NEXT: slw 9, 28, 9
-; CHECK32_64-NEXT: srw 3, 3, 4
-; CHECK32_64-NEXT: or 4, 5, 8
-; CHECK32_64-NEXT: slw 6, 28, 6
-; CHECK32_64-NEXT: or 5, 7, 9
+; CHECK32_64-NEXT: bc 12, 2, .LBB3_8
+; CHECK32_64-NEXT: # %bb.7:
+; CHECK32_64-NEXT: ori 3, 5, 0
+; CHECK32_64-NEXT: b .LBB3_8
+; CHECK32_64-NEXT: .LBB3_8:
+; CHECK32_64-NEXT: subfic 6, 4, 32
+; CHECK32_64-NEXT: slw 8, 8, 4
+; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: srw 9, 7, 6
+; CHECK32_64-NEXT: srw 5, 3, 6
+; CHECK32_64-NEXT: slw 4, 7, 4
+; CHECK32_64-NEXT: or 3, 8, 9
; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: or 4, 4, 3
-; CHECK32_64-NEXT: or 3, 5, 10
+; CHECK32_64-NEXT: or 4, 4, 5
; CHECK32_64-NEXT: lwz 0, 36(1)
-; CHECK32_64-NEXT: or 4, 6, 4
; CHECK32_64-NEXT: addi 1, 1, 32
; CHECK32_64-NEXT: mtlr 0
; CHECK32_64-NEXT: blr
@@ -737,58 +472,47 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshr_i64:
; CHECK32_32: # %bb.0:
-; CHECK32_32-NEXT: clrlwi 7, 8, 26
-; CHECK32_32-NEXT: slwi 9, 4, 1
-; CHECK32_32-NEXT: not 8, 8
-; CHECK32_32-NEXT: rotlwi 4, 4, 1
-; CHECK32_32-NEXT: subfic 10, 7, 32
-; CHECK32_32-NEXT: srw 6, 6, 7
-; CHECK32_32-NEXT: clrlwi 8, 8, 26
-; CHECK32_32-NEXT: rlwimi 4, 3, 1, 0, 30
-; CHECK32_32-NEXT: slw 3, 5, 10
-; CHECK32_32-NEXT: slw 10, 9, 8
-; CHECK32_32-NEXT: slw 4, 4, 8
-; CHECK32_32-NEXT: or 3, 6, 3
-; CHECK32_32-NEXT: subfic 6, 8, 32
-; CHECK32_32-NEXT: addi 8, 8, -32
-; CHECK32_32-NEXT: srw 6, 9, 6
-; CHECK32_32-NEXT: slw 8, 9, 8
-; CHECK32_32-NEXT: addi 9, 7, -32
-; CHECK32_32-NEXT: srw 9, 5, 9
-; CHECK32_32-NEXT: or 3, 3, 9
-; CHECK32_32-NEXT: or 6, 4, 6
-; CHECK32_32-NEXT: or 4, 10, 3
-; CHECK32_32-NEXT: or 3, 6, 8
-; CHECK32_32-NEXT: srw 5, 5, 7
+; CHECK32_32-NEXT: andi. 7, 8, 32
+; CHECK32_32-NEXT: clrlwi 7, 8, 27
+; CHECK32_32-NEXT: subfic 8, 7, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB10_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 9, 4, 0
+; CHECK32_32-NEXT: ori 4, 5, 0
+; CHECK32_32-NEXT: b .LBB10_3
+; CHECK32_32-NEXT: .LBB10_2:
+; CHECK32_32-NEXT: addi 9, 5, 0
+; CHECK32_32-NEXT: addi 3, 4, 0
+; CHECK32_32-NEXT: addi 4, 6, 0
+; CHECK32_32-NEXT: .LBB10_3:
+; CHECK32_32-NEXT: srw 5, 9, 7
+; CHECK32_32-NEXT: slw 3, 3, 8
+; CHECK32_32-NEXT: srw 4, 4, 7
+; CHECK32_32-NEXT: slw 6, 9, 8
; CHECK32_32-NEXT: or 3, 3, 5
+; CHECK32_32-NEXT: or 4, 6, 4
; CHECK32_32-NEXT: blr
;
; CHECK32_64-LABEL: fshr_i64:
; CHECK32_64: # %bb.0:
-; CHECK32_64-NEXT: rotlwi 7, 4, 1
-; CHECK32_64-NEXT: slwi 4, 4, 1
-; CHECK32_64-NEXT: rlwimi 7, 3, 1, 0, 30
-; CHECK32_64-NEXT: clrlwi 3, 8, 26
-; CHECK32_64-NEXT: not 8, 8
-; CHECK32_64-NEXT: subfic 9, 3, 32
-; CHECK32_64-NEXT: srw 6, 6, 3
-; CHECK32_64-NEXT: clrlwi 8, 8, 26
-; CHECK32_64-NEXT: slw 9, 5, 9
-; CHECK32_64-NEXT: addi 10, 3, -32
-; CHECK32_64-NEXT: or 6, 6, 9
-; CHECK32_64-NEXT: subfic 9, 8, 32
-; CHECK32_64-NEXT: srw 3, 5, 3
-; CHECK32_64-NEXT: srw 5, 5, 10
-; CHECK32_64-NEXT: slw 10, 4, 8
-; CHECK32_64-NEXT: slw 7, 7, 8
-; CHECK32_64-NEXT: addi 8, 8, -32
-; CHECK32_64-NEXT: srw 9, 4, 9
-; CHECK32_64-NEXT: slw 4, 4, 8
-; CHECK32_64-NEXT: or 7, 7, 9
-; CHECK32_64-NEXT: or 5, 6, 5
-; CHECK32_64-NEXT: or 6, 7, 4
-; CHECK32_64-NEXT: or 4, 10, 5
-; CHECK32_64-NEXT: or 3, 6, 3
+; CHECK32_64-NEXT: andi. 7, 8, 32
+; CHECK32_64-NEXT: clrlwi 7, 8, 27
+; CHECK32_64-NEXT: bc 12, 2, .LBB10_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 9, 4, 0
+; CHECK32_64-NEXT: b .LBB10_3
+; CHECK32_64-NEXT: .LBB10_2:
+; CHECK32_64-NEXT: addi 9, 5, 0
+; CHECK32_64-NEXT: addi 3, 4, 0
+; CHECK32_64-NEXT: addi 5, 6, 0
+; CHECK32_64-NEXT: .LBB10_3:
+; CHECK32_64-NEXT: subfic 8, 7, 32
+; CHECK32_64-NEXT: srw 4, 9, 7
+; CHECK32_64-NEXT: slw 3, 3, 8
+; CHECK32_64-NEXT: srw 5, 5, 7
+; CHECK32_64-NEXT: slw 6, 9, 8
+; CHECK32_64-NEXT: or 3, 3, 4
+; CHECK32_64-NEXT: or 4, 6, 5
; CHECK32_64-NEXT: blr
;
; CHECK64-LABEL: fshr_i64:
@@ -830,35 +554,30 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
; CHECK32_32-NEXT: bl __umoddi3
+; CHECK32_32-NEXT: rotlwi 3, 30, 27
; CHECK32_32-NEXT: addi 4, 4, 27
-; CHECK32_32-NEXT: rotlwi 5, 30, 27
-; CHECK32_32-NEXT: clrlwi 8, 4, 26
-; CHECK32_32-NEXT: slwi 3, 30, 27
-; CHECK32_32-NEXT: rotlwi 7, 28, 1
-; CHECK32_32-NEXT: rlwimi 5, 29, 27, 0, 4
-; CHECK32_32-NEXT: not 4, 4
-; CHECK32_32-NEXT: subfic 9, 8, 32
-; CHECK32_32-NEXT: slwi 6, 28, 1
-; CHECK32_32-NEXT: rlwimi 7, 27, 1, 0, 30
-; CHECK32_32-NEXT: srw 3, 3, 8
-; CHECK32_32-NEXT: clrlwi 4, 4, 26
-; CHECK32_32-NEXT: slw 9, 5, 9
-; CHECK32_32-NEXT: slw 10, 6, 4
-; CHECK32_32-NEXT: slw 7, 7, 4
-; CHECK32_32-NEXT: or 3, 3, 9
-; CHECK32_32-NEXT: subfic 9, 4, 32
-; CHECK32_32-NEXT: addi 4, 4, -32
-; CHECK32_32-NEXT: srw 9, 6, 9
-; CHECK32_32-NEXT: slw 6, 6, 4
-; CHECK32_32-NEXT: addi 4, 8, -32
-; CHECK32_32-NEXT: srw 4, 5, 4
-; CHECK32_32-NEXT: or 3, 3, 4
-; CHECK32_32-NEXT: or 7, 7, 9
-; CHECK32_32-NEXT: or 4, 10, 3
-; CHECK32_32-NEXT: or 3, 7, 6
-; CHECK32_32-NEXT: srw 5, 5, 8
-; CHECK32_32-NEXT: or 3, 3, 5
+; CHECK32_32-NEXT: slwi 5, 30, 27
+; CHECK32_32-NEXT: rlwimi 3, 29, 27, 0, 4
+; CHECK32_32-NEXT: andi. 6, 4, 32
+; CHECK32_32-NEXT: clrlwi 4, 4, 27
+; CHECK32_32-NEXT: subfic 6, 4, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB11_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 7, 28, 0
+; CHECK32_32-NEXT: ori 8, 27, 0
+; CHECK32_32-NEXT: b .LBB11_3
+; CHECK32_32-NEXT: .LBB11_2:
+; CHECK32_32-NEXT: addi 7, 3, 0
+; CHECK32_32-NEXT: addi 8, 28, 0
+; CHECK32_32-NEXT: addi 3, 5, 0
+; CHECK32_32-NEXT: .LBB11_3:
; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: srw 5, 7, 4
+; CHECK32_32-NEXT: slw 8, 8, 6
+; CHECK32_32-NEXT: srw 4, 3, 4
+; CHECK32_32-NEXT: slw 6, 7, 6
+; CHECK32_32-NEXT: or 3, 8, 5
+; CHECK32_32-NEXT: or 4, 6, 4
; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
@@ -893,37 +612,36 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_64-NEXT: bl __umoddi3
; CHECK32_64-NEXT: addi 4, 4, 27
; CHECK32_64-NEXT: rotlwi 3, 30, 27
-; CHECK32_64-NEXT: clrlwi 8, 4, 26
+; CHECK32_64-NEXT: andi. 5, 4, 32
; CHECK32_64-NEXT: rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: slwi 6, 30, 27
-; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: not 4, 4
-; CHECK32_64-NEXT: subfic 9, 8, 32
-; CHECK32_64-NEXT: rotlwi 5, 28, 1
-; CHECK32_64-NEXT: srw 6, 6, 8
-; CHECK32_64-NEXT: clrlwi 4, 4, 26
-; CHECK32_64-NEXT: slw 9, 3, 9
-; CHECK32_64-NEXT: rlwimi 5, 27, 1, 0, 30
-; CHECK32_64-NEXT: slwi 7, 28, 1
+; CHECK32_64-NEXT: bc 12, 2, .LBB11_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 7, 28, 0
+; CHECK32_64-NEXT: ori 8, 27, 0
+; CHECK32_64-NEXT: b .LBB11_3
+; CHECK32_64-NEXT: .LBB11_2:
+; CHECK32_64-NEXT: addi 7, 3, 0
+; CHECK32_64-NEXT: addi 8, 28, 0
+; CHECK32_64-NEXT: .LBB11_3:
+; CHECK32_64-NEXT: clrlwi 4, 4, 27
; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: addi 10, 8, -32
+; CHECK32_64-NEXT: slwi 5, 30, 27
+; CHECK32_64-NEXT: subfic 6, 4, 32
+; CHECK32_64-NEXT: bc 12, 2, .LBB11_4
+; CHECK32_64-NEXT: b .LBB11_5
+; CHECK32_64-NEXT: .LBB11_4:
+; CHECK32_64-NEXT: addi 3, 5, 0
+; CHECK32_64-NEXT: .LBB11_5:
+; CHECK32_64-NEXT: srw 9, 7, 4
+; CHECK32_64-NEXT: slw 8, 8, 6
+; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: srw 4, 3, 4
+; CHECK32_64-NEXT: slw 5, 7, 6
; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: or 6, 6, 9
-; CHECK32_64-NEXT: subfic 9, 4, 32
-; CHECK32_64-NEXT: srw 8, 3, 8
-; CHECK32_64-NEXT: srw 3, 3, 10
+; CHECK32_64-NEXT: or 3, 8, 9
+; CHECK32_64-NEXT: or 4, 5, 4
; CHECK32_64-NEXT: lwz 0, 36(1)
-; CHECK32_64-NEXT: slw 10, 7, 4
-; CHECK32_64-NEXT: slw 5, 5, 4
-; CHECK32_64-NEXT: addi 4, 4, -32
-; CHECK32_64-NEXT: srw 9, 7, 9
-; CHECK32_64-NEXT: slw 4, 7, 4
-; CHECK32_64-NEXT: or 5, 5, 9
-; CHECK32_64-NEXT: or 3, 6, 3
-; CHECK32_64-NEXT: or 5, 5, 4
-; CHECK32_64-NEXT: or 4, 10, 3
-; CHECK32_64-NEXT: or 3, 5, 8
; CHECK32_64-NEXT: addi 1, 1, 32
; CHECK32_64-NEXT: mtlr 0
; CHECK32_64-NEXT: blr
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
index 4307ea01be5a8..f0cb94814613e 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -176,124 +176,76 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: rol_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv a7, a1
-; RV32I-NEXT: andi a1, a2, 63
-; RV32I-NEXT: addi t0, a1, -32
-; RV32I-NEXT: addi a6, zero, 31
-; RV32I-NEXT: bltz t0, .LBB7_2
+; RV32I-NEXT: srli a3, a2, 5
+; RV32I-NEXT: andi a3, a3, 1
+; RV32I-NEXT: mv a4, a1
+; RV32I-NEXT: bnez a3, .LBB7_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll a1, a0, t0
-; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: sll a3, a7, a2
-; RV32I-NEXT: sub a1, a6, a1
-; RV32I-NEXT: srli a4, a0, 1
-; RV32I-NEXT: srl a1, a4, a1
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: .LBB7_3:
-; RV32I-NEXT: neg a5, a2
-; RV32I-NEXT: andi a3, a5, 63
-; RV32I-NEXT: addi a4, a3, -32
-; RV32I-NEXT: bltz a4, .LBB7_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a3, a7, a4
-; RV32I-NEXT: bltz t0, .LBB7_6
-; RV32I-NEXT: j .LBB7_7
-; RV32I-NEXT: .LBB7_5:
-; RV32I-NEXT: srl a4, a7, a5
-; RV32I-NEXT: or a1, a1, a4
-; RV32I-NEXT: srl a4, a0, a5
-; RV32I-NEXT: sub a3, a6, a3
-; RV32I-NEXT: slli a5, a7, 1
-; RV32I-NEXT: sll a3, a5, a3
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: bgez t0, .LBB7_7
-; RV32I-NEXT: .LBB7_6:
+; RV32I-NEXT: sll a6, a4, a2
+; RV32I-NEXT: bnez a3, .LBB7_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: .LBB7_4:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: not a5, a2
+; RV32I-NEXT: srl a1, a1, a5
+; RV32I-NEXT: or a3, a6, a1
; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: or a3, a3, a0
-; RV32I-NEXT: .LBB7_7:
+; RV32I-NEXT: srli a1, a4, 1
+; RV32I-NEXT: srl a1, a1, a5
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: rol_i64:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: mv a7, a1
-; RV32ZBB-NEXT: andi a1, a2, 63
-; RV32ZBB-NEXT: addi t0, a1, -32
-; RV32ZBB-NEXT: addi a6, zero, 31
-; RV32ZBB-NEXT: bltz t0, .LBB7_2
+; RV32ZBB-NEXT: srli a3, a2, 5
+; RV32ZBB-NEXT: andi a3, a3, 1
+; RV32ZBB-NEXT: mv a4, a1
+; RV32ZBB-NEXT: bnez a3, .LBB7_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sll a1, a0, t0
-; RV32ZBB-NEXT: j .LBB7_3
+; RV32ZBB-NEXT: mv a4, a0
; RV32ZBB-NEXT: .LBB7_2:
-; RV32ZBB-NEXT: sll a3, a7, a2
-; RV32ZBB-NEXT: sub a1, a6, a1
-; RV32ZBB-NEXT: srli a4, a0, 1
-; RV32ZBB-NEXT: srl a1, a4, a1
-; RV32ZBB-NEXT: or a1, a3, a1
-; RV32ZBB-NEXT: .LBB7_3:
-; RV32ZBB-NEXT: neg a5, a2
-; RV32ZBB-NEXT: andi a3, a5, 63
-; RV32ZBB-NEXT: addi a4, a3, -32
-; RV32ZBB-NEXT: bltz a4, .LBB7_5
-; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: srl a3, a7, a4
-; RV32ZBB-NEXT: bltz t0, .LBB7_6
-; RV32ZBB-NEXT: j .LBB7_7
-; RV32ZBB-NEXT: .LBB7_5:
-; RV32ZBB-NEXT: srl a4, a7, a5
-; RV32ZBB-NEXT: or a1, a1, a4
-; RV32ZBB-NEXT: srl a4, a0, a5
-; RV32ZBB-NEXT: sub a3, a6, a3
-; RV32ZBB-NEXT: slli a5, a7, 1
-; RV32ZBB-NEXT: sll a3, a5, a3
-; RV32ZBB-NEXT: or a3, a4, a3
-; RV32ZBB-NEXT: bgez t0, .LBB7_7
-; RV32ZBB-NEXT: .LBB7_6:
+; RV32ZBB-NEXT: sll a6, a4, a2
+; RV32ZBB-NEXT: bnez a3, .LBB7_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: mv a0, a1
+; RV32ZBB-NEXT: .LBB7_4:
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: not a5, a2
+; RV32ZBB-NEXT: srl a1, a1, a5
+; RV32ZBB-NEXT: or a3, a6, a1
; RV32ZBB-NEXT: sll a0, a0, a2
-; RV32ZBB-NEXT: or a3, a3, a0
-; RV32ZBB-NEXT: .LBB7_7:
+; RV32ZBB-NEXT: srli a1, a4, 1
+; RV32ZBB-NEXT: srl a1, a1, a5
+; RV32ZBB-NEXT: or a1, a0, a1
; RV32ZBB-NEXT: mv a0, a3
; RV32ZBB-NEXT: ret
;
; RV32ZBP-LABEL: rol_i64:
; RV32ZBP: # %bb.0:
-; RV32ZBP-NEXT: mv a7, a1
-; RV32ZBP-NEXT: andi a1, a2, 63
-; RV32ZBP-NEXT: addi t0, a1, -32
-; RV32ZBP-NEXT: addi a6, zero, 31
-; RV32ZBP-NEXT: bltz t0, .LBB7_2
+; RV32ZBP-NEXT: srli a3, a2, 5
+; RV32ZBP-NEXT: andi a3, a3, 1
+; RV32ZBP-NEXT: mv a4, a1
+; RV32ZBP-NEXT: bnez a3, .LBB7_2
; RV32ZBP-NEXT: # %bb.1:
-; RV32ZBP-NEXT: sll a1, a0, t0
-; RV32ZBP-NEXT: j .LBB7_3
+; RV32ZBP-NEXT: mv a4, a0
; RV32ZBP-NEXT: .LBB7_2:
-; RV32ZBP-NEXT: sll a3, a7, a2
-; RV32ZBP-NEXT: sub a1, a6, a1
-; RV32ZBP-NEXT: srli a4, a0, 1
-; RV32ZBP-NEXT: srl a1, a4, a1
-; RV32ZBP-NEXT: or a1, a3, a1
-; RV32ZBP-NEXT: .LBB7_3:
-; RV32ZBP-NEXT: neg a5, a2
-; RV32ZBP-NEXT: andi a3, a5, 63
-; RV32ZBP-NEXT: addi a4, a3, -32
-; RV32ZBP-NEXT: bltz a4, .LBB7_5
-; RV32ZBP-NEXT: # %bb.4:
-; RV32ZBP-NEXT: srl a3, a7, a4
-; RV32ZBP-NEXT: bltz t0, .LBB7_6
-; RV32ZBP-NEXT: j .LBB7_7
-; RV32ZBP-NEXT: .LBB7_5:
-; RV32ZBP-NEXT: srl a4, a7, a5
-; RV32ZBP-NEXT: or a1, a1, a4
-; RV32ZBP-NEXT: srl a4, a0, a5
-; RV32ZBP-NEXT: sub a3, a6, a3
-; RV32ZBP-NEXT: slli a5, a7, 1
-; RV32ZBP-NEXT: sll a3, a5, a3
-; RV32ZBP-NEXT: or a3, a4, a3
-; RV32ZBP-NEXT: bgez t0, .LBB7_7
-; RV32ZBP-NEXT: .LBB7_6:
+; RV32ZBP-NEXT: sll a6, a4, a2
+; RV32ZBP-NEXT: bnez a3, .LBB7_4
+; RV32ZBP-NEXT: # %bb.3:
+; RV32ZBP-NEXT: mv a0, a1
+; RV32ZBP-NEXT: .LBB7_4:
+; RV32ZBP-NEXT: srli a1, a0, 1
+; RV32ZBP-NEXT: not a5, a2
+; RV32ZBP-NEXT: srl a1, a1, a5
+; RV32ZBP-NEXT: or a3, a6, a1
; RV32ZBP-NEXT: sll a0, a0, a2
-; RV32ZBP-NEXT: or a3, a3, a0
-; RV32ZBP-NEXT: .LBB7_7:
+; RV32ZBP-NEXT: srli a1, a4, 1
+; RV32ZBP-NEXT: srl a1, a1, a5
+; RV32ZBP-NEXT: or a1, a0, a1
; RV32ZBP-NEXT: mv a0, a3
; RV32ZBP-NEXT: ret
%or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
@@ -332,125 +284,71 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: ror_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv t0, a0
-; RV32I-NEXT: andi a0, a2, 63
-; RV32I-NEXT: addi a7, a0, -32
-; RV32I-NEXT: addi a6, zero, 31
-; RV32I-NEXT: bltz a7, .LBB9_2
+; RV32I-NEXT: andi a4, a2, 32
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: beqz a4, .LBB9_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl a0, a1, a7
-; RV32I-NEXT: j .LBB9_3
+; RV32I-NEXT: mv a3, a1
; RV32I-NEXT: .LBB9_2:
-; RV32I-NEXT: srl a3, t0, a2
-; RV32I-NEXT: sub a0, a6, a0
-; RV32I-NEXT: slli a4, a1, 1
-; RV32I-NEXT: sll a0, a4, a0
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: .LBB9_3:
-; RV32I-NEXT: neg a5, a2
-; RV32I-NEXT: andi a4, a5, 63
-; RV32I-NEXT: addi a3, a4, -32
-; RV32I-NEXT: bltz a3, .LBB9_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a3, t0, a3
-; RV32I-NEXT: bltz a7, .LBB9_6
-; RV32I-NEXT: j .LBB9_7
-; RV32I-NEXT: .LBB9_5:
-; RV32I-NEXT: sll a3, t0, a5
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: sll a3, a1, a5
-; RV32I-NEXT: sub a4, a6, a4
-; RV32I-NEXT: srli a5, t0, 1
-; RV32I-NEXT: srl a4, a5, a4
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: bgez a7, .LBB9_7
-; RV32I-NEXT: .LBB9_6:
+; RV32I-NEXT: srl a5, a3, a2
+; RV32I-NEXT: beqz a4, .LBB9_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB9_4:
+; RV32I-NEXT: slli a0, a1, 1
+; RV32I-NEXT: not a4, a2
+; RV32I-NEXT: sll a0, a0, a4
+; RV32I-NEXT: or a0, a0, a5
; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: or a3, a3, a1
-; RV32I-NEXT: .LBB9_7:
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: slli a2, a3, 1
+; RV32I-NEXT: sll a2, a2, a4
+; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ror_i64:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: mv t0, a0
-; RV32ZBB-NEXT: andi a0, a2, 63
-; RV32ZBB-NEXT: addi a7, a0, -32
-; RV32ZBB-NEXT: addi a6, zero, 31
-; RV32ZBB-NEXT: bltz a7, .LBB9_2
+; RV32ZBB-NEXT: andi a4, a2, 32
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: beqz a4, .LBB9_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: srl a0, a1, a7
-; RV32ZBB-NEXT: j .LBB9_3
+; RV32ZBB-NEXT: mv a3, a1
; RV32ZBB-NEXT: .LBB9_2:
-; RV32ZBB-NEXT: srl a3, t0, a2
-; RV32ZBB-NEXT: sub a0, a6, a0
-; RV32ZBB-NEXT: slli a4, a1, 1
-; RV32ZBB-NEXT: sll a0, a4, a0
-; RV32ZBB-NEXT: or a0, a3, a0
-; RV32ZBB-NEXT: .LBB9_3:
-; RV32ZBB-NEXT: neg a5, a2
-; RV32ZBB-NEXT: andi a4, a5, 63
-; RV32ZBB-NEXT: addi a3, a4, -32
-; RV32ZBB-NEXT: bltz a3, .LBB9_5
-; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: sll a3, t0, a3
-; RV32ZBB-NEXT: bltz a7, .LBB9_6
-; RV32ZBB-NEXT: j .LBB9_7
-; RV32ZBB-NEXT: .LBB9_5:
-; RV32ZBB-NEXT: sll a3, t0, a5
-; RV32ZBB-NEXT: or a0, a0, a3
-; RV32ZBB-NEXT: sll a3, a1, a5
-; RV32ZBB-NEXT: sub a4, a6, a4
-; RV32ZBB-NEXT: srli a5, t0, 1
-; RV32ZBB-NEXT: srl a4, a5, a4
-; RV32ZBB-NEXT: or a3, a3, a4
-; RV32ZBB-NEXT: bgez a7, .LBB9_7
-; RV32ZBB-NEXT: .LBB9_6:
+; RV32ZBB-NEXT: srl a5, a3, a2
+; RV32ZBB-NEXT: beqz a4, .LBB9_4
+; RV32ZBB-NEXT: # %bb.3:
+; RV32ZBB-NEXT: mv a1, a0
+; RV32ZBB-NEXT: .LBB9_4:
+; RV32ZBB-NEXT: slli a0, a1, 1
+; RV32ZBB-NEXT: not a4, a2
+; RV32ZBB-NEXT: sll a0, a0, a4
+; RV32ZBB-NEXT: or a0, a0, a5
; RV32ZBB-NEXT: srl a1, a1, a2
-; RV32ZBB-NEXT: or a3, a3, a1
-; RV32ZBB-NEXT: .LBB9_7:
-; RV32ZBB-NEXT: mv a1, a3
+; RV32ZBB-NEXT: slli a2, a3, 1
+; RV32ZBB-NEXT: sll a2, a2, a4
+; RV32ZBB-NEXT: or a1, a2, a1
; RV32ZBB-NEXT: ret
;
; RV32ZBP-LABEL: ror_i64:
; RV32ZBP: # %bb.0:
-; RV32ZBP-NEXT: mv t0, a0
-; RV32ZBP-NEXT: andi a0, a2, 63
-; RV32ZBP-NEXT: addi a7, a0, -32
-; RV32ZBP-NEXT: addi a6, zero, 31
-; RV32ZBP-NEXT: bltz a7, .LBB9_2
+; RV32ZBP-NEXT: andi a4, a2, 32
+; RV32ZBP-NEXT: mv a3, a0
+; RV32ZBP-NEXT: beqz a4, .LBB9_2
; RV32ZBP-NEXT: # %bb.1:
-; RV32ZBP-NEXT: srl a0, a1, a7
-; RV32ZBP-NEXT: j .LBB9_3
+; RV32ZBP-NEXT: mv a3, a1
; RV32ZBP-NEXT: .LBB9_2:
-; RV32ZBP-NEXT: srl a3, t0, a2
-; RV32ZBP-NEXT: sub a0, a6, a0
-; RV32ZBP-NEXT: slli a4, a1, 1
-; RV32ZBP-NEXT: sll a0, a4, a0
-; RV32ZBP-NEXT: or a0, a3, a0
-; RV32ZBP-NEXT: .LBB9_3:
-; RV32ZBP-NEXT: neg a5, a2
-; RV32ZBP-NEXT: andi a4, a5, 63
-; RV32ZBP-NEXT: addi a3, a4, -32
-; RV32ZBP-NEXT: bltz a3, .LBB9_5
-; RV32ZBP-NEXT: # %bb.4:
-; RV32ZBP-NEXT: sll a3, t0, a3
-; RV32ZBP-NEXT: bltz a7, .LBB9_6
-; RV32ZBP-NEXT: j .LBB9_7
-; RV32ZBP-NEXT: .LBB9_5:
-; RV32ZBP-NEXT: sll a3, t0, a5
-; RV32ZBP-NEXT: or a0, a0, a3
-; RV32ZBP-NEXT: sll a3, a1, a5
-; RV32ZBP-NEXT: sub a4, a6, a4
-; RV32ZBP-NEXT: srli a5, t0, 1
-; RV32ZBP-NEXT: srl a4, a5, a4
-; RV32ZBP-NEXT: or a3, a3, a4
-; RV32ZBP-NEXT: bgez a7, .LBB9_7
-; RV32ZBP-NEXT: .LBB9_6:
+; RV32ZBP-NEXT: srl a5, a3, a2
+; RV32ZBP-NEXT: beqz a4, .LBB9_4
+; RV32ZBP-NEXT: # %bb.3:
+; RV32ZBP-NEXT: mv a1, a0
+; RV32ZBP-NEXT: .LBB9_4:
+; RV32ZBP-NEXT: slli a0, a1, 1
+; RV32ZBP-NEXT: not a4, a2
+; RV32ZBP-NEXT: sll a0, a0, a4
+; RV32ZBP-NEXT: or a0, a0, a5
; RV32ZBP-NEXT: srl a1, a1, a2
-; RV32ZBP-NEXT: or a3, a3, a1
-; RV32ZBP-NEXT: .LBB9_7:
-; RV32ZBP-NEXT: mv a1, a3
+; RV32ZBP-NEXT: slli a2, a3, 1
+; RV32ZBP-NEXT: sll a2, a2, a4
+; RV32ZBP-NEXT: or a1, a2, a1
; RV32ZBP-NEXT: ret
%or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
ret i64 %or
@@ -501,8 +399,8 @@ define i32 @rori_i32_fshr(i32 %a) nounwind {
define i64 @rori_i64(i64 %a) nounwind {
; RV32I-LABEL: rori_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a2, a1, 31
-; RV32I-NEXT: srli a3, a0, 1
+; RV32I-NEXT: srli a2, a0, 1
+; RV32I-NEXT: slli a3, a1, 31
; RV32I-NEXT: or a2, a3, a2
; RV32I-NEXT: srli a1, a1, 1
; RV32I-NEXT: slli a0, a0, 31
@@ -512,8 +410,8 @@ define i64 @rori_i64(i64 %a) nounwind {
;
; RV32ZBB-LABEL: rori_i64:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: slli a2, a1, 31
-; RV32ZBB-NEXT: srli a3, a0, 1
+; RV32ZBB-NEXT: srli a2, a0, 1
+; RV32ZBB-NEXT: slli a3, a1, 31
; RV32ZBB-NEXT: or a2, a3, a2
; RV32ZBB-NEXT: srli a1, a1, 1
; RV32ZBB-NEXT: slli a0, a0, 31
@@ -523,8 +421,8 @@ define i64 @rori_i64(i64 %a) nounwind {
;
; RV32ZBP-LABEL: rori_i64:
; RV32ZBP: # %bb.0:
-; RV32ZBP-NEXT: slli a2, a1, 31
-; RV32ZBP-NEXT: srli a3, a0, 1
+; RV32ZBP-NEXT: srli a2, a0, 1
+; RV32ZBP-NEXT: slli a3, a1, 31
; RV32ZBP-NEXT: or a2, a3, a2
; RV32ZBP-NEXT: srli a1, a1, 1
; RV32ZBP-NEXT: slli a0, a0, 31
@@ -538,8 +436,8 @@ define i64 @rori_i64(i64 %a) nounwind {
define i64 @rori_i64_fshr(i64 %a) nounwind {
; RV32I-LABEL: rori_i64_fshr:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a2, a0, 1
-; RV32I-NEXT: srli a3, a1, 31
+; RV32I-NEXT: srli a2, a1, 31
+; RV32I-NEXT: slli a3, a0, 1
; RV32I-NEXT: or a2, a3, a2
; RV32I-NEXT: srli a0, a0, 31
; RV32I-NEXT: slli a1, a1, 1
@@ -549,8 +447,8 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
;
; RV32ZBB-LABEL: rori_i64_fshr:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: slli a2, a0, 1
-; RV32ZBB-NEXT: srli a3, a1, 31
+; RV32ZBB-NEXT: srli a2, a1, 31
+; RV32ZBB-NEXT: slli a3, a0, 1
; RV32ZBB-NEXT: or a2, a3, a2
; RV32ZBB-NEXT: srli a0, a0, 31
; RV32ZBB-NEXT: slli a1, a1, 1
@@ -560,8 +458,8 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
;
; RV32ZBP-LABEL: rori_i64_fshr:
; RV32ZBP: # %bb.0:
-; RV32ZBP-NEXT: slli a2, a0, 1
-; RV32ZBP-NEXT: srli a3, a1, 31
+; RV32ZBP-NEXT: srli a2, a1, 31
+; RV32ZBP-NEXT: slli a3, a0, 1
; RV32ZBP-NEXT: or a2, a3, a2
; RV32ZBP-NEXT: srli a0, a0, 31
; RV32ZBP-NEXT: slli a1, a1, 1
diff --git a/llvm/test/CodeGen/RISCV/rv32zbt.ll b/llvm/test/CodeGen/RISCV/rv32zbt.ll
index 6a298c423ad69..9cb081c1c70a7 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbt.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbt.ll
@@ -340,82 +340,44 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
; RV32I-LABEL: fshl_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a5, a4, 63
-; RV32I-NEXT: addi a7, a5, -32
-; RV32I-NEXT: addi a6, zero, 31
-; RV32I-NEXT: bltz a7, .LBB13_2
+; RV32I-NEXT: srli a5, a4, 5
+; RV32I-NEXT: andi a5, a5, 1
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: bnez a5, .LBB13_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll a1, a0, a7
-; RV32I-NEXT: j .LBB13_3
+; RV32I-NEXT: mv a6, a0
; RV32I-NEXT: .LBB13_2:
-; RV32I-NEXT: sll t0, a1, a4
-; RV32I-NEXT: sub a5, a6, a5
-; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: srl a1, a1, a5
-; RV32I-NEXT: or a1, t0, a1
-; RV32I-NEXT: .LBB13_3:
-; RV32I-NEXT: not t2, a4
-; RV32I-NEXT: andi t1, t2, 63
-; RV32I-NEXT: addi a5, t1, -32
-; RV32I-NEXT: srli t0, a3, 1
-; RV32I-NEXT: bltz a5, .LBB13_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a2, t0, a5
-; RV32I-NEXT: bltz a7, .LBB13_6
-; RV32I-NEXT: j .LBB13_7
-; RV32I-NEXT: .LBB13_5:
-; RV32I-NEXT: srl a5, t0, t2
-; RV32I-NEXT: or a1, a1, a5
-; RV32I-NEXT: slli a3, a3, 31
+; RV32I-NEXT: sll a7, a6, a4
+; RV32I-NEXT: bnez a5, .LBB13_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a2, a3
+; RV32I-NEXT: .LBB13_4:
; RV32I-NEXT: srli a2, a2, 1
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: srl a2, a2, t2
-; RV32I-NEXT: sub a3, a6, t1
-; RV32I-NEXT: slli a5, t0, 1
-; RV32I-NEXT: sll a3, a5, a3
-; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: bgez a7, .LBB13_7
+; RV32I-NEXT: not a3, a4
+; RV32I-NEXT: srl a2, a2, a3
+; RV32I-NEXT: or a2, a7, a2
+; RV32I-NEXT: bnez a5, .LBB13_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB13_6:
; RV32I-NEXT: sll a0, a0, a4
-; RV32I-NEXT: or a2, a2, a0
-; RV32I-NEXT: .LBB13_7:
+; RV32I-NEXT: srli a1, a6, 1
+; RV32I-NEXT: srl a1, a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: fshl_i64:
; RV32ZBT: # %bb.0:
-; RV32ZBT-NEXT: sll a7, a1, a4
-; RV32ZBT-NEXT: andi a5, a4, 63
-; RV32ZBT-NEXT: addi a6, zero, 31
-; RV32ZBT-NEXT: sub t0, a6, a5
-; RV32ZBT-NEXT: srli a1, a0, 1
-; RV32ZBT-NEXT: srl a1, a1, t0
-; RV32ZBT-NEXT: or a7, a7, a1
-; RV32ZBT-NEXT: addi t1, a5, -32
-; RV32ZBT-NEXT: sll t0, a0, t1
-; RV32ZBT-NEXT: slti a1, t1, 0
-; RV32ZBT-NEXT: cmov t0, a1, a7, t0
-; RV32ZBT-NEXT: not a5, a4
-; RV32ZBT-NEXT: srli a7, a3, 1
-; RV32ZBT-NEXT: srl t4, a7, a5
-; RV32ZBT-NEXT: andi t2, a5, 63
-; RV32ZBT-NEXT: addi t3, t2, -32
-; RV32ZBT-NEXT: srai a1, t3, 31
-; RV32ZBT-NEXT: and a1, a1, t4
-; RV32ZBT-NEXT: or a1, t0, a1
-; RV32ZBT-NEXT: fsri a2, a2, a3, 1
-; RV32ZBT-NEXT: srl a2, a2, a5
-; RV32ZBT-NEXT: sub a3, a6, t2
-; RV32ZBT-NEXT: slli a5, a7, 1
-; RV32ZBT-NEXT: sll a3, a5, a3
-; RV32ZBT-NEXT: or a2, a2, a3
-; RV32ZBT-NEXT: srl a3, a7, t3
-; RV32ZBT-NEXT: slti a5, t3, 0
+; RV32ZBT-NEXT: srli a5, a4, 5
+; RV32ZBT-NEXT: andi a5, a5, 1
; RV32ZBT-NEXT: cmov a2, a5, a2, a3
-; RV32ZBT-NEXT: sll a0, a0, a4
-; RV32ZBT-NEXT: srai a3, t1, 31
-; RV32ZBT-NEXT: and a0, a3, a0
-; RV32ZBT-NEXT: or a0, a0, a2
+; RV32ZBT-NEXT: cmov a3, a5, a3, a0
+; RV32ZBT-NEXT: andi a4, a4, 31
+; RV32ZBT-NEXT: fsl a2, a3, a2, a4
+; RV32ZBT-NEXT: cmov a0, a5, a0, a1
+; RV32ZBT-NEXT: fsl a1, a0, a3, a4
+; RV32ZBT-NEXT: mv a0, a2
; RV32ZBT-NEXT: ret
%1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
ret i64 %1
@@ -453,87 +415,41 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
; RV32I-LABEL: fshr_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: mv t0, a0
-; RV32I-NEXT: andi a0, a4, 63
-; RV32I-NEXT: addi a6, a0, -32
-; RV32I-NEXT: addi a7, zero, 31
-; RV32I-NEXT: bltz a6, .LBB15_2
+; RV32I-NEXT: andi a5, a4, 32
+; RV32I-NEXT: beqz a5, .LBB15_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl a0, a3, a6
-; RV32I-NEXT: j .LBB15_3
+; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: .LBB15_2:
-; RV32I-NEXT: srl a2, a2, a4
-; RV32I-NEXT: sub a0, a7, a0
-; RV32I-NEXT: slli a5, a3, 1
-; RV32I-NEXT: sll a0, a5, a0
-; RV32I-NEXT: or a0, a2, a0
-; RV32I-NEXT: .LBB15_3:
-; RV32I-NEXT: not t2, a4
-; RV32I-NEXT: andi a5, t2, 63
-; RV32I-NEXT: addi a2, a5, -32
-; RV32I-NEXT: slli t1, t0, 1
-; RV32I-NEXT: bltz a2, .LBB15_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a1, t1, a2
-; RV32I-NEXT: bltz a6, .LBB15_6
-; RV32I-NEXT: j .LBB15_7
-; RV32I-NEXT: .LBB15_5:
-; RV32I-NEXT: sll a2, t1, t2
-; RV32I-NEXT: or a0, a0, a2
-; RV32I-NEXT: lui a2, 524288
-; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a2, t0, a2
-; RV32I-NEXT: sub a5, a7, a5
-; RV32I-NEXT: srl a2, a2, a5
-; RV32I-NEXT: srli a5, t0, 31
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: or a1, a1, a5
-; RV32I-NEXT: sll a1, a1, t2
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: bgez a6, .LBB15_7
+; RV32I-NEXT: srl a6, a2, a4
+; RV32I-NEXT: beqz a5, .LBB15_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: .LBB15_4:
+; RV32I-NEXT: slli a7, a3, 1
+; RV32I-NEXT: not t0, a4
+; RV32I-NEXT: sll a2, a7, t0
+; RV32I-NEXT: or a6, a2, a6
+; RV32I-NEXT: srl a3, a3, a4
+; RV32I-NEXT: beqz a5, .LBB15_6
+; RV32I-NEXT: # %bb.5:
+; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB15_6:
-; RV32I-NEXT: srl a2, a3, a4
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: .LBB15_7:
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: sll a0, a0, t0
+; RV32I-NEXT: or a1, a0, a3
+; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: ret
;
; RV32ZBT-LABEL: fshr_i64:
; RV32ZBT: # %bb.0:
-; RV32ZBT-NEXT: srl a7, a2, a4
-; RV32ZBT-NEXT: andi a5, a4, 63
-; RV32ZBT-NEXT: addi a6, zero, 31
-; RV32ZBT-NEXT: sub t0, a6, a5
-; RV32ZBT-NEXT: slli a2, a3, 1
-; RV32ZBT-NEXT: sll a2, a2, t0
-; RV32ZBT-NEXT: or a7, a7, a2
-; RV32ZBT-NEXT: addi t2, a5, -32
-; RV32ZBT-NEXT: srl t0, a3, t2
-; RV32ZBT-NEXT: slti a2, t2, 0
-; RV32ZBT-NEXT: cmov a7, a2, a7, t0
-; RV32ZBT-NEXT: not t4, a4
-; RV32ZBT-NEXT: slli t0, a0, 1
-; RV32ZBT-NEXT: sll t1, t0, t4
-; RV32ZBT-NEXT: andi t3, t4, 63
-; RV32ZBT-NEXT: addi a5, t3, -32
-; RV32ZBT-NEXT: srai a2, a5, 31
-; RV32ZBT-NEXT: and a2, a2, t1
-; RV32ZBT-NEXT: or a7, a2, a7
-; RV32ZBT-NEXT: lui a2, 524288
-; RV32ZBT-NEXT: addi a2, a2, -1
-; RV32ZBT-NEXT: and t1, a0, a2
-; RV32ZBT-NEXT: sub a2, a6, t3
-; RV32ZBT-NEXT: srl a2, t1, a2
-; RV32ZBT-NEXT: fsri a0, a0, a1, 31
-; RV32ZBT-NEXT: sll a0, a0, t4
-; RV32ZBT-NEXT: or a0, a0, a2
-; RV32ZBT-NEXT: sll a1, t0, a5
-; RV32ZBT-NEXT: slti a2, a5, 0
-; RV32ZBT-NEXT: cmov a0, a2, a0, a1
-; RV32ZBT-NEXT: srl a1, a3, a4
-; RV32ZBT-NEXT: srai a2, t2, 31
-; RV32ZBT-NEXT: and a1, a2, a1
-; RV32ZBT-NEXT: or a1, a0, a1
-; RV32ZBT-NEXT: mv a0, a7
+; RV32ZBT-NEXT: andi a5, a4, 32
+; RV32ZBT-NEXT: cmov a6, a5, a0, a3
+; RV32ZBT-NEXT: cmov a2, a5, a3, a2
+; RV32ZBT-NEXT: andi a3, a4, 31
+; RV32ZBT-NEXT: fsr a2, a2, a6, a3
+; RV32ZBT-NEXT: cmov a0, a5, a1, a0
+; RV32ZBT-NEXT: fsr a1, a6, a0, a3
+; RV32ZBT-NEXT: mv a0, a2
; RV32ZBT-NEXT: ret
%1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
ret i64 %1
@@ -558,8 +474,8 @@ define i32 @fshri_i32(i32 %a, i32 %b) nounwind {
define i64 @fshri_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: fshri_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a3, 27
-; RV32I-NEXT: srli a2, a2, 5
+; RV32I-NEXT: srli a1, a2, 5
+; RV32I-NEXT: slli a2, a3, 27
; RV32I-NEXT: or a2, a2, a1
; RV32I-NEXT: srli a1, a3, 5
; RV32I-NEXT: slli a0, a0, 27
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 62644de177e4d..5b67ca0f56164 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -571,34 +571,25 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
; RV32I-LABEL: fshr64_minsize:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: mv s2, a1
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: andi a2, a2, 63
-; RV32I-NEXT: call __lshrdi3 at plt
-; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: mv s4, a1
-; RV32I-NEXT: neg a0, s0
-; RV32I-NEXT: andi a2, a0, 63
-; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: mv a1, s2
-; RV32I-NEXT: call __ashldi3 at plt
-; RV32I-NEXT: or a0, s3, a0
-; RV32I-NEXT: or a1, s4, a1
-; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: andi a4, a2, 32
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: beqz a4, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a3, a1
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: srl a5, a3, a2
+; RV32I-NEXT: beqz a4, .LBB9_4
+; RV32I-NEXT: # %bb.3:
+; RV32I-NEXT: mv a1, a0
+; RV32I-NEXT: .LBB9_4:
+; RV32I-NEXT: slli a0, a1, 1
+; RV32I-NEXT: not a4, a2
+; RV32I-NEXT: sll a0, a0, a4
+; RV32I-NEXT: or a0, a0, a5
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slli a2, a3, 1
+; RV32I-NEXT: sll a2, a2, a4
+; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: fshr64_minsize:
@@ -615,182 +606,92 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-LABEL: fshr128_minsize:
; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -64
-; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s5, 0(a1)
-; RV32I-NEXT: lw s6, 4(a1)
-; RV32I-NEXT: lw s4, 8(a1)
-; RV32I-NEXT: lw s3, 12(a1)
-; RV32I-NEXT: lw s11, 0(a2)
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: andi s0, s11, 127
-; RV32I-NEXT: addi a2, s0, -64
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __lshrdi3 at plt
-; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s5
-; RV32I-NEXT: mv a1, s6
-; RV32I-NEXT: mv a2, s0
-; RV32I-NEXT: call __lshrdi3 at plt
-; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: sw a1, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi s9, zero, 64
-; RV32I-NEXT: sub a2, s9, s0
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: call __ashldi3 at plt
-; RV32I-NEXT: mv s10, a1
-; RV32I-NEXT: bgeu s0, s9, .LBB10_2
+; RV32I-NEXT: lw t2, 8(a1)
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a2, 0(a2)
+; RV32I-NEXT: lw a7, 4(a1)
+; RV32I-NEXT: lw t1, 12(a1)
+; RV32I-NEXT: andi a1, a2, 64
+; RV32I-NEXT: mv a5, a7
+; RV32I-NEXT: mv a6, a3
+; RV32I-NEXT: beqz a1, .LBB10_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: or s8, s1, a0
+; RV32I-NEXT: mv a5, t1
+; RV32I-NEXT: mv a6, t2
; RV32I-NEXT: .LBB10_2:
-; RV32I-NEXT: mv s7, s5
-; RV32I-NEXT: beqz s0, .LBB10_4
+; RV32I-NEXT: andi a4, a2, 32
+; RV32I-NEXT: mv t0, a6
+; RV32I-NEXT: bnez a4, .LBB10_13
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: mv s7, s8
+; RV32I-NEXT: bnez a1, .LBB10_14
; RV32I-NEXT: .LBB10_4:
-; RV32I-NEXT: neg a0, s11
-; RV32I-NEXT: andi s1, a0, 127
-; RV32I-NEXT: mv a0, s5
-; RV32I-NEXT: mv a1, s6
-; RV32I-NEXT: mv a2, s1
-; RV32I-NEXT: call __ashldi3 at plt
-; RV32I-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT: bgeu s1, s9, .LBB10_6
-; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: or s7, s7, a0
+; RV32I-NEXT: beqz a4, .LBB10_6
+; RV32I-NEXT: .LBB10_5:
+; RV32I-NEXT: mv a5, t2
; RV32I-NEXT: .LBB10_6:
-; RV32I-NEXT: bltu s0, s9, .LBB10_8
+; RV32I-NEXT: slli t3, a5, 1
+; RV32I-NEXT: not a3, a2
+; RV32I-NEXT: beqz a1, .LBB10_8
; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: j .LBB10_9
+; RV32I-NEXT: mv t1, a7
; RV32I-NEXT: .LBB10_8:
-; RV32I-NEXT: lw a0, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: or a0, a0, s10
-; RV32I-NEXT: .LBB10_9:
-; RV32I-NEXT: mv s8, s6
-; RV32I-NEXT: beqz s0, .LBB10_11
-; RV32I-NEXT: # %bb.10:
-; RV32I-NEXT: mv s8, a0
-; RV32I-NEXT: .LBB10_11:
-; RV32I-NEXT: sub a2, s9, s1
-; RV32I-NEXT: mv a0, s5
-; RV32I-NEXT: mv a1, s6
-; RV32I-NEXT: call __lshrdi3 at plt
-; RV32I-NEXT: mv s10, a0
-; RV32I-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: mv a2, s1
-; RV32I-NEXT: call __ashldi3 at plt
-; RV32I-NEXT: mv s11, a0
-; RV32I-NEXT: sw a1, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi a2, s1, -64
-; RV32I-NEXT: mv a0, s5
-; RV32I-NEXT: mv a1, s6
-; RV32I-NEXT: call __ashldi3 at plt
-; RV32I-NEXT: mv s5, a1
-; RV32I-NEXT: bgeu s1, s9, .LBB10_13
-; RV32I-NEXT: # %bb.12:
-; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT: or s8, s8, a0
-; RV32I-NEXT: or a0, s11, s10
-; RV32I-NEXT: .LBB10_13:
-; RV32I-NEXT: mv s6, s4
-; RV32I-NEXT: beqz s1, .LBB10_15
-; RV32I-NEXT: # %bb.14:
-; RV32I-NEXT: mv s6, a0
-; RV32I-NEXT: .LBB10_15:
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: mv a1, s3
-; RV32I-NEXT: mv a2, s0
-; RV32I-NEXT: call __lshrdi3 at plt
-; RV32I-NEXT: bltu s0, s9, .LBB10_21
-; RV32I-NEXT: # %bb.16:
-; RV32I-NEXT: bltu s1, s9, .LBB10_22
-; RV32I-NEXT: .LBB10_17:
-; RV32I-NEXT: bnez s1, .LBB10_23
-; RV32I-NEXT: .LBB10_18:
-; RV32I-NEXT: bgeu s0, s9, .LBB10_20
-; RV32I-NEXT: .LBB10_19:
-; RV32I-NEXT: or s3, s3, a1
-; RV32I-NEXT: .LBB10_20:
-; RV32I-NEXT: sw s8, 4(s2)
-; RV32I-NEXT: sw s7, 0(s2)
-; RV32I-NEXT: sw s3, 12(s2)
-; RV32I-NEXT: sw s6, 8(s2)
-; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s7, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 64
+; RV32I-NEXT: srl a7, t0, a2
+; RV32I-NEXT: sll a1, t3, a3
+; RV32I-NEXT: srl a5, a5, a2
+; RV32I-NEXT: beqz a4, .LBB10_10
+; RV32I-NEXT: # %bb.9:
+; RV32I-NEXT: mv t2, t1
+; RV32I-NEXT: .LBB10_10:
+; RV32I-NEXT: or a7, a1, a7
+; RV32I-NEXT: slli a1, t2, 1
+; RV32I-NEXT: sll a1, a1, a3
+; RV32I-NEXT: or a5, a1, a5
+; RV32I-NEXT: srl a1, t2, a2
+; RV32I-NEXT: beqz a4, .LBB10_12
+; RV32I-NEXT: # %bb.11:
+; RV32I-NEXT: mv t1, a6
+; RV32I-NEXT: .LBB10_12:
+; RV32I-NEXT: slli a4, t1, 1
+; RV32I-NEXT: sll a4, a4, a3
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: srl a2, t1, a2
+; RV32I-NEXT: slli a4, t0, 1
+; RV32I-NEXT: sll a3, a4, a3
+; RV32I-NEXT: or a2, a3, a2
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: sw a1, 8(a0)
+; RV32I-NEXT: sw a5, 4(a0)
+; RV32I-NEXT: sw a7, 0(a0)
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB10_21:
-; RV32I-NEXT: or s6, s6, a0
-; RV32I-NEXT: bgeu s1, s9, .LBB10_17
-; RV32I-NEXT: .LBB10_22:
-; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw a2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT: or s5, a2, a0
-; RV32I-NEXT: beqz s1, .LBB10_18
-; RV32I-NEXT: .LBB10_23:
-; RV32I-NEXT: mv s3, s5
-; RV32I-NEXT: bltu s0, s9, .LBB10_19
-; RV32I-NEXT: j .LBB10_20
+; RV32I-NEXT: .LBB10_13:
+; RV32I-NEXT: mv t0, a5
+; RV32I-NEXT: beqz a1, .LBB10_4
+; RV32I-NEXT: .LBB10_14:
+; RV32I-NEXT: mv t2, a3
+; RV32I-NEXT: bnez a4, .LBB10_5
+; RV32I-NEXT: j .LBB10_6
;
; RV64I-LABEL: fshr128_minsize:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -48
-; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: mv s2, a1
-; RV64I-NEXT: mv s1, a0
-; RV64I-NEXT: andi a2, a2, 127
-; RV64I-NEXT: call __lshrti3 at plt
-; RV64I-NEXT: mv s3, a0
-; RV64I-NEXT: mv s4, a1
-; RV64I-NEXT: neg a0, s0
-; RV64I-NEXT: andi a2, a0, 127
-; RV64I-NEXT: mv a0, s1
-; RV64I-NEXT: mv a1, s2
-; RV64I-NEXT: call __ashlti3 at plt
-; RV64I-NEXT: or a0, s3, a0
-; RV64I-NEXT: or a1, s4, a1
-; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: andi a4, a2, 64
+; RV64I-NEXT: mv a3, a0
+; RV64I-NEXT: beqz a4, .LBB10_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a3, a1
+; RV64I-NEXT: .LBB10_2:
+; RV64I-NEXT: srl a5, a3, a2
+; RV64I-NEXT: beqz a4, .LBB10_4
+; RV64I-NEXT: # %bb.3:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB10_4:
+; RV64I-NEXT: slli a0, a1, 1
+; RV64I-NEXT: not a4, a2
+; RV64I-NEXT: sll a0, a0, a4
+; RV64I-NEXT: or a0, a0, a5
+; RV64I-NEXT: srl a1, a1, a2
+; RV64I-NEXT: slli a2, a3, 1
+; RV64I-NEXT: sll a2, a2, a4
+; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: ret
%res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b)
ret i128 %res
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index bc3236343f530..26e5c04eb6c19 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -179,102 +179,62 @@ define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: notb %cl
-; X86-FAST-NEXT: shrdl $1, %edi, %esi
-; X86-FAST-NEXT: shrl %edi
-; X86-FAST-NEXT: shrdl %cl, %edi, %esi
-; X86-FAST-NEXT: shrl %cl, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: je .LBB5_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: xorl %edi, %edi
-; X86-FAST-NEXT: .LBB5_2:
-; X86-FAST-NEXT: movl %ebx, %eax
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: shldl %cl, %ebx, %edx
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: je .LBB5_4
-; X86-FAST-NEXT: # %bb.3:
-; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: .LBB5_4:
-; X86-FAST-NEXT: orl %edi, %edx
-; X86-FAST-NEXT: orl %esi, %eax
+; X86-FAST-NEXT: jne .LBB5_1
+; X86-FAST-NEXT: # %bb.2:
+; X86-FAST-NEXT: movl %edx, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: jmp .LBB5_3
+; X86-FAST-NEXT: .LBB5_1:
+; X86-FAST-NEXT: movl %esi, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: .LBB5_3:
+; X86-FAST-NEXT: movl %edi, %eax
+; X86-FAST-NEXT: shldl %cl, %esi, %eax
+; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-FAST-NEXT: shldl %cl, %edi, %edx
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
-; X86-FAST-NEXT: popl %ebx
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: var_shift_i64:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: shrl %eax
-; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: shll $31, %edi
-; X86-SLOW-NEXT: orl %eax, %edi
-; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: movb %cl, %ch
-; X86-SLOW-NEXT: notb %ch
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
-; X86-SLOW-NEXT: movb %al, %cl
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: movl %edx, %eax
-; X86-SLOW-NEXT: shrl %eax
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: testb $32, {{[0-9]+}}(%esp)
+; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: jne .LBB5_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: orl %eax, %ebx
+; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: jmp .LBB5_3
; X86-SLOW-NEXT: .LBB5_1:
-; X86-SLOW-NEXT: movl %edx, %ebx
-; X86-SLOW-NEXT: xorl %edx, %edx
+; X86-SLOW-NEXT: movl %eax, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: .LBB5_3:
-; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: shrl %eax
+; X86-SLOW-NEXT: notb %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: orl %edi, %eax
+; X86-SLOW-NEXT: shrl %esi
; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %ch
-; X86-SLOW-NEXT: jne .LBB5_4
-; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: orl %edi, %ebp
-; X86-SLOW-NEXT: jmp .LBB5_6
-; X86-SLOW-NEXT: .LBB5_4:
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: xorl %esi, %esi
-; X86-SLOW-NEXT: .LBB5_6:
-; X86-SLOW-NEXT: orl %ebp, %edx
-; X86-SLOW-NEXT: orl %esi, %ebx
-; X86-SLOW-NEXT: movl %edx, %eax
-; X86-SLOW-NEXT: movl %ebx, %edx
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: orl %esi, %edx
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
-; X86-SLOW-NEXT: popl %ebp
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i64:
@@ -307,226 +267,50 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: subl $72, %esp
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl %edx, %edi
-; X86-FAST-NEXT: shldl $31, %eax, %edi
-; X86-FAST-NEXT: movl %ebx, %eax
-; X86-FAST-NEXT: notl %ebx
-; X86-FAST-NEXT: andl $127, %ebx
-; X86-FAST-NEXT: movb $64, %cl
-; X86-FAST-NEXT: subb %bl, %cl
-; X86-FAST-NEXT: shrl %edx
-; X86-FAST-NEXT: movl %edx, %ebp
-; X86-FAST-NEXT: shldl %cl, %edi, %edx
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edi, %edx
-; X86-FAST-NEXT: shll %cl, %edx
-; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-FAST-NEXT: testb $64, %cl
; X86-FAST-NEXT: jne .LBB6_1
; X86-FAST-NEXT: # %bb.2:
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jmp .LBB6_3
-; X86-FAST-NEXT: .LBB6_1:
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_3:
-; X86-FAST-NEXT: andl $127, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %al, %ch
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shldl %cl, %esi, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: addb $-64, %cl
; X86-FAST-NEXT: movl %edi, %eax
-; X86-FAST-NEXT: movl %ebp, %edx
-; X86-FAST-NEXT: shrdl %cl, %ebp, %eax
-; X86-FAST-NEXT: shrl %cl, %ebp
+; X86-FAST-NEXT: movl %esi, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movl %ebx, %ebp
+; X86-FAST-NEXT: movl %edx, %ebx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: jne .LBB6_4
-; X86-FAST-NEXT: # %bb.5:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jmp .LBB6_6
+; X86-FAST-NEXT: je .LBB6_5
; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_6:
+; X86-FAST-NEXT: movl %edx, %esi
+; X86-FAST-NEXT: movl %edi, %edx
+; X86-FAST-NEXT: movl %ebx, %edi
+; X86-FAST-NEXT: movl %eax, %ebx
+; X86-FAST-NEXT: jmp .LBB6_6
+; X86-FAST-NEXT: .LBB6_1:
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shldl %cl, %eax, %ebp
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: shll %cl, %esi
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: jne .LBB6_7
-; X86-FAST-NEXT: # %bb.8:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jmp .LBB6_9
-; X86-FAST-NEXT: .LBB6_7:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %eax, %ebp
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_9:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: jb .LBB6_11
-; X86-FAST-NEXT: # %bb.10:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_11:
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shrdl %cl, %edx, %edi
-; X86-FAST-NEXT: shrl %cl, %edx
-; X86-FAST-NEXT: shldl $31, %eax, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: shrdl $1, %ebp, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: shrdl %cl, %esi, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, %eax
-; X86-FAST-NEXT: shrl %cl, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: je .LBB6_13
-; X86-FAST-NEXT: # %bb.12:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edx, %edi
-; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: xorl %edx, %edx
-; X86-FAST-NEXT: .LBB6_13:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jb .LBB6_15
-; X86-FAST-NEXT: # %bb.14:
-; X86-FAST-NEXT: xorl %ebp, %ebp
-; X86-FAST-NEXT: .LBB6_15:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb $64, %cl
-; X86-FAST-NEXT: subb %ch, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: shrl %cl, %ebp
-; X86-FAST-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: movl $0, %edx
-; X86-FAST-NEXT: jne .LBB6_17
-; X86-FAST-NEXT: # %bb.16:
-; X86-FAST-NEXT: movl %ebp, %edx
-; X86-FAST-NEXT: .LBB6_17:
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: addb $-64, %ch
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shll %cl, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: shldl %cl, %edi, %edx
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jne .LBB6_19
-; X86-FAST-NEXT: # %bb.18:
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_19:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_21
-; X86-FAST-NEXT: # %bb.20:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_21:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: jae .LBB6_23
-; X86-FAST-NEXT: # %bb.22:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_23:
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: jne .LBB6_25
-; X86-FAST-NEXT: # %bb.24:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_25:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_27
-; X86-FAST-NEXT: # %bb.26:
-; X86-FAST-NEXT: xorl %edx, %edx
-; X86-FAST-NEXT: .LBB6_27:
-; X86-FAST-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: shrdl %cl, %esi, %edi
; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: jne .LBB6_29
-; X86-FAST-NEXT: # %bb.28:
-; X86-FAST-NEXT: movl %edi, %ebp
-; X86-FAST-NEXT: .LBB6_29:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: jae .LBB6_31
-; X86-FAST-NEXT: # %bb.30:
-; X86-FAST-NEXT: orl %ebp, %esi
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_31:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_33
-; X86-FAST-NEXT: # %bb.32:
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-FAST-NEXT: jne .LBB6_4
+; X86-FAST-NEXT: .LBB6_5:
; X86-FAST-NEXT: movl %eax, %ebp
-; X86-FAST-NEXT: .LBB6_33:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_35
-; X86-FAST-NEXT: # %bb.34:
-; X86-FAST-NEXT: movl %edx, %ecx
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: orl %eax, %edx
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %ecx, %edx
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_35:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: je .LBB6_37
-; X86-FAST-NEXT: # %bb.36:
-; X86-FAST-NEXT: movl %ebp, %ecx
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-FAST-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_37:
-; X86-FAST-NEXT: orl %ecx, %edi
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-FAST-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: je .LBB6_39
-; X86-FAST-NEXT: # %bb.38:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_39:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-FAST-NEXT: orl %edx, %esi
-; X86-FAST-NEXT: movl %ecx, 12(%eax)
-; X86-FAST-NEXT: movl %esi, 8(%eax)
-; X86-FAST-NEXT: movl %edi, 4(%eax)
-; X86-FAST-NEXT: movl %ebx, (%eax)
-; X86-FAST-NEXT: addl $72, %esp
+; X86-FAST-NEXT: .LBB6_6:
+; X86-FAST-NEXT: movl %ebx, %eax
+; X86-FAST-NEXT: shldl %cl, %ebp, %eax
+; X86-FAST-NEXT: movl %edi, %ebp
+; X86-FAST-NEXT: shldl %cl, %ebx, %ebp
+; X86-FAST-NEXT: movl %edx, %ebx
+; X86-FAST-NEXT: shldl %cl, %edi, %ebx
+; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-FAST-NEXT: shldl %cl, %edx, %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl %esi, 12(%ecx)
+; X86-FAST-NEXT: movl %ebx, 8(%ecx)
+; X86-FAST-NEXT: movl %ebp, 4(%ecx)
+; X86-FAST-NEXT: movl %eax, (%ecx)
+; X86-FAST-NEXT: movl %ecx, %eax
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
@@ -539,289 +323,76 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $76, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: pushl %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: andl $127, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: # kill: def $al killed $al killed $eax
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: movb %al, %ah
-; X86-SLOW-NEXT: notb %ah
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-SLOW-NEXT: testb $32, %al
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: testb $64, %al
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: orl (%esp), %edx # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: movl %ebp, %ecx
+; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl %esi, %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl %ebx, %ebp
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: xorl %ebx, %ebx
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
-; X86-SLOW-NEXT: jb .LBB6_5
-; X86-SLOW-NEXT: # %bb.4:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: notl %ebx
-; X86-SLOW-NEXT: andl $127, %ebx
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: shrl %ecx
-; X86-SLOW-NEXT: movl %eax, %esi
-; X86-SLOW-NEXT: shll $31, %esi
-; X86-SLOW-NEXT: orl %ecx, %esi
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: movl $0, %esi
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: jne .LBB6_7
-; X86-SLOW-NEXT: # %bb.6:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movl %ebp, %ecx
-; X86-SLOW-NEXT: .LBB6_7:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: shrl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: shll $31, %esi
-; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: addl %edi, %edi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_9
-; X86-SLOW-NEXT: # %bb.8:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: .LBB6_9:
-; X86-SLOW-NEXT: movb %bl, %dh
-; X86-SLOW-NEXT: addb $-64, %dh
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-SLOW-NEXT: jne .LBB6_11
-; X86-SLOW-NEXT: # %bb.10:
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: .LBB6_11:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: jb .LBB6_13
-; X86-SLOW-NEXT: # %bb.12:
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: .LBB6_13:
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movb $64, %ch
-; X86-SLOW-NEXT: movb $64, %ah
-; X86-SLOW-NEXT: subb %dl, %ah
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: leal (%ebp,%ebp), %edi
+; X86-SLOW-NEXT: testb $32, %al
+; X86-SLOW-NEXT: jne .LBB6_4
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebp, %esi
+; X86-SLOW-NEXT: movl %edx, %ebp
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: .LBB6_6:
+; X86-SLOW-NEXT: movl %edx, %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: testb $32, %ah
-; X86-SLOW-NEXT: jne .LBB6_14
-; X86-SLOW-NEXT: # %bb.15:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: jmp .LBB6_16
-; X86-SLOW-NEXT: .LBB6_14:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_16:
-; X86-SLOW-NEXT: addb $-64, %dl
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: shrl %ebx
+; X86-SLOW-NEXT: movb %al, %ch
+; X86-SLOW-NEXT: notb %ch
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: orl %edi, %ebx
+; X86-SLOW-NEXT: movl %ebp, %edi
+; X86-SLOW-NEXT: movb %al, %cl
; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dl
-; X86-SLOW-NEXT: jne .LBB6_17
-; X86-SLOW-NEXT: # %bb.18:
-; X86-SLOW-NEXT: orl %eax, %edi
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_20
-; X86-SLOW-NEXT: jmp .LBB6_21
-; X86-SLOW-NEXT: .LBB6_17:
+; X86-SLOW-NEXT: shrl %edx
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: orl %edi, %edx
; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: xorl %esi, %esi
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_21
-; X86-SLOW-NEXT: .LBB6_20:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_21:
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jae .LBB6_23
-; X86-SLOW-NEXT: # %bb.22:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: orl %ebp, %esi
-; X86-SLOW-NEXT: .LBB6_23:
-; X86-SLOW-NEXT: movl (%esp), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jae .LBB6_25
-; X86-SLOW-NEXT: # %bb.24:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_25:
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: shll $31, %esi
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: addl %edi, %edi
-; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-SLOW-NEXT: movb %al, %cl
; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_27
-; X86-SLOW-NEXT: # %bb.26:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_27:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: jne .LBB6_29
-; X86-SLOW-NEXT: # %bb.28:
-; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_29:
-; X86-SLOW-NEXT: subb %bl, %ch
-; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: shrl %ebp
; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: movb %al, %cl
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: shrl %esi
; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: testb $32, %ch
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: jne .LBB6_30
-; X86-SLOW-NEXT: # %bb.31:
-; X86-SLOW-NEXT: orl %ecx, %edx
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_33
-; X86-SLOW-NEXT: jmp .LBB6_34
-; X86-SLOW-NEXT: .LBB6_30:
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_34
-; X86-SLOW-NEXT: .LBB6_33:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: orl %eax, %edx
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_34:
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_35
-; X86-SLOW-NEXT: # %bb.36:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-SLOW-NEXT: jmp .LBB6_37
-; X86-SLOW-NEXT: .LBB6_35:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: orl %ecx, %eax
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: .LBB6_37:
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB6_39
-; X86-SLOW-NEXT: # %bb.38:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_39:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-SLOW-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: je .LBB6_41
-; X86-SLOW-NEXT: # %bb.40:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: .LBB6_41:
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-SLOW-NEXT: movl %esi, 12(%eax)
-; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %ebp, 8(%eax)
+; X86-SLOW-NEXT: movl %edx, 4(%eax)
; X86-SLOW-NEXT: movl %ebx, (%eax)
-; X86-SLOW-NEXT: movl %ebp, 4(%eax)
-; X86-SLOW-NEXT: addl $76, %esp
+; X86-SLOW-NEXT: addl $4, %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
@@ -830,65 +401,39 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
;
; X64-FAST-LABEL: var_shift_i128:
; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movq %r8, %r9
-; X64-FAST-NEXT: movq %rcx, %r8
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shldq %cl, %rdi, %rsi
-; X64-FAST-NEXT: shrdq $1, %r8, %rdx
-; X64-FAST-NEXT: shrq %r8
-; X64-FAST-NEXT: notb %cl
-; X64-FAST-NEXT: shrdq %cl, %r8, %rdx
-; X64-FAST-NEXT: shrq %cl, %r8
-; X64-FAST-NEXT: xorl %eax, %eax
-; X64-FAST-NEXT: testb $64, %cl
-; X64-FAST-NEXT: cmovneq %r8, %rdx
-; X64-FAST-NEXT: cmovneq %rax, %r8
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shlq %cl, %rdi
-; X64-FAST-NEXT: testb $64, %r9b
+; X64-FAST-NEXT: testb $64, %r8b
; X64-FAST-NEXT: cmovneq %rdi, %rsi
-; X64-FAST-NEXT: cmoveq %rdi, %rax
-; X64-FAST-NEXT: orq %rdx, %rax
-; X64-FAST-NEXT: orq %rsi, %r8
-; X64-FAST-NEXT: movq %r8, %rdx
+; X64-FAST-NEXT: cmoveq %rcx, %rdx
+; X64-FAST-NEXT: cmovneq %rcx, %rdi
+; X64-FAST-NEXT: movq %rdi, %rax
+; X64-FAST-NEXT: movl %r8d, %ecx
+; X64-FAST-NEXT: shldq %cl, %rdx, %rax
+; X64-FAST-NEXT: shldq %cl, %rdi, %rsi
+; X64-FAST-NEXT: movq %rsi, %rdx
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: var_shift_i128:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movq %rcx, %r11
-; X64-SLOW-NEXT: movq %rdx, %r9
+; X64-SLOW-NEXT: testb $64, %r8b
+; X64-SLOW-NEXT: cmovneq %rdi, %rsi
+; X64-SLOW-NEXT: cmoveq %rcx, %rdx
+; X64-SLOW-NEXT: cmovneq %rcx, %rdi
+; X64-SLOW-NEXT: movq %rdi, %rax
; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rsi
-; X64-SLOW-NEXT: movq %rdi, %rdx
+; X64-SLOW-NEXT: shlq %cl, %rax
; X64-SLOW-NEXT: shrq %rdx
-; X64-SLOW-NEXT: movl %r8d, %r10d
-; X64-SLOW-NEXT: notb %r10b
-; X64-SLOW-NEXT: movl %r10d, %ecx
+; X64-SLOW-NEXT: movl %r8d, %r9d
+; X64-SLOW-NEXT: notb %r9b
+; X64-SLOW-NEXT: movl %r9d, %ecx
; X64-SLOW-NEXT: shrq %cl, %rdx
-; X64-SLOW-NEXT: orq %rsi, %rdx
-; X64-SLOW-NEXT: shrq %r9
-; X64-SLOW-NEXT: movq %r11, %rax
-; X64-SLOW-NEXT: shlq $63, %rax
-; X64-SLOW-NEXT: orq %r9, %rax
-; X64-SLOW-NEXT: shrq %cl, %rax
-; X64-SLOW-NEXT: shrq %r11
-; X64-SLOW-NEXT: leaq (%r11,%r11), %rsi
+; X64-SLOW-NEXT: orq %rdx, %rax
; X64-SLOW-NEXT: movl %r8d, %ecx
; X64-SLOW-NEXT: shlq %cl, %rsi
-; X64-SLOW-NEXT: orq %rax, %rsi
-; X64-SLOW-NEXT: movl %r10d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %r11
-; X64-SLOW-NEXT: xorl %eax, %eax
-; X64-SLOW-NEXT: testb $64, %r10b
-; X64-SLOW-NEXT: cmovneq %r11, %rsi
-; X64-SLOW-NEXT: cmovneq %rax, %r11
-; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rdi
-; X64-SLOW-NEXT: testb $64, %r8b
-; X64-SLOW-NEXT: cmovneq %rdi, %rdx
-; X64-SLOW-NEXT: cmoveq %rdi, %rax
-; X64-SLOW-NEXT: orq %rsi, %rax
-; X64-SLOW-NEXT: orq %r11, %rdx
+; X64-SLOW-NEXT: shrq %rdi
+; X64-SLOW-NEXT: movl %r9d, %ecx
+; X64-SLOW-NEXT: shrq %cl, %rdi
+; X64-SLOW-NEXT: orq %rsi, %rdi
+; X64-SLOW-NEXT: movq %rdi, %rdx
; X64-SLOW-NEXT: retq
%tmp = tail call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
ret i128 %tmp
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index ba6bf62e38bff..830dadba73730 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -176,106 +176,60 @@ define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %ebp
-; X86-FAST-NEXT: pushl %ebx
-; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-FAST-NEXT: movb %bl, %ch
-; X86-FAST-NEXT: notb %ch
-; X86-FAST-NEXT: shldl $1, %eax, %edx
-; X86-FAST-NEXT: addl %eax, %eax
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shldl %cl, %eax, %edx
-; X86-FAST-NEXT: movl %ebp, %edi
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shrl %cl, %edi
-; X86-FAST-NEXT: shrdl %cl, %ebp, %esi
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: je .LBB5_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: xorl %edi, %edi
-; X86-FAST-NEXT: .LBB5_2:
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: je .LBB5_4
-; X86-FAST-NEXT: # %bb.3:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: testb $32, %cl
+; X86-FAST-NEXT: je .LBB5_1
+; X86-FAST-NEXT: # %bb.2:
+; X86-FAST-NEXT: movl %esi, %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: jmp .LBB5_3
+; X86-FAST-NEXT: .LBB5_1:
; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: .LBB5_4:
-; X86-FAST-NEXT: orl %edi, %edx
-; X86-FAST-NEXT: orl %esi, %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: .LBB5_3:
+; X86-FAST-NEXT: shrdl %cl, %edx, %eax
+; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-FAST-NEXT: shrdl %cl, %esi, %edx
; X86-FAST-NEXT: popl %esi
-; X86-FAST-NEXT: popl %edi
-; X86-FAST-NEXT: popl %ebx
-; X86-FAST-NEXT: popl %ebp
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: var_shift_i64:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: pushl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shrl $31, %ecx
-; X86-SLOW-NEXT: leal (%ecx,%edx,2), %edx
-; X86-SLOW-NEXT: movb %bl, %ch
-; X86-SLOW-NEXT: notb %ch
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB5_1
+; X86-SLOW-NEXT: je .LBB5_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: orl (%esp), %ebp # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: jmp .LBB5_3
; X86-SLOW-NEXT: .LBB5_1:
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: xorl %esi, %esi
+; X86-SLOW-NEXT: movl %eax, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: .LBB5_3:
-; X86-SLOW-NEXT: addl %eax, %eax
+; X86-SLOW-NEXT: leal (%esi,%esi), %edi
+; X86-SLOW-NEXT: movb %bl, %ch
+; X86-SLOW-NEXT: notb %ch
; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: testb $32, %ch
-; X86-SLOW-NEXT: jne .LBB5_4
-; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: orl %edi, %edx
-; X86-SLOW-NEXT: jmp .LBB5_6
-; X86-SLOW-NEXT: .LBB5_4:
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: .LBB5_6:
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: movb %bl, %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: orl %edi, %eax
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: addl %edx, %edx
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shll %cl, %edx
; X86-SLOW-NEXT: orl %esi, %edx
-; X86-SLOW-NEXT: orl %ebp, %eax
-; X86-SLOW-NEXT: addl $4, %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
-; X86-SLOW-NEXT: popl %ebp
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i64:
@@ -307,243 +261,48 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: subl $76, %esp
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: pushl %eax
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl %ebx, %ecx
-; X86-FAST-NEXT: andl $127, %ecx
-; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %cl, %ch
-; X86-FAST-NEXT: movb $64, %cl
-; X86-FAST-NEXT: subb %ch, %cl
-; X86-FAST-NEXT: shll %cl, %edi
-; X86-FAST-NEXT: movb %cl, (%esp) # 1-byte Spill
-; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: movl $0, %esi
-; X86-FAST-NEXT: jne .LBB6_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: .LBB6_2:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %eax, %edi
-; X86-FAST-NEXT: movl %ebp, %eax
-; X86-FAST-NEXT: shldl $1, %ebp, %edi
-; X86-FAST-NEXT: addl %ebp, %eax
-; X86-FAST-NEXT: notl %ebx
-; X86-FAST-NEXT: andl $127, %ebx
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shldl %cl, %eax, %edi
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: movl %eax, %esi
-; X86-FAST-NEXT: jne .LBB6_4
-; X86-FAST-NEXT: # %bb.3:
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: shrdl %cl, %edi, %esi
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: shrl %cl, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: shrl %cl, %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: testb $64, %cl
+; X86-FAST-NEXT: je .LBB6_1
+; X86-FAST-NEXT: # %bb.2:
+; X86-FAST-NEXT: movl %edi, %ebp
+; X86-FAST-NEXT: movl %ebx, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl %edx, %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: shrdl %cl, %edx, %ebp
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: jne .LBB6_5
-; X86-FAST-NEXT: # %bb.6:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jmp .LBB6_7
-; X86-FAST-NEXT: .LBB6_5:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: xorl %edi, %edi
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_7:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: movl $0, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: jne .LBB6_9
-; X86-FAST-NEXT: # %bb.8:
-; X86-FAST-NEXT: movl %eax, %esi
-; X86-FAST-NEXT: .LBB6_9:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jb .LBB6_11
-; X86-FAST-NEXT: # %bb.10:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_11:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shrdl $31, %edi, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: movl $0, %edi
-; X86-FAST-NEXT: jne .LBB6_13
-; X86-FAST-NEXT: # %bb.12:
-; X86-FAST-NEXT: movl %eax, %edi
-; X86-FAST-NEXT: .LBB6_13:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb (%esp), %cl # 1-byte Reload
-; X86-FAST-NEXT: shldl %cl, %ebp, %eax
; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: jne .LBB6_15
-; X86-FAST-NEXT: # %bb.14:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_15:
-; X86-FAST-NEXT: movb %bl, %dh
-; X86-FAST-NEXT: addb $-64, %dh
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: movb %dh, %cl
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: testb $32, %dh
-; X86-FAST-NEXT: movl $0, %eax
-; X86-FAST-NEXT: jne .LBB6_17
-; X86-FAST-NEXT: # %bb.16:
-; X86-FAST-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_17:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jb .LBB6_19
-; X86-FAST-NEXT: # %bb.18:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_19:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_21
-; X86-FAST-NEXT: # %bb.20:
-; X86-FAST-NEXT: xorl %esi, %esi
-; X86-FAST-NEXT: .LBB6_21:
-; X86-FAST-NEXT: addb $-64, %ch
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shrl %cl, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: movl $0, %eax
-; X86-FAST-NEXT: jne .LBB6_23
-; X86-FAST-NEXT: # %bb.22:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_23:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jae .LBB6_25
-; X86-FAST-NEXT: # %bb.24:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-FAST-NEXT: .LBB6_25:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shrdl %cl, %eax, %ebp
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: jne .LBB6_27
-; X86-FAST-NEXT: # %bb.26:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_27:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_29
-; X86-FAST-NEXT: # %bb.28:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_29:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-FAST-NEXT: je .LBB6_4
+; X86-FAST-NEXT: jmp .LBB6_5
+; X86-FAST-NEXT: .LBB6_1:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: movl %ebp, (%esp) # 4-byte Spill
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: jae .LBB6_31
-; X86-FAST-NEXT: # %bb.30:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_31:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shldl $1, %eax, %ebp
-; X86-FAST-NEXT: movl %ebp, %eax
-; X86-FAST-NEXT: movl %ebx, %ecx
-; X86-FAST-NEXT: shldl %cl, %edi, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: jne .LBB6_33
-; X86-FAST-NEXT: # %bb.32:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_33:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: movb %dh, %cl
-; X86-FAST-NEXT: shldl %cl, %esi, %eax
-; X86-FAST-NEXT: testb $32, %dh
-; X86-FAST-NEXT: jne .LBB6_35
-; X86-FAST-NEXT: # %bb.34:
-; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_35:
-; X86-FAST-NEXT: movb $64, %cl
-; X86-FAST-NEXT: subb %bl, %cl
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: shrdl %cl, %eax, %esi
-; X86-FAST-NEXT: shrl %cl, %eax
; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: je .LBB6_37
-; X86-FAST-NEXT: # %bb.36:
-; X86-FAST-NEXT: movl %eax, %esi
-; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: .LBB6_37:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_39
-; X86-FAST-NEXT: # %bb.38:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: orl %eax, %ecx
-; X86-FAST-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_39:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_41
-; X86-FAST-NEXT: # %bb.40:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: orl %esi, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_41:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: je .LBB6_43
-; X86-FAST-NEXT: # %bb.42:
+; X86-FAST-NEXT: jne .LBB6_5
+; X86-FAST-NEXT: .LBB6_4:
+; X86-FAST-NEXT: movl %edx, %ebx
+; X86-FAST-NEXT: movl %edi, %edx
+; X86-FAST-NEXT: movl %esi, %edi
+; X86-FAST-NEXT: movl %ebp, %esi
; X86-FAST-NEXT: movl (%esp), %ebp # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_43:
-; X86-FAST-NEXT: orl %edx, %ebp
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-FAST-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: je .LBB6_45
-; X86-FAST-NEXT: # %bb.44:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_45:
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-FAST-NEXT: movl %ecx, 4(%eax)
-; X86-FAST-NEXT: movl %esi, (%eax)
-; X86-FAST-NEXT: movl %ebp, 12(%eax)
+; X86-FAST-NEXT: .LBB6_5:
+; X86-FAST-NEXT: shrdl %cl, %esi, %ebp
+; X86-FAST-NEXT: shrdl %cl, %edi, %esi
+; X86-FAST-NEXT: shrdl %cl, %edx, %edi
+; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-FAST-NEXT: shrdl %cl, %ebx, %edx
+; X86-FAST-NEXT: movl %edx, 12(%eax)
; X86-FAST-NEXT: movl %edi, 8(%eax)
-; X86-FAST-NEXT: addl $76, %esp
+; X86-FAST-NEXT: movl %esi, 4(%eax)
+; X86-FAST-NEXT: movl %ebp, (%eax)
+; X86-FAST-NEXT: addl $4, %esp
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
@@ -556,281 +315,76 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $72, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: subl $8, %esp
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: andl $127, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: leal (%edi,%edi), %ebp
-; X86-SLOW-NEXT: notb %al
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebp
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: leal (%esi,%esi), %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dl
-; X86-SLOW-NEXT: jne .LBB6_1
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: testb $64, %cl
+; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-SLOW-NEXT: orl %edi, %ebx
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: jmp .LBB6_3
-; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl %eax, %ebp
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %edx
+; X86-SLOW-NEXT: movl %edi, %ebx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl %ebp, %eax
+; X86-SLOW-NEXT: movl %esi, %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: jne .LBB6_5
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebp, %esi
+; X86-SLOW-NEXT: movl %edx, %ebp
+; X86-SLOW-NEXT: movl %eax, %edx
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: jb .LBB6_5
-; X86-SLOW-NEXT: # %bb.4:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: je .LBB6_4
; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: leal (%ecx,%ecx), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: notl %ebx
-; X86-SLOW-NEXT: andl $127, %ebx
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %esi
+; X86-SLOW-NEXT: .LBB6_6:
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: notb %bl
+; X86-SLOW-NEXT: leal (%ebp,%ebp), %eax
; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shrl $31, %ecx
-; X86-SLOW-NEXT: leal (%ecx,%edi,2), %ecx
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %edi
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: orl %edx, %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: leal (%esi,%esi), %edx
; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: movl $0, %edi
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: jne .LBB6_7
-; X86-SLOW-NEXT: # %bb.6:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: .LBB6_7:
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: shrl $31, %ecx
-; X86-SLOW-NEXT: leal (%ecx,%eax,2), %esi
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: orl %ebp, %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl (%esp), %esi # 4-byte Reload
+; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_9
-; X86-SLOW-NEXT: # %bb.8:
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_9:
-; X86-SLOW-NEXT: movb %bl, %dh
-; X86-SLOW-NEXT: addb $-64, %dh
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: jne .LBB6_11
-; X86-SLOW-NEXT: # %bb.10:
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: .LBB6_11:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: jb .LBB6_13
-; X86-SLOW-NEXT: # %bb.12:
-; X86-SLOW-NEXT: xorl %ebp, %ebp
-; X86-SLOW-NEXT: .LBB6_13:
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb $64, %ch
-; X86-SLOW-NEXT: movb $64, %ah
-; X86-SLOW-NEXT: subb %dl, %ah
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %ah
-; X86-SLOW-NEXT: jne .LBB6_14
-; X86-SLOW-NEXT: # %bb.15:
-; X86-SLOW-NEXT: orl %edi, %ebp
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: jmp .LBB6_16
-; X86-SLOW-NEXT: .LBB6_14:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_16:
-; X86-SLOW-NEXT: addb $-64, %dl
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb $32, %dl
-; X86-SLOW-NEXT: jne .LBB6_17
-; X86-SLOW-NEXT: # %bb.18:
-; X86-SLOW-NEXT: orl %eax, %ebp
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_20
-; X86-SLOW-NEXT: jmp .LBB6_21
-; X86-SLOW-NEXT: .LBB6_17:
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: xorl %edi, %edi
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_21
-; X86-SLOW-NEXT: .LBB6_20:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_21:
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: jae .LBB6_23
-; X86-SLOW-NEXT: # %bb.22:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: .LBB6_23:
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jb .LBB6_24
-; X86-SLOW-NEXT: # %bb.25:
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jmp .LBB6_26
-; X86-SLOW-NEXT: .LBB6_24:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_26:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: shrl $31, %eax
-; X86-SLOW-NEXT: leal (%eax,%esi,2), %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_28
-; X86-SLOW-NEXT: # %bb.27:
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_28:
-; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: jne .LBB6_30
-; X86-SLOW-NEXT: # %bb.29:
-; X86-SLOW-NEXT: orl %esi, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_30:
-; X86-SLOW-NEXT: subb %bl, %ch
-; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: addl %ebp, %ebp
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: testb $32, %ch
+; X86-SLOW-NEXT: addl %edi, %edi
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl %esi, %edi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: jne .LBB6_31
-; X86-SLOW-NEXT: # %bb.32:
-; X86-SLOW-NEXT: orl %ebp, %esi
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_34
-; X86-SLOW-NEXT: jmp .LBB6_35
-; X86-SLOW-NEXT: .LBB6_31:
-; X86-SLOW-NEXT: movl %eax, %ebp
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_35
-; X86-SLOW-NEXT: .LBB6_34:
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: orl %eax, %ebp
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: .LBB6_35:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_37
-; X86-SLOW-NEXT: # %bb.36:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: orl %ebp, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_37:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB6_39
-; X86-SLOW-NEXT: # %bb.38:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: .LBB6_39:
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-SLOW-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: je .LBB6_41
-; X86-SLOW-NEXT: # %bb.40:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: .LBB6_41:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: orl %ecx, %ebx
-; X86-SLOW-NEXT: orl %ebp, %edx
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %ebx, (%eax)
-; X86-SLOW-NEXT: movl %esi, 12(%eax)
-; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %edi, 8(%eax)
-; X86-SLOW-NEXT: addl $72, %esp
+; X86-SLOW-NEXT: movl %edi, 12(%ecx)
+; X86-SLOW-NEXT: movl %ebp, 8(%ecx)
+; X86-SLOW-NEXT: movl %edx, 4(%ecx)
+; X86-SLOW-NEXT: movl %eax, (%ecx)
+; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: addl $8, %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
@@ -839,65 +393,37 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
;
; X64-FAST-LABEL: var_shift_i128:
; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movq %r8, %r9
-; X64-FAST-NEXT: movq %rcx, %r8
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shrdq %cl, %r8, %rdx
-; X64-FAST-NEXT: shrq %cl, %r8
-; X64-FAST-NEXT: xorl %eax, %eax
-; X64-FAST-NEXT: testb $64, %r9b
-; X64-FAST-NEXT: cmovneq %r8, %rdx
-; X64-FAST-NEXT: cmovneq %rax, %r8
-; X64-FAST-NEXT: shldq $1, %rdi, %rsi
-; X64-FAST-NEXT: addq %rdi, %rdi
-; X64-FAST-NEXT: notb %r9b
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shldq %cl, %rdi, %rsi
-; X64-FAST-NEXT: shlq %cl, %rdi
-; X64-FAST-NEXT: testb $64, %r9b
-; X64-FAST-NEXT: cmovneq %rdi, %rsi
-; X64-FAST-NEXT: cmoveq %rdi, %rax
-; X64-FAST-NEXT: orq %rdx, %rax
-; X64-FAST-NEXT: orq %rsi, %r8
-; X64-FAST-NEXT: movq %r8, %rdx
+; X64-FAST-NEXT: movq %rdx, %rax
+; X64-FAST-NEXT: testb $64, %r8b
+; X64-FAST-NEXT: cmoveq %rdi, %rsi
+; X64-FAST-NEXT: cmoveq %rcx, %rdi
+; X64-FAST-NEXT: cmovneq %rcx, %rax
+; X64-FAST-NEXT: movl %r8d, %ecx
+; X64-FAST-NEXT: shrdq %cl, %rdi, %rax
+; X64-FAST-NEXT: shrdq %cl, %rsi, %rdi
+; X64-FAST-NEXT: movq %rdi, %rdx
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: var_shift_i128:
; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movq %rcx, %r9
-; X64-SLOW-NEXT: movq %rdx, %r10
-; X64-SLOW-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; X64-SLOW-NEXT: andq %rdi, %rax
-; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %rax
-; X64-SLOW-NEXT: movq %rdi, %rcx
-; X64-SLOW-NEXT: shrq $63, %rcx
-; X64-SLOW-NEXT: leaq (%rcx,%rsi,2), %rdx
-; X64-SLOW-NEXT: movl %r8d, %r11d
-; X64-SLOW-NEXT: notb %r11b
-; X64-SLOW-NEXT: movl %r11d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rdx
-; X64-SLOW-NEXT: orq %rax, %rdx
+; X64-SLOW-NEXT: testb $64, %r8b
+; X64-SLOW-NEXT: cmoveq %rdi, %rsi
+; X64-SLOW-NEXT: cmoveq %rcx, %rdi
+; X64-SLOW-NEXT: cmovneq %rcx, %rdx
; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %r10
-; X64-SLOW-NEXT: leaq (%r9,%r9), %rsi
-; X64-SLOW-NEXT: movl %r11d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rsi
-; X64-SLOW-NEXT: orq %r10, %rsi
+; X64-SLOW-NEXT: shrq %cl, %rdx
+; X64-SLOW-NEXT: leaq (%rdi,%rdi), %rax
+; X64-SLOW-NEXT: movl %r8d, %r9d
+; X64-SLOW-NEXT: notb %r9b
+; X64-SLOW-NEXT: movl %r9d, %ecx
+; X64-SLOW-NEXT: shlq %cl, %rax
+; X64-SLOW-NEXT: orq %rdx, %rax
; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %r9
-; X64-SLOW-NEXT: xorl %eax, %eax
-; X64-SLOW-NEXT: testb $64, %r8b
-; X64-SLOW-NEXT: cmovneq %r9, %rsi
-; X64-SLOW-NEXT: cmovneq %rax, %r9
-; X64-SLOW-NEXT: addq %rdi, %rdi
-; X64-SLOW-NEXT: movl %r11d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rdi
-; X64-SLOW-NEXT: testb $64, %r11b
-; X64-SLOW-NEXT: cmovneq %rdi, %rdx
-; X64-SLOW-NEXT: cmoveq %rdi, %rax
-; X64-SLOW-NEXT: orq %rsi, %rax
-; X64-SLOW-NEXT: orq %r9, %rdx
+; X64-SLOW-NEXT: shrq %cl, %rdi
+; X64-SLOW-NEXT: leaq (%rsi,%rsi), %rdx
+; X64-SLOW-NEXT: movl %r9d, %ecx
+; X64-SLOW-NEXT: shlq %cl, %rdx
+; X64-SLOW-NEXT: orq %rdi, %rdx
; X64-SLOW-NEXT: retq
%tmp = tail call i128 @llvm.fshr.i128(i128 %x, i128 %y, i128 %z)
ret i128 %tmp
@@ -1004,9 +530,9 @@ define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
; X86-FAST-LABEL: const_shift_i64:
; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: shldl $25, %ecx, %edx
; X86-FAST-NEXT: shrdl $7, %ecx, %eax
; X86-FAST-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
index aaefb082cc8ca..a73ef92f9ff63 100644
--- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
@@ -276,34 +276,19 @@ define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
define i64 @rotr_i64(i64 %x, i64 %z) nounwind {
; X32-SSE2-LABEL: rotr_i64:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %ebp
-; X32-SSE2-NEXT: pushl %ebx
-; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl %edx, %esi
-; X32-SSE2-NEXT: shrl %cl, %esi
-; X32-SSE2-NEXT: movl %ebx, %edi
-; X32-SSE2-NEXT: shrdl %cl, %edx, %edi
-; X32-SSE2-NEXT: xorl %ebp, %ebp
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %esi, %edi
-; X32-SSE2-NEXT: cmovnel %ebp, %esi
-; X32-SSE2-NEXT: negb %cl
-; X32-SSE2-NEXT: movl %ebx, %eax
-; X32-SSE2-NEXT: shll %cl, %eax
-; X32-SSE2-NEXT: shldl %cl, %ebx, %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %eax, %edx
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: orl %edi, %eax
-; X32-SSE2-NEXT: orl %esi, %edx
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: cmovel %esi, %edx
+; X32-SSE2-NEXT: cmovel %eax, %esi
+; X32-SSE2-NEXT: movl %esi, %eax
+; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT: shrdl %cl, %esi, %edx
; X32-SSE2-NEXT: popl %esi
-; X32-SSE2-NEXT: popl %edi
-; X32-SSE2-NEXT: popl %ebx
-; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i64:
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index dc4c929ee835e..ef1761d39f9e1 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -40,38 +40,22 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X32-SSE2-LABEL: fshl_i64:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %ebp
-; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: notb %cl
-; X32-SSE2-NEXT: shrdl $1, %ebx, %esi
-; X32-SSE2-NEXT: shrl %ebx
-; X32-SSE2-NEXT: shrdl %cl, %ebx, %esi
-; X32-SSE2-NEXT: shrl %cl, %ebx
-; X32-SSE2-NEXT: xorl %ebp, %ebp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %ebx, %esi
-; X32-SSE2-NEXT: cmovnel %ebp, %ebx
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: cmovnel %esi, %edi
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl %edi, %eax
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: shll %cl, %eax
+; X32-SSE2-NEXT: shldl %cl, %esi, %eax
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
; X32-SSE2-NEXT: shldl %cl, %edi, %edx
-; X32-SSE2-NEXT: testb $32, %ch
-; X32-SSE2-NEXT: cmovnel %eax, %edx
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: orl %esi, %eax
-; X32-SSE2-NEXT: orl %ebx, %edx
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
-; X32-SSE2-NEXT: popl %ebx
-; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i64:
@@ -92,169 +76,40 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: subl $64, %esp
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl %esi, %edi
-; X32-SSE2-NEXT: shldl $31, %ecx, %edi
-; X32-SSE2-NEXT: notl %ebx
-; X32-SSE2-NEXT: andl $127, %ebx
-; X32-SSE2-NEXT: movb $64, %cl
-; X32-SSE2-NEXT: subb %bl, %cl
-; X32-SSE2-NEXT: shrl %esi
-; X32-SSE2-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X32-SSE2-NEXT: shldl %cl, %edi, %esi
-; X32-SSE2-NEXT: movl %edi, %ebp
-; X32-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %ebp, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: cmovnel %eax, %ebp
-; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: andl $127, %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: shldl %cl, %ebp, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl %ebx, %ecx
-; X32-SSE2-NEXT: addb $-64, %cl
-; X32-SSE2-NEXT: movl (%esp), %esi # 4-byte Reload
-; X32-SSE2-NEXT: shrdl %cl, %esi, %edi
-; X32-SSE2-NEXT: shrl %cl, %esi
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %esi, %edi
-; X32-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl $0, %ecx
-; X32-SSE2-NEXT: cmovnel %ecx, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: shldl %cl, %edi, %esi
-; X32-SSE2-NEXT: movl %edi, %edx
-; X32-SSE2-NEXT: shll %cl, %edx
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: testb $32, %al
-; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: cmovnel %edx, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl $0, %eax
-; X32-SSE2-NEXT: cmovnel %eax, %ebp
-; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: cmovnel %eax, %edx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: cmpl $64, %ecx
-; X32-SSE2-NEXT: cmovael %eax, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-SSE2-NEXT: shldl $31, %eax, %ebp
-; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: shrdl $1, %eax, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl %ebx, %ecx
-; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-SSE2-NEXT: movl (%esp), %edx # 4-byte Reload
-; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
-; X32-SSE2-NEXT: shrl %cl, %edx
-; X32-SSE2-NEXT: movl %esi, %ebx
-; X32-SSE2-NEXT: shrdl %cl, %ebp, %ebx
-; X32-SSE2-NEXT: movl %ebp, %esi
-; X32-SSE2-NEXT: shrl %cl, %esi
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %esi, %ebx
-; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl %edx, %ecx
-; X32-SSE2-NEXT: cmovnel %edx, %eax
-; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl $0, %eax
-; X32-SSE2-NEXT: cmovnel %eax, %esi
-; X32-SSE2-NEXT: cmovnel %eax, %ecx
-; X32-SSE2-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-SSE2-NEXT: cmpl $64, %ebx
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-SSE2-NEXT: cmovael %eax, %ecx
-; X32-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: xorl %ebp, %ebp
-; X32-SSE2-NEXT: movb $64, %ch
-; X32-SSE2-NEXT: subb %bl, %ch
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: shrl %cl, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: testb $32, %ch
-; X32-SSE2-NEXT: cmovnel %ebp, %edx
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-SSE2-NEXT: movb %bl, %cl
-; X32-SSE2-NEXT: addb $-64, %cl
-; X32-SSE2-NEXT: movl %edi, %ebp
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl %cl, %edi, %eax
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl (%esp), %ebx # 4-byte Reload
-; X32-SSE2-NEXT: movl $0, %edi
-; X32-SSE2-NEXT: cmovael %edi, %ebx
-; X32-SSE2-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-SSE2-NEXT: cmpl $64, %ebx
-; X32-SSE2-NEXT: cmovbl %edx, %eax
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: movl $0, %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT: testb $64, %cl
+; X32-SSE2-NEXT: movl %esi, %eax
+; X32-SSE2-NEXT: cmovnel %ebx, %eax
+; X32-SSE2-NEXT: movl %edx, %ebp
; X32-SSE2-NEXT: cmovnel %edi, %ebp
-; X32-SSE2-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-SSE2-NEXT: cmovael %edi, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-SSE2-NEXT: shrdl %cl, %edx, %edi
-; X32-SSE2-NEXT: testb $32, %ch
-; X32-SSE2-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-SSE2-NEXT: cmpl $64, %ebx
-; X32-SSE2-NEXT: cmovael %ebp, %edi
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-SSE2-NEXT: cmpl $64, %edx
-; X32-SSE2-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: cmpl $64, %edx
-; X32-SSE2-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: testl %edx, %edx
-; X32-SSE2-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl %ecx, %edx
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: testl %ebx, %ebx
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edi
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-SSE2-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: cmovnel %esi, %edx
+; X32-SSE2-NEXT: cmovnel %ebp, %esi
+; X32-SSE2-NEXT: cmovnel %eax, %ebp
+; X32-SSE2-NEXT: cmovel %edi, %ebx
+; X32-SSE2-NEXT: cmovel %eax, %edi
+; X32-SSE2-NEXT: movl %edi, %eax
+; X32-SSE2-NEXT: shldl %cl, %ebx, %eax
+; X32-SSE2-NEXT: movl %ebp, %ebx
+; X32-SSE2-NEXT: shldl %cl, %edi, %ebx
+; X32-SSE2-NEXT: movl %esi, %edi
+; X32-SSE2-NEXT: shldl %cl, %ebp, %edi
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT: shldl %cl, %esi, %edx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: movl %eax, 12(%ecx)
+; X32-SSE2-NEXT: movl %edx, 12(%ecx)
; X32-SSE2-NEXT: movl %edi, 8(%ecx)
-; X32-SSE2-NEXT: movl %esi, 4(%ecx)
-; X32-SSE2-NEXT: movl %edx, (%ecx)
+; X32-SSE2-NEXT: movl %ebx, 4(%ecx)
+; X32-SSE2-NEXT: movl %eax, (%ecx)
; X32-SSE2-NEXT: movl %ecx, %eax
-; X32-SSE2-NEXT: addl $64, %esp
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
@@ -263,27 +118,15 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
;
; X64-AVX2-LABEL: fshl_i128:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: movq %r8, %r9
-; X64-AVX2-NEXT: movq %rcx, %r8
-; X64-AVX2-NEXT: movl %r9d, %ecx
-; X64-AVX2-NEXT: shldq %cl, %rdi, %rsi
-; X64-AVX2-NEXT: shrdq $1, %r8, %rdx
-; X64-AVX2-NEXT: shrq %r8
-; X64-AVX2-NEXT: notb %cl
-; X64-AVX2-NEXT: shrdq %cl, %r8, %rdx
-; X64-AVX2-NEXT: shrq %cl, %r8
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: testb $64, %cl
-; X64-AVX2-NEXT: cmovneq %r8, %rdx
-; X64-AVX2-NEXT: cmovneq %rax, %r8
-; X64-AVX2-NEXT: movl %r9d, %ecx
-; X64-AVX2-NEXT: shlq %cl, %rdi
-; X64-AVX2-NEXT: testb $64, %r9b
+; X64-AVX2-NEXT: testb $64, %r8b
; X64-AVX2-NEXT: cmovneq %rdi, %rsi
-; X64-AVX2-NEXT: cmoveq %rdi, %rax
-; X64-AVX2-NEXT: orq %rdx, %rax
-; X64-AVX2-NEXT: orq %rsi, %r8
-; X64-AVX2-NEXT: movq %r8, %rdx
+; X64-AVX2-NEXT: cmoveq %rcx, %rdx
+; X64-AVX2-NEXT: cmovneq %rcx, %rdi
+; X64-AVX2-NEXT: movq %rdi, %rax
+; X64-AVX2-NEXT: movl %r8d, %ecx
+; X64-AVX2-NEXT: shldq %cl, %rdx, %rax
+; X64-AVX2-NEXT: shldq %cl, %rdi, %rsi
+; X64-AVX2-NEXT: movq %rsi, %rdx
; X64-AVX2-NEXT: retq
%f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
ret i128 %f
@@ -294,7 +137,6 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X32-SSE2-LABEL: fshl_i37:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
@@ -302,40 +144,31 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT: shldl $27, %ebx, %edi
-; X32-SSE2-NEXT: shll $27, %ebx
-; X32-SSE2-NEXT: shrdl $1, %edi, %ebx
-; X32-SSE2-NEXT: shrl %edi
; X32-SSE2-NEXT: pushl $0
; X32-SSE2-NEXT: pushl $37
; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X32-SSE2-NEXT: calll __umoddi3
; X32-SSE2-NEXT: addl $16, %esp
-; X32-SSE2-NEXT: movl %eax, %edx
-; X32-SSE2-NEXT: movl %edx, %ecx
-; X32-SSE2-NEXT: notb %cl
-; X32-SSE2-NEXT: shrdl %cl, %edi, %ebx
-; X32-SSE2-NEXT: shrl %cl, %edi
-; X32-SSE2-NEXT: xorl %eax, %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %edi, %ebx
-; X32-SSE2-NEXT: cmovnel %eax, %edi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl %edx, %ecx
-; X32-SSE2-NEXT: shll %cl, %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-SSE2-NEXT: shldl %cl, %ebp, %esi
-; X32-SSE2-NEXT: testb $32, %dl
-; X32-SSE2-NEXT: cmovnel %eax, %esi
-; X32-SSE2-NEXT: movl $0, %ecx
-; X32-SSE2-NEXT: cmovnel %ecx, %eax
-; X32-SSE2-NEXT: orl %ebx, %eax
-; X32-SSE2-NEXT: orl %edi, %esi
+; X32-SSE2-NEXT: jne .LBB3_1
+; X32-SSE2-NEXT: # %bb.2:
+; X32-SSE2-NEXT: movl %edi, %ebx
+; X32-SSE2-NEXT: movl %esi, %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: jmp .LBB3_3
+; X32-SSE2-NEXT: .LBB3_1:
+; X32-SSE2-NEXT: shll $27, %ebx
+; X32-SSE2-NEXT: .LBB3_3:
+; X32-SSE2-NEXT: movl %edi, %eax
+; X32-SSE2-NEXT: shldl %cl, %ebx, %eax
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT: shldl %cl, %edi, %esi
; X32-SSE2-NEXT: movl %esi, %edx
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
-; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i37:
@@ -468,51 +301,39 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X32-SSE2-LABEL: fshr_i37:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: shldl $1, %edi, %esi
-; X32-SSE2-NEXT: addl %edi, %edi
+; X32-SSE2-NEXT: shldl $27, %ebx, %esi
; X32-SSE2-NEXT: pushl $0
; X32-SSE2-NEXT: pushl $37
; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X32-SSE2-NEXT: calll __umoddi3
; X32-SSE2-NEXT: addl $16, %esp
-; X32-SSE2-NEXT: movl %eax, %edx
-; X32-SSE2-NEXT: addb $27, %dl
-; X32-SSE2-NEXT: movl %edx, %eax
-; X32-SSE2-NEXT: notb %al
-; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: shldl %cl, %edi, %esi
-; X32-SSE2-NEXT: shldl $27, %ebp, %ebx
-; X32-SSE2-NEXT: shll $27, %ebp
-; X32-SSE2-NEXT: movl %edx, %ecx
-; X32-SSE2-NEXT: shrdl %cl, %ebx, %ebp
-; X32-SSE2-NEXT: shrl %cl, %ebx
-; X32-SSE2-NEXT: xorl %ecx, %ecx
-; X32-SSE2-NEXT: testb $32, %dl
-; X32-SSE2-NEXT: cmovnel %ebx, %ebp
-; X32-SSE2-NEXT: cmovnel %ecx, %ebx
-; X32-SSE2-NEXT: xorl %edx, %edx
; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: shll %cl, %edi
-; X32-SSE2-NEXT: testb $32, %al
-; X32-SSE2-NEXT: cmovnel %edi, %esi
-; X32-SSE2-NEXT: cmovnel %edx, %edi
-; X32-SSE2-NEXT: orl %ebp, %edi
-; X32-SSE2-NEXT: orl %ebx, %esi
-; X32-SSE2-NEXT: movl %edi, %eax
+; X32-SSE2-NEXT: addl $27, %ecx
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: je .LBB10_1
+; X32-SSE2-NEXT: # %bb.2:
+; X32-SSE2-NEXT: movl %edi, %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: jmp .LBB10_3
+; X32-SSE2-NEXT: .LBB10_1:
+; X32-SSE2-NEXT: shll $27, %ebx
; X32-SSE2-NEXT: movl %esi, %edx
+; X32-SSE2-NEXT: movl %ebx, %esi
+; X32-SSE2-NEXT: .LBB10_3:
+; X32-SSE2-NEXT: shrdl %cl, %edx, %esi
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT: shrdl %cl, %edi, %edx
+; X32-SSE2-NEXT: movl %esi, %eax
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
-; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i37:
@@ -1070,9 +891,9 @@ define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X32-SSE2-LABEL: fshr_i64_const_overshift:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
; X32-SSE2-NEXT: shldl $23, %ecx, %edx
; X32-SSE2-NEXT: retl
More information about the llvm-commits
mailing list