[llvm] 387c157 - [NFC][Codegen] Tests with wide scalar shifts, for new potential legalization strategy
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 23 14:30:12 PST 2022
Author: Roman Lebedev
Date: 2022-12-24T00:47:25+03:00
New Revision: 387c1573f89117687f4b964ae3a90ea7c91a4f90
URL: https://github.com/llvm/llvm-project/commit/387c1573f89117687f4b964ae3a90ea7c91a4f90
DIFF: https://github.com/llvm/llvm-project/commit/387c1573f89117687f4b964ae3a90ea7c91a4f90.diff
LOG: [NFC][Codegen] Tests with wide scalar shifts, for new potential legalization strategy
Added:
llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
new file mode 100644
index 0000000000000..b909fd3229f70
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
@@ -0,0 +1,380 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=ALL
+
+define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: lshr_4bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr w8, [x1]
+; ALL-NEXT: ldr w9, [x0]
+; ALL-NEXT: lsl w8, w8, #3
+; ALL-NEXT: lsr w8, w9, w8
+; ALL-NEXT: str w8, [x2]
+; ALL-NEXT: ret
+ %src = load i32, ptr %src.ptr, align 1
+ %byteOff = load i32, ptr %byteOff.ptr, align 1
+ %bitOff = shl i32 %byteOff, 3
+ %res = lshr i32 %src, %bitOff
+ store i32 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: shl_4bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr w8, [x1]
+; ALL-NEXT: ldr w9, [x0]
+; ALL-NEXT: lsl w8, w8, #3
+; ALL-NEXT: lsl w8, w9, w8
+; ALL-NEXT: str w8, [x2]
+; ALL-NEXT: ret
+ %src = load i32, ptr %src.ptr, align 1
+ %byteOff = load i32, ptr %byteOff.ptr, align 1
+ %bitOff = shl i32 %byteOff, 3
+ %res = shl i32 %src, %bitOff
+ store i32 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: ashr_4bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr w8, [x1]
+; ALL-NEXT: ldr w9, [x0]
+; ALL-NEXT: lsl w8, w8, #3
+; ALL-NEXT: asr w8, w9, w8
+; ALL-NEXT: str w8, [x2]
+; ALL-NEXT: ret
+ %src = load i32, ptr %src.ptr, align 1
+ %byteOff = load i32, ptr %byteOff.ptr, align 1
+ %bitOff = shl i32 %byteOff, 3
+ %res = ashr i32 %src, %bitOff
+ store i32 %res, ptr %dst, align 1
+ ret void
+}
+
+define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: lshr_8bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x8, [x1]
+; ALL-NEXT: ldr x9, [x0]
+; ALL-NEXT: lsl x8, x8, #3
+; ALL-NEXT: lsr x8, x9, x8
+; ALL-NEXT: str x8, [x2]
+; ALL-NEXT: ret
+ %src = load i64, ptr %src.ptr, align 1
+ %byteOff = load i64, ptr %byteOff.ptr, align 1
+ %bitOff = shl i64 %byteOff, 3
+ %res = lshr i64 %src, %bitOff
+ store i64 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: shl_8bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x8, [x1]
+; ALL-NEXT: ldr x9, [x0]
+; ALL-NEXT: lsl x8, x8, #3
+; ALL-NEXT: lsl x8, x9, x8
+; ALL-NEXT: str x8, [x2]
+; ALL-NEXT: ret
+ %src = load i64, ptr %src.ptr, align 1
+ %byteOff = load i64, ptr %byteOff.ptr, align 1
+ %bitOff = shl i64 %byteOff, 3
+ %res = shl i64 %src, %bitOff
+ store i64 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: ashr_8bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x8, [x1]
+; ALL-NEXT: ldr x9, [x0]
+; ALL-NEXT: lsl x8, x8, #3
+; ALL-NEXT: asr x8, x9, x8
+; ALL-NEXT: str x8, [x2]
+; ALL-NEXT: ret
+ %src = load i64, ptr %src.ptr, align 1
+ %byteOff = load i64, ptr %byteOff.ptr, align 1
+ %bitOff = shl i64 %byteOff, 3
+ %res = ashr i64 %src, %bitOff
+ store i64 %res, ptr %dst, align 1
+ ret void
+}
+
+define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: lshr_16bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x8, [x1]
+; ALL-NEXT: ldp x10, x9, [x0]
+; ALL-NEXT: lsl x8, x8, #3
+; ALL-NEXT: and x11, x8, #0x38
+; ALL-NEXT: mvn w12, w8
+; ALL-NEXT: tst x8, #0x40
+; ALL-NEXT: lsl x13, x9, #1
+; ALL-NEXT: lsr x10, x10, x11
+; ALL-NEXT: lsl x12, x13, x12
+; ALL-NEXT: lsr x9, x9, x11
+; ALL-NEXT: orr x8, x12, x10
+; ALL-NEXT: csel x10, xzr, x9, ne
+; ALL-NEXT: csel x8, x9, x8, ne
+; ALL-NEXT: stp x8, x10, [x2]
+; ALL-NEXT: ret
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = lshr i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: shl_16bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x8, [x1]
+; ALL-NEXT: ldp x9, x10, [x0]
+; ALL-NEXT: lsl x8, x8, #3
+; ALL-NEXT: and x11, x8, #0x38
+; ALL-NEXT: mvn w12, w8
+; ALL-NEXT: lsr x13, x9, #1
+; ALL-NEXT: tst x8, #0x40
+; ALL-NEXT: lsl x10, x10, x11
+; ALL-NEXT: lsr x12, x13, x12
+; ALL-NEXT: lsl x9, x9, x11
+; ALL-NEXT: orr x8, x10, x12
+; ALL-NEXT: csel x10, xzr, x9, ne
+; ALL-NEXT: csel x8, x9, x8, ne
+; ALL-NEXT: stp x10, x8, [x2]
+; ALL-NEXT: ret
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = shl i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: ashr_16bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x8, [x1]
+; ALL-NEXT: ldp x10, x9, [x0]
+; ALL-NEXT: lsl x8, x8, #3
+; ALL-NEXT: and x11, x8, #0x38
+; ALL-NEXT: mvn w12, w8
+; ALL-NEXT: tst x8, #0x40
+; ALL-NEXT: lsl x13, x9, #1
+; ALL-NEXT: asr x8, x9, #63
+; ALL-NEXT: lsr x10, x10, x11
+; ALL-NEXT: lsl x12, x13, x12
+; ALL-NEXT: asr x11, x9, x11
+; ALL-NEXT: orr x9, x12, x10
+; ALL-NEXT: csel x8, x8, x11, ne
+; ALL-NEXT: csel x9, x11, x9, ne
+; ALL-NEXT: stp x9, x8, [x2]
+; ALL-NEXT: ret
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = ashr i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+
+define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: lshr_32bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: mov w8, #128
+; ALL-NEXT: ldp x11, x10, [x0, #8]
+; ALL-NEXT: lsl x9, x9, #3
+; ALL-NEXT: ldr x12, [x0]
+; ALL-NEXT: sub x8, x8, x9
+; ALL-NEXT: ldr x13, [x0, #24]
+; ALL-NEXT: and x17, x8, #0x38
+; ALL-NEXT: mvn w0, w8
+; ALL-NEXT: lsr x14, x10, #1
+; ALL-NEXT: and x15, x9, #0x38
+; ALL-NEXT: mvn w16, w9
+; ALL-NEXT: tst x8, #0x40
+; ALL-NEXT: lsl x3, x13, x17
+; ALL-NEXT: lsr x14, x14, x0
+; ALL-NEXT: lsl x17, x10, x17
+; ALL-NEXT: orr x14, x3, x14
+; ALL-NEXT: lsl x18, x13, #1
+; ALL-NEXT: csel x0, xzr, x17, ne
+; ALL-NEXT: csel x14, x17, x14, ne
+; ALL-NEXT: lsl x17, x11, #1
+; ALL-NEXT: lsr x8, x10, x15
+; ALL-NEXT: lsl x1, x18, x16
+; ALL-NEXT: lsr x3, x12, x15
+; ALL-NEXT: lsl x16, x17, x16
+; ALL-NEXT: orr x8, x1, x8
+; ALL-NEXT: lsr x1, x13, x15
+; ALL-NEXT: tst x9, #0x40
+; ALL-NEXT: orr x16, x16, x3
+; ALL-NEXT: lsr x15, x11, x15
+; ALL-NEXT: csel x8, x1, x8, ne
+; ALL-NEXT: csel x16, x15, x16, ne
+; ALL-NEXT: csel x15, xzr, x15, ne
+; ALL-NEXT: csel x17, xzr, x1, ne
+; ALL-NEXT: subs x1, x9, #128
+; ALL-NEXT: and x3, x1, #0x38
+; ALL-NEXT: mvn w4, w1
+; ALL-NEXT: csel x17, x17, xzr, lo
+; ALL-NEXT: tst x1, #0x40
+; ALL-NEXT: orr x16, x16, x0
+; ALL-NEXT: orr x14, x15, x14
+; ALL-NEXT: lsr x10, x10, x3
+; ALL-NEXT: lsl x18, x18, x4
+; ALL-NEXT: orr x10, x18, x10
+; ALL-NEXT: lsr x13, x13, x3
+; ALL-NEXT: csel x10, x13, x10, ne
+; ALL-NEXT: csel x13, xzr, x13, ne
+; ALL-NEXT: cmp x9, #128
+; ALL-NEXT: csel x10, x16, x10, lo
+; ALL-NEXT: csel x8, x8, xzr, lo
+; ALL-NEXT: csel x13, x14, x13, lo
+; ALL-NEXT: cmp x9, #0
+; ALL-NEXT: csel x9, x12, x10, eq
+; ALL-NEXT: csel x10, x11, x13, eq
+; ALL-NEXT: stp x8, x17, [x2, #16]
+; ALL-NEXT: stp x9, x10, [x2]
+; ALL-NEXT: ret
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = lshr i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: shl_32bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: mov w8, #128
+; ALL-NEXT: ldp x10, x11, [x0, #8]
+; ALL-NEXT: lsl x9, x9, #3
+; ALL-NEXT: ldr x12, [x0, #24]
+; ALL-NEXT: sub x8, x8, x9
+; ALL-NEXT: ldr x13, [x0]
+; ALL-NEXT: and x17, x8, #0x38
+; ALL-NEXT: mvn w0, w8
+; ALL-NEXT: lsl x14, x10, #1
+; ALL-NEXT: and x15, x9, #0x38
+; ALL-NEXT: mvn w16, w9
+; ALL-NEXT: tst x8, #0x40
+; ALL-NEXT: lsr x3, x13, x17
+; ALL-NEXT: lsl x14, x14, x0
+; ALL-NEXT: lsr x17, x10, x17
+; ALL-NEXT: orr x14, x14, x3
+; ALL-NEXT: lsr x18, x13, #1
+; ALL-NEXT: csel x0, xzr, x17, ne
+; ALL-NEXT: csel x14, x17, x14, ne
+; ALL-NEXT: lsr x17, x11, #1
+; ALL-NEXT: lsl x8, x10, x15
+; ALL-NEXT: lsr x1, x18, x16
+; ALL-NEXT: lsl x3, x12, x15
+; ALL-NEXT: lsr x16, x17, x16
+; ALL-NEXT: orr x8, x8, x1
+; ALL-NEXT: lsl x1, x13, x15
+; ALL-NEXT: tst x9, #0x40
+; ALL-NEXT: orr x16, x3, x16
+; ALL-NEXT: lsl x15, x11, x15
+; ALL-NEXT: csel x8, x1, x8, ne
+; ALL-NEXT: csel x16, x15, x16, ne
+; ALL-NEXT: csel x15, xzr, x15, ne
+; ALL-NEXT: csel x17, xzr, x1, ne
+; ALL-NEXT: subs x1, x9, #128
+; ALL-NEXT: and x3, x1, #0x38
+; ALL-NEXT: mvn w4, w1
+; ALL-NEXT: csel x17, x17, xzr, lo
+; ALL-NEXT: tst x1, #0x40
+; ALL-NEXT: orr x16, x16, x0
+; ALL-NEXT: orr x14, x15, x14
+; ALL-NEXT: lsl x10, x10, x3
+; ALL-NEXT: lsr x18, x18, x4
+; ALL-NEXT: orr x10, x10, x18
+; ALL-NEXT: lsl x13, x13, x3
+; ALL-NEXT: csel x10, x13, x10, ne
+; ALL-NEXT: csel x13, xzr, x13, ne
+; ALL-NEXT: cmp x9, #128
+; ALL-NEXT: csel x10, x16, x10, lo
+; ALL-NEXT: csel x8, x8, xzr, lo
+; ALL-NEXT: csel x13, x14, x13, lo
+; ALL-NEXT: cmp x9, #0
+; ALL-NEXT: csel x9, x12, x10, eq
+; ALL-NEXT: csel x10, x11, x13, eq
+; ALL-NEXT: stp x17, x8, [x2]
+; ALL-NEXT: stp x10, x9, [x2, #16]
+; ALL-NEXT: ret
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = shl i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: ashr_32bytes:
+; ALL: // %bb.0:
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: mov w8, #128
+; ALL-NEXT: ldp x11, x10, [x0, #8]
+; ALL-NEXT: lsl x9, x9, #3
+; ALL-NEXT: ldr x12, [x0]
+; ALL-NEXT: sub x8, x8, x9
+; ALL-NEXT: ldr x13, [x0, #24]
+; ALL-NEXT: and x18, x8, #0x38
+; ALL-NEXT: mvn w0, w8
+; ALL-NEXT: lsr x14, x10, #1
+; ALL-NEXT: and x15, x9, #0x38
+; ALL-NEXT: mvn w16, w9
+; ALL-NEXT: lsl x17, x13, #1
+; ALL-NEXT: lsl x4, x13, x18
+; ALL-NEXT: lsr x14, x14, x0
+; ALL-NEXT: tst x8, #0x40
+; ALL-NEXT: lsl x18, x10, x18
+; ALL-NEXT: orr x14, x4, x14
+; ALL-NEXT: lsr x8, x10, x15
+; ALL-NEXT: lsl x1, x17, x16
+; ALL-NEXT: csel x0, xzr, x18, ne
+; ALL-NEXT: csel x14, x18, x14, ne
+; ALL-NEXT: lsl x18, x11, #1
+; ALL-NEXT: orr x8, x1, x8
+; ALL-NEXT: lsr x1, x12, x15
+; ALL-NEXT: lsl x16, x18, x16
+; ALL-NEXT: asr x3, x13, x15
+; ALL-NEXT: tst x9, #0x40
+; ALL-NEXT: orr x16, x16, x1
+; ALL-NEXT: lsr x15, x11, x15
+; ALL-NEXT: asr x18, x13, #63
+; ALL-NEXT: csel x8, x3, x8, ne
+; ALL-NEXT: csel x16, x15, x16, ne
+; ALL-NEXT: csel x15, xzr, x15, ne
+; ALL-NEXT: csel x1, x18, x3, ne
+; ALL-NEXT: subs x3, x9, #128
+; ALL-NEXT: orr x16, x16, x0
+; ALL-NEXT: and x4, x3, #0x38
+; ALL-NEXT: mvn w5, w3
+; ALL-NEXT: orr x14, x15, x14
+; ALL-NEXT: lsr x10, x10, x4
+; ALL-NEXT: lsl x17, x17, x5
+; ALL-NEXT: orr x10, x17, x10
+; ALL-NEXT: csel x17, x1, x18, lo
+; ALL-NEXT: asr x13, x13, x4
+; ALL-NEXT: tst x3, #0x40
+; ALL-NEXT: csel x10, x13, x10, ne
+; ALL-NEXT: csel x13, x18, x13, ne
+; ALL-NEXT: cmp x9, #128
+; ALL-NEXT: csel x10, x16, x10, lo
+; ALL-NEXT: csel x8, x8, x18, lo
+; ALL-NEXT: csel x13, x14, x13, lo
+; ALL-NEXT: cmp x9, #0
+; ALL-NEXT: csel x9, x12, x10, eq
+; ALL-NEXT: csel x10, x11, x13, eq
+; ALL-NEXT: stp x8, x17, [x2, #16]
+; ALL-NEXT: stp x9, x10, [x2]
+; ALL-NEXT: ret
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = ashr i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
new file mode 100644
index 0000000000000..c6dfa12db0405
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll
@@ -0,0 +1,2385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,LE,LE-64BIT
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BE
+; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=ALL,LE,LE-32BIT
+
+define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: lshr_4bytes:
+; ALL: # %bb.0:
+; ALL-NEXT: lwz 4, 0(4)
+; ALL-NEXT: lwz 3, 0(3)
+; ALL-NEXT: slwi 4, 4, 3
+; ALL-NEXT: srw 3, 3, 4
+; ALL-NEXT: stw 3, 0(5)
+; ALL-NEXT: blr
+ %src = load i32, ptr %src.ptr, align 1
+ %byteOff = load i32, ptr %byteOff.ptr, align 1
+ %bitOff = shl i32 %byteOff, 3
+ %res = lshr i32 %src, %bitOff
+ store i32 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: shl_4bytes:
+; ALL: # %bb.0:
+; ALL-NEXT: lwz 4, 0(4)
+; ALL-NEXT: lwz 3, 0(3)
+; ALL-NEXT: slwi 4, 4, 3
+; ALL-NEXT: slw 3, 3, 4
+; ALL-NEXT: stw 3, 0(5)
+; ALL-NEXT: blr
+ %src = load i32, ptr %src.ptr, align 1
+ %byteOff = load i32, ptr %byteOff.ptr, align 1
+ %bitOff = shl i32 %byteOff, 3
+ %res = shl i32 %src, %bitOff
+ store i32 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; ALL-LABEL: ashr_4bytes:
+; ALL: # %bb.0:
+; ALL-NEXT: lwz 4, 0(4)
+; ALL-NEXT: lwz 3, 0(3)
+; ALL-NEXT: slwi 4, 4, 3
+; ALL-NEXT: sraw 3, 3, 4
+; ALL-NEXT: stw 3, 0(5)
+; ALL-NEXT: blr
+ %src = load i32, ptr %src.ptr, align 1
+ %byteOff = load i32, ptr %byteOff.ptr, align 1
+ %bitOff = shl i32 %byteOff, 3
+ %res = ashr i32 %src, %bitOff
+ store i32 %res, ptr %dst, align 1
+ ret void
+}
+
+define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: lshr_8bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 3, 0(3)
+; LE-64BIT-NEXT: slwi 4, 4, 3
+; LE-64BIT-NEXT: srd 3, 3, 4
+; LE-64BIT-NEXT: std 3, 0(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: lshr_8bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 4(4)
+; BE-NEXT: ld 3, 0(3)
+; BE-NEXT: slwi 4, 4, 3
+; BE-NEXT: srd 3, 3, 4
+; BE-NEXT: std 3, 0(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: lshr_8bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: lwz 4, 4(4)
+; LE-32BIT-NEXT: lwz 6, 4(3)
+; LE-32BIT-NEXT: lwz 3, 0(3)
+; LE-32BIT-NEXT: slwi 4, 4, 3
+; LE-32BIT-NEXT: subfic 7, 4, 32
+; LE-32BIT-NEXT: srw 6, 6, 4
+; LE-32BIT-NEXT: addi 8, 4, -32
+; LE-32BIT-NEXT: slw 7, 3, 7
+; LE-32BIT-NEXT: srw 4, 3, 4
+; LE-32BIT-NEXT: srw 3, 3, 8
+; LE-32BIT-NEXT: or 6, 6, 7
+; LE-32BIT-NEXT: or 3, 6, 3
+; LE-32BIT-NEXT: stw 4, 0(5)
+; LE-32BIT-NEXT: stw 3, 4(5)
+; LE-32BIT-NEXT: blr
+ %src = load i64, ptr %src.ptr, align 1
+ %byteOff = load i64, ptr %byteOff.ptr, align 1
+ %bitOff = shl i64 %byteOff, 3
+ %res = lshr i64 %src, %bitOff
+ store i64 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: shl_8bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 3, 0(3)
+; LE-64BIT-NEXT: slwi 4, 4, 3
+; LE-64BIT-NEXT: sld 3, 3, 4
+; LE-64BIT-NEXT: std 3, 0(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: shl_8bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 4(4)
+; BE-NEXT: ld 3, 0(3)
+; BE-NEXT: slwi 4, 4, 3
+; BE-NEXT: sld 3, 3, 4
+; BE-NEXT: std 3, 0(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: shl_8bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: lwz 4, 4(4)
+; LE-32BIT-NEXT: lwz 6, 0(3)
+; LE-32BIT-NEXT: lwz 3, 4(3)
+; LE-32BIT-NEXT: slwi 4, 4, 3
+; LE-32BIT-NEXT: subfic 7, 4, 32
+; LE-32BIT-NEXT: slw 6, 6, 4
+; LE-32BIT-NEXT: addi 8, 4, -32
+; LE-32BIT-NEXT: srw 7, 3, 7
+; LE-32BIT-NEXT: slw 4, 3, 4
+; LE-32BIT-NEXT: slw 3, 3, 8
+; LE-32BIT-NEXT: or 6, 6, 7
+; LE-32BIT-NEXT: or 3, 6, 3
+; LE-32BIT-NEXT: stw 4, 4(5)
+; LE-32BIT-NEXT: stw 3, 0(5)
+; LE-32BIT-NEXT: blr
+ %src = load i64, ptr %src.ptr, align 1
+ %byteOff = load i64, ptr %byteOff.ptr, align 1
+ %bitOff = shl i64 %byteOff, 3
+ %res = shl i64 %src, %bitOff
+ store i64 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: ashr_8bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 3, 0(3)
+; LE-64BIT-NEXT: slwi 4, 4, 3
+; LE-64BIT-NEXT: srad 3, 3, 4
+; LE-64BIT-NEXT: std 3, 0(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: ashr_8bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 4(4)
+; BE-NEXT: ld 3, 0(3)
+; BE-NEXT: slwi 4, 4, 3
+; BE-NEXT: srad 3, 3, 4
+; BE-NEXT: std 3, 0(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: ashr_8bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: lwz 4, 4(4)
+; LE-32BIT-NEXT: lwz 6, 4(3)
+; LE-32BIT-NEXT: lwz 3, 0(3)
+; LE-32BIT-NEXT: slwi 4, 4, 3
+; LE-32BIT-NEXT: subfic 7, 4, 32
+; LE-32BIT-NEXT: srw 6, 6, 4
+; LE-32BIT-NEXT: addi 8, 4, -32
+; LE-32BIT-NEXT: slw 7, 3, 7
+; LE-32BIT-NEXT: sraw 4, 3, 4
+; LE-32BIT-NEXT: sraw 3, 3, 8
+; LE-32BIT-NEXT: cmpwi 8, 1
+; LE-32BIT-NEXT: or 6, 6, 7
+; LE-32BIT-NEXT: bc 12, 0, .LBB5_1
+; LE-32BIT-NEXT: b .LBB5_2
+; LE-32BIT-NEXT: .LBB5_1:
+; LE-32BIT-NEXT: addi 3, 6, 0
+; LE-32BIT-NEXT: .LBB5_2:
+; LE-32BIT-NEXT: stw 4, 0(5)
+; LE-32BIT-NEXT: stw 3, 4(5)
+; LE-32BIT-NEXT: blr
+ %src = load i64, ptr %src.ptr, align 1
+ %byteOff = load i64, ptr %byteOff.ptr, align 1
+ %bitOff = shl i64 %byteOff, 3
+ %res = ashr i64 %src, %bitOff
+ store i64 %res, ptr %dst, align 1
+ ret void
+}
+
+define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: lshr_16bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 6, 0(3)
+; LE-64BIT-NEXT: ld 3, 8(3)
+; LE-64BIT-NEXT: slwi 4, 4, 3
+; LE-64BIT-NEXT: subfic 7, 4, 64
+; LE-64BIT-NEXT: srd 6, 6, 4
+; LE-64BIT-NEXT: addi 8, 4, -64
+; LE-64BIT-NEXT: sld 7, 3, 7
+; LE-64BIT-NEXT: or 6, 6, 7
+; LE-64BIT-NEXT: srd 7, 3, 8
+; LE-64BIT-NEXT: or 6, 6, 7
+; LE-64BIT-NEXT: srd 3, 3, 4
+; LE-64BIT-NEXT: std 3, 8(5)
+; LE-64BIT-NEXT: std 6, 0(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: lshr_16bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 12(4)
+; BE-NEXT: ld 6, 0(3)
+; BE-NEXT: ld 3, 8(3)
+; BE-NEXT: slwi 4, 4, 3
+; BE-NEXT: subfic 7, 4, 64
+; BE-NEXT: srd 3, 3, 4
+; BE-NEXT: sld 7, 6, 7
+; BE-NEXT: addi 8, 4, -64
+; BE-NEXT: or 3, 3, 7
+; BE-NEXT: srd 7, 6, 8
+; BE-NEXT: srd 4, 6, 4
+; BE-NEXT: or 3, 3, 7
+; BE-NEXT: std 4, 0(5)
+; BE-NEXT: std 3, 8(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: lshr_16bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: stwu 1, -32(1)
+; LE-32BIT-NEXT: lwz 4, 12(4)
+; LE-32BIT-NEXT: li 8, 0
+; LE-32BIT-NEXT: lwz 6, 8(3)
+; LE-32BIT-NEXT: lwz 7, 12(3)
+; LE-32BIT-NEXT: rlwinm. 4, 4, 3, 0, 28
+; LE-32BIT-NEXT: lwz 9, 4(3)
+; LE-32BIT-NEXT: subfic 10, 4, 96
+; LE-32BIT-NEXT: lwz 3, 0(3)
+; LE-32BIT-NEXT: addi 11, 4, -64
+; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: cmplwi 1, 4, 64
+; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 28, 3, 4
+; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 30, 4, 32
+; LE-32BIT-NEXT: slw 10, 3, 10
+; LE-32BIT-NEXT: srw 27, 9, 11
+; LE-32BIT-NEXT: addi 12, 4, -96
+; LE-32BIT-NEXT: srw 0, 7, 4
+; LE-32BIT-NEXT: or 10, 27, 10
+; LE-32BIT-NEXT: slw 27, 6, 30
+; LE-32BIT-NEXT: bc 12, 4, .LBB6_2
+; LE-32BIT-NEXT: # %bb.1:
+; LE-32BIT-NEXT: ori 28, 8, 0
+; LE-32BIT-NEXT: b .LBB6_2
+; LE-32BIT-NEXT: .LBB6_2:
+; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 29, 9, 4
+; LE-32BIT-NEXT: or 0, 0, 27
+; LE-32BIT-NEXT: slw 27, 3, 30
+; LE-32BIT-NEXT: stw 28, 0(5)
+; LE-32BIT-NEXT: subfic 28, 4, 64
+; LE-32BIT-NEXT: srw 12, 3, 12
+; LE-32BIT-NEXT: or 29, 29, 27
+; LE-32BIT-NEXT: addi 27, 4, -32
+; LE-32BIT-NEXT: or 10, 10, 12
+; LE-32BIT-NEXT: subfic 12, 28, 32
+; LE-32BIT-NEXT: slw 30, 9, 30
+; LE-32BIT-NEXT: srw 12, 9, 12
+; LE-32BIT-NEXT: slw 9, 9, 28
+; LE-32BIT-NEXT: slw 28, 3, 28
+; LE-32BIT-NEXT: srw 11, 3, 11
+; LE-32BIT-NEXT: srw 3, 3, 27
+; LE-32BIT-NEXT: srw 27, 6, 27
+; LE-32BIT-NEXT: or 0, 0, 27
+; LE-32BIT-NEXT: or 12, 28, 12
+; LE-32BIT-NEXT: srw 4, 6, 4
+; LE-32BIT-NEXT: or 3, 29, 3
+; LE-32BIT-NEXT: or 9, 0, 9
+; LE-32BIT-NEXT: or 12, 12, 30
+; LE-32BIT-NEXT: bc 12, 4, .LBB6_4
+; LE-32BIT-NEXT: # %bb.3:
+; LE-32BIT-NEXT: ori 3, 8, 0
+; LE-32BIT-NEXT: ori 8, 10, 0
+; LE-32BIT-NEXT: b .LBB6_5
+; LE-32BIT-NEXT: .LBB6_4:
+; LE-32BIT-NEXT: addi 8, 9, 0
+; LE-32BIT-NEXT: .LBB6_5:
+; LE-32BIT-NEXT: or 4, 4, 12
+; LE-32BIT-NEXT: stw 3, 4(5)
+; LE-32BIT-NEXT: bc 12, 2, .LBB6_7
+; LE-32BIT-NEXT: # %bb.6:
+; LE-32BIT-NEXT: ori 3, 8, 0
+; LE-32BIT-NEXT: b .LBB6_8
+; LE-32BIT-NEXT: .LBB6_7:
+; LE-32BIT-NEXT: addi 3, 7, 0
+; LE-32BIT-NEXT: .LBB6_8:
+; LE-32BIT-NEXT: bc 12, 4, .LBB6_10
+; LE-32BIT-NEXT: # %bb.9:
+; LE-32BIT-NEXT: ori 4, 11, 0
+; LE-32BIT-NEXT: b .LBB6_10
+; LE-32BIT-NEXT: .LBB6_10:
+; LE-32BIT-NEXT: stw 3, 12(5)
+; LE-32BIT-NEXT: bc 12, 2, .LBB6_12
+; LE-32BIT-NEXT: # %bb.11:
+; LE-32BIT-NEXT: ori 3, 4, 0
+; LE-32BIT-NEXT: b .LBB6_13
+; LE-32BIT-NEXT: .LBB6_12:
+; LE-32BIT-NEXT: addi 3, 6, 0
+; LE-32BIT-NEXT: .LBB6_13:
+; LE-32BIT-NEXT: stw 3, 8(5)
+; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: addi 1, 1, 32
+; LE-32BIT-NEXT: blr
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = lshr i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: shl_16bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 6, 8(3)
+; LE-64BIT-NEXT: ld 3, 0(3)
+; LE-64BIT-NEXT: slwi 4, 4, 3
+; LE-64BIT-NEXT: subfic 7, 4, 64
+; LE-64BIT-NEXT: sld 6, 6, 4
+; LE-64BIT-NEXT: addi 8, 4, -64
+; LE-64BIT-NEXT: srd 7, 3, 7
+; LE-64BIT-NEXT: or 6, 6, 7
+; LE-64BIT-NEXT: sld 7, 3, 8
+; LE-64BIT-NEXT: or 6, 6, 7
+; LE-64BIT-NEXT: sld 3, 3, 4
+; LE-64BIT-NEXT: std 3, 0(5)
+; LE-64BIT-NEXT: std 6, 8(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: shl_16bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 12(4)
+; BE-NEXT: ld 6, 8(3)
+; BE-NEXT: ld 3, 0(3)
+; BE-NEXT: slwi 4, 4, 3
+; BE-NEXT: subfic 7, 4, 64
+; BE-NEXT: sld 3, 3, 4
+; BE-NEXT: srd 7, 6, 7
+; BE-NEXT: addi 8, 4, -64
+; BE-NEXT: or 3, 3, 7
+; BE-NEXT: sld 7, 6, 8
+; BE-NEXT: sld 4, 6, 4
+; BE-NEXT: or 3, 3, 7
+; BE-NEXT: std 4, 8(5)
+; BE-NEXT: std 3, 0(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: shl_16bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: stwu 1, -32(1)
+; LE-32BIT-NEXT: lwz 4, 12(4)
+; LE-32BIT-NEXT: li 8, 0
+; LE-32BIT-NEXT: lwz 6, 4(3)
+; LE-32BIT-NEXT: lwz 7, 0(3)
+; LE-32BIT-NEXT: rlwinm. 4, 4, 3, 0, 28
+; LE-32BIT-NEXT: lwz 9, 8(3)
+; LE-32BIT-NEXT: subfic 10, 4, 96
+; LE-32BIT-NEXT: lwz 3, 12(3)
+; LE-32BIT-NEXT: addi 11, 4, -64
+; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: cmplwi 1, 4, 64
+; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: slw 28, 3, 4
+; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 30, 4, 32
+; LE-32BIT-NEXT: srw 10, 3, 10
+; LE-32BIT-NEXT: slw 27, 9, 11
+; LE-32BIT-NEXT: addi 12, 4, -96
+; LE-32BIT-NEXT: slw 0, 7, 4
+; LE-32BIT-NEXT: or 10, 27, 10
+; LE-32BIT-NEXT: srw 27, 6, 30
+; LE-32BIT-NEXT: bc 12, 4, .LBB7_2
+; LE-32BIT-NEXT: # %bb.1:
+; LE-32BIT-NEXT: ori 28, 8, 0
+; LE-32BIT-NEXT: b .LBB7_2
+; LE-32BIT-NEXT: .LBB7_2:
+; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: slw 29, 9, 4
+; LE-32BIT-NEXT: or 0, 0, 27
+; LE-32BIT-NEXT: srw 27, 3, 30
+; LE-32BIT-NEXT: stw 28, 12(5)
+; LE-32BIT-NEXT: subfic 28, 4, 64
+; LE-32BIT-NEXT: slw 12, 3, 12
+; LE-32BIT-NEXT: or 29, 29, 27
+; LE-32BIT-NEXT: addi 27, 4, -32
+; LE-32BIT-NEXT: or 10, 10, 12
+; LE-32BIT-NEXT: subfic 12, 28, 32
+; LE-32BIT-NEXT: srw 30, 9, 30
+; LE-32BIT-NEXT: slw 12, 9, 12
+; LE-32BIT-NEXT: srw 9, 9, 28
+; LE-32BIT-NEXT: srw 28, 3, 28
+; LE-32BIT-NEXT: slw 11, 3, 11
+; LE-32BIT-NEXT: slw 3, 3, 27
+; LE-32BIT-NEXT: slw 27, 6, 27
+; LE-32BIT-NEXT: or 0, 0, 27
+; LE-32BIT-NEXT: or 12, 28, 12
+; LE-32BIT-NEXT: slw 4, 6, 4
+; LE-32BIT-NEXT: or 3, 29, 3
+; LE-32BIT-NEXT: or 9, 0, 9
+; LE-32BIT-NEXT: or 12, 12, 30
+; LE-32BIT-NEXT: bc 12, 4, .LBB7_4
+; LE-32BIT-NEXT: # %bb.3:
+; LE-32BIT-NEXT: ori 3, 8, 0
+; LE-32BIT-NEXT: ori 8, 10, 0
+; LE-32BIT-NEXT: b .LBB7_5
+; LE-32BIT-NEXT: .LBB7_4:
+; LE-32BIT-NEXT: addi 8, 9, 0
+; LE-32BIT-NEXT: .LBB7_5:
+; LE-32BIT-NEXT: or 4, 4, 12
+; LE-32BIT-NEXT: stw 3, 8(5)
+; LE-32BIT-NEXT: bc 12, 2, .LBB7_7
+; LE-32BIT-NEXT: # %bb.6:
+; LE-32BIT-NEXT: ori 3, 8, 0
+; LE-32BIT-NEXT: b .LBB7_8
+; LE-32BIT-NEXT: .LBB7_7:
+; LE-32BIT-NEXT: addi 3, 7, 0
+; LE-32BIT-NEXT: .LBB7_8:
+; LE-32BIT-NEXT: bc 12, 4, .LBB7_10
+; LE-32BIT-NEXT: # %bb.9:
+; LE-32BIT-NEXT: ori 4, 11, 0
+; LE-32BIT-NEXT: b .LBB7_10
+; LE-32BIT-NEXT: .LBB7_10:
+; LE-32BIT-NEXT: stw 3, 0(5)
+; LE-32BIT-NEXT: bc 12, 2, .LBB7_12
+; LE-32BIT-NEXT: # %bb.11:
+; LE-32BIT-NEXT: ori 3, 4, 0
+; LE-32BIT-NEXT: b .LBB7_13
+; LE-32BIT-NEXT: .LBB7_12:
+; LE-32BIT-NEXT: addi 3, 6, 0
+; LE-32BIT-NEXT: .LBB7_13:
+; LE-32BIT-NEXT: stw 3, 4(5)
+; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: addi 1, 1, 32
+; LE-32BIT-NEXT: blr
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = shl i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: ashr_16bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 6, 0(3)
+; LE-64BIT-NEXT: ld 3, 8(3)
+; LE-64BIT-NEXT: slwi 4, 4, 3
+; LE-64BIT-NEXT: subfic 7, 4, 64
+; LE-64BIT-NEXT: srd 6, 6, 4
+; LE-64BIT-NEXT: addi 8, 4, -64
+; LE-64BIT-NEXT: sld 7, 3, 7
+; LE-64BIT-NEXT: cmpwi 8, 1
+; LE-64BIT-NEXT: or 6, 6, 7
+; LE-64BIT-NEXT: srad 7, 3, 8
+; LE-64BIT-NEXT: isellt 6, 6, 7
+; LE-64BIT-NEXT: srad 3, 3, 4
+; LE-64BIT-NEXT: std 3, 8(5)
+; LE-64BIT-NEXT: std 6, 0(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: ashr_16bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 12(4)
+; BE-NEXT: ld 6, 8(3)
+; BE-NEXT: ld 3, 0(3)
+; BE-NEXT: slwi 4, 4, 3
+; BE-NEXT: subfic 7, 4, 64
+; BE-NEXT: srd 6, 6, 4
+; BE-NEXT: addi 8, 4, -64
+; BE-NEXT: sld 7, 3, 7
+; BE-NEXT: cmpwi 8, 1
+; BE-NEXT: or 6, 6, 7
+; BE-NEXT: srad 7, 3, 8
+; BE-NEXT: srad 3, 3, 4
+; BE-NEXT: bc 12, 0, .LBB8_2
+; BE-NEXT: # %bb.1:
+; BE-NEXT: ori 6, 7, 0
+; BE-NEXT: b .LBB8_2
+; BE-NEXT: .LBB8_2:
+; BE-NEXT: std 3, 0(5)
+; BE-NEXT: std 6, 8(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: ashr_16bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: stwu 1, -32(1)
+; LE-32BIT-NEXT: lwz 4, 12(4)
+; LE-32BIT-NEXT: lwz 8, 0(3)
+; LE-32BIT-NEXT: lwz 9, 4(3)
+; LE-32BIT-NEXT: lwz 6, 8(3)
+; LE-32BIT-NEXT: lwz 7, 12(3)
+; LE-32BIT-NEXT: rlwinm. 3, 4, 3, 0, 28
+; LE-32BIT-NEXT: subfic 10, 3, 96
+; LE-32BIT-NEXT: addi 11, 3, -64
+; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: addi 12, 3, -96
+; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 30, 3, 32
+; LE-32BIT-NEXT: slw 10, 8, 10
+; LE-32BIT-NEXT: srw 27, 9, 11
+; LE-32BIT-NEXT: stw 26, 8(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: sraw 26, 8, 12
+; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 28, 9, 3
+; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: addi 29, 3, -32
+; LE-32BIT-NEXT: cmpwi 1, 12, 1
+; LE-32BIT-NEXT: slw 12, 8, 30
+; LE-32BIT-NEXT: or 10, 27, 10
+; LE-32BIT-NEXT: srw 0, 7, 3
+; LE-32BIT-NEXT: sraw 27, 8, 29
+; LE-32BIT-NEXT: bc 12, 4, .LBB8_2
+; LE-32BIT-NEXT: # %bb.1:
+; LE-32BIT-NEXT: ori 10, 26, 0
+; LE-32BIT-NEXT: b .LBB8_2
+; LE-32BIT-NEXT: .LBB8_2:
+; LE-32BIT-NEXT: cmpwi 1, 29, 1
+; LE-32BIT-NEXT: or 12, 28, 12
+; LE-32BIT-NEXT: subfic 28, 3, 64
+; LE-32BIT-NEXT: slw 26, 6, 30
+; LE-32BIT-NEXT: srawi 4, 8, 31
+; LE-32BIT-NEXT: bc 12, 4, .LBB8_4
+; LE-32BIT-NEXT: # %bb.3:
+; LE-32BIT-NEXT: ori 12, 27, 0
+; LE-32BIT-NEXT: b .LBB8_4
+; LE-32BIT-NEXT: .LBB8_4:
+; LE-32BIT-NEXT: sraw 27, 8, 3
+; LE-32BIT-NEXT: or 0, 0, 26
+; LE-32BIT-NEXT: slw 26, 9, 28
+; LE-32BIT-NEXT: sraw 11, 8, 11
+; LE-32BIT-NEXT: slw 8, 8, 28
+; LE-32BIT-NEXT: subfic 28, 28, 32
+; LE-32BIT-NEXT: slw 30, 9, 30
+; LE-32BIT-NEXT: srw 9, 9, 28
+; LE-32BIT-NEXT: srw 29, 6, 29
+; LE-32BIT-NEXT: or 8, 8, 9
+; LE-32BIT-NEXT: cmplwi 1, 3, 64
+; LE-32BIT-NEXT: or 0, 0, 29
+; LE-32BIT-NEXT: srw 3, 6, 3
+; LE-32BIT-NEXT: or 8, 8, 30
+; LE-32BIT-NEXT: or 9, 0, 26
+; LE-32BIT-NEXT: or 3, 3, 8
+; LE-32BIT-NEXT: bc 12, 4, .LBB8_6
+; LE-32BIT-NEXT: # %bb.5:
+; LE-32BIT-NEXT: ori 28, 4, 0
+; LE-32BIT-NEXT: ori 9, 10, 0
+; LE-32BIT-NEXT: ori 3, 11, 0
+; LE-32BIT-NEXT: b .LBB8_7
+; LE-32BIT-NEXT: .LBB8_6:
+; LE-32BIT-NEXT: addi 28, 27, 0
+; LE-32BIT-NEXT: addi 4, 12, 0
+; LE-32BIT-NEXT: .LBB8_7:
+; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 2, .LBB8_8
+; LE-32BIT-NEXT: b .LBB8_9
+; LE-32BIT-NEXT: .LBB8_8:
+; LE-32BIT-NEXT: addi 3, 6, 0
+; LE-32BIT-NEXT: .LBB8_9:
+; LE-32BIT-NEXT: stw 4, 4(5)
+; LE-32BIT-NEXT: bc 12, 2, .LBB8_11
+; LE-32BIT-NEXT: # %bb.10:
+; LE-32BIT-NEXT: ori 4, 9, 0
+; LE-32BIT-NEXT: b .LBB8_12
+; LE-32BIT-NEXT: .LBB8_11:
+; LE-32BIT-NEXT: addi 4, 7, 0
+; LE-32BIT-NEXT: .LBB8_12:
+; LE-32BIT-NEXT: stw 28, 0(5)
+; LE-32BIT-NEXT: stw 4, 12(5)
+; LE-32BIT-NEXT: stw 3, 8(5)
+; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 26, 8(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: addi 1, 1, 32
+; LE-32BIT-NEXT: blr
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = ashr i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+
+define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: lshr_32bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 7, 0(3)
+; LE-64BIT-NEXT: ld 8, 8(3)
+; LE-64BIT-NEXT: ld 9, 16(3)
+; LE-64BIT-NEXT: li 6, 0
+; LE-64BIT-NEXT: ld 3, 24(3)
+; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: rlwinm. 4, 4, 3, 0, 28
+; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: subfic 28, 4, 64
+; LE-64BIT-NEXT: subfic 11, 4, 192
+; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: addi 0, 4, -128
+; LE-64BIT-NEXT: srd 29, 9, 4
+; LE-64BIT-NEXT: addi 27, 4, -64
+; LE-64BIT-NEXT: subfic 25, 4, 128
+; LE-64BIT-NEXT: sld 24, 8, 28
+; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: sld 21, 9, 28
+; LE-64BIT-NEXT: sld 28, 3, 28
+; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: srd 10, 7, 4
+; LE-64BIT-NEXT: addi 30, 4, -192
+; LE-64BIT-NEXT: subfic 22, 25, 64
+; LE-64BIT-NEXT: sld 11, 3, 11
+; LE-64BIT-NEXT: srd 26, 9, 0
+; LE-64BIT-NEXT: or 29, 29, 28
+; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: or 10, 10, 24
+; LE-64BIT-NEXT: srd 28, 3, 27
+; LE-64BIT-NEXT: srd 30, 3, 30
+; LE-64BIT-NEXT: or 11, 26, 11
+; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: srd 23, 8, 27
+; LE-64BIT-NEXT: srd 27, 9, 22
+; LE-64BIT-NEXT: or 29, 29, 28
+; LE-64BIT-NEXT: or 11, 11, 30
+; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: sld 28, 3, 25
+; LE-64BIT-NEXT: or 10, 10, 23
+; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: sld 9, 9, 25
+; LE-64BIT-NEXT: or 30, 28, 27
+; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: cmplwi 1, 4, 128
+; LE-64BIT-NEXT: srd 12, 8, 4
+; LE-64BIT-NEXT: or 9, 10, 9
+; LE-64BIT-NEXT: or 30, 30, 21
+; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: srd 10, 3, 0
+; LE-64BIT-NEXT: isel 9, 9, 11, 4
+; LE-64BIT-NEXT: or 11, 12, 30
+; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: iseleq 7, 7, 9
+; LE-64BIT-NEXT: srd 3, 3, 4
+; LE-64BIT-NEXT: isel 9, 11, 10, 4
+; LE-64BIT-NEXT: std 7, 0(5)
+; LE-64BIT-NEXT: isel 0, 29, 6, 4
+; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: iseleq 4, 8, 9
+; LE-64BIT-NEXT: std 0, 16(5)
+; LE-64BIT-NEXT: isel 3, 3, 6, 4
+; LE-64BIT-NEXT: std 4, 8(5)
+; LE-64BIT-NEXT: std 3, 24(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: lshr_32bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 28(4)
+; BE-NEXT: ld 7, 16(3)
+; BE-NEXT: ld 8, 24(3)
+; BE-NEXT: ld 9, 8(3)
+; BE-NEXT: ld 3, 0(3)
+; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; BE-NEXT: li 6, 0
+; BE-NEXT: rlwinm. 4, 4, 3, 0, 28
+; BE-NEXT: subfic 10, 4, 192
+; BE-NEXT: addi 11, 4, -128
+; BE-NEXT: addi 12, 4, -192
+; BE-NEXT: subfic 30, 4, 64
+; BE-NEXT: sld 10, 3, 10
+; BE-NEXT: srd 27, 9, 11
+; BE-NEXT: srd 0, 8, 4
+; BE-NEXT: addi 29, 4, -64
+; BE-NEXT: subfic 28, 4, 128
+; BE-NEXT: srd 12, 3, 12
+; BE-NEXT: or 10, 27, 10
+; BE-NEXT: sld 27, 7, 30
+; BE-NEXT: or 10, 10, 12
+; BE-NEXT: or 0, 0, 27
+; BE-NEXT: srd 27, 7, 29
+; BE-NEXT: subfic 12, 28, 64
+; BE-NEXT: or 0, 0, 27
+; BE-NEXT: sld 27, 3, 28
+; BE-NEXT: srd 12, 9, 12
+; BE-NEXT: sld 28, 9, 28
+; BE-NEXT: cmplwi 1, 4, 128
+; BE-NEXT: or 12, 27, 12
+; BE-NEXT: or 28, 0, 28
+; BE-NEXT: sld 0, 9, 30
+; BE-NEXT: srd 9, 9, 4
+; BE-NEXT: srd 11, 3, 11
+; BE-NEXT: bc 12, 4, .LBB9_1
+; BE-NEXT: b .LBB9_2
+; BE-NEXT: .LBB9_1:
+; BE-NEXT: addi 10, 28, 0
+; BE-NEXT: .LBB9_2:
+; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; BE-NEXT: or 12, 12, 0
+; BE-NEXT: srd 0, 7, 4
+; BE-NEXT: or 12, 0, 12
+; BE-NEXT: sld 0, 3, 30
+; BE-NEXT: srd 30, 3, 29
+; BE-NEXT: bc 12, 4, .LBB9_3
+; BE-NEXT: b .LBB9_4
+; BE-NEXT: .LBB9_3:
+; BE-NEXT: addi 11, 12, 0
+; BE-NEXT: .LBB9_4:
+; BE-NEXT: srd 3, 3, 4
+; BE-NEXT: bc 12, 2, .LBB9_6
+; BE-NEXT: # %bb.5:
+; BE-NEXT: ori 4, 10, 0
+; BE-NEXT: b .LBB9_7
+; BE-NEXT: .LBB9_6:
+; BE-NEXT: addi 4, 8, 0
+; BE-NEXT: .LBB9_7:
+; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; BE-NEXT: or 9, 9, 0
+; BE-NEXT: or 9, 9, 30
+; BE-NEXT: bc 12, 2, .LBB9_9
+; BE-NEXT: # %bb.8:
+; BE-NEXT: ori 7, 11, 0
+; BE-NEXT: b .LBB9_9
+; BE-NEXT: .LBB9_9:
+; BE-NEXT: bc 12, 4, .LBB9_11
+; BE-NEXT: # %bb.10:
+; BE-NEXT: ori 8, 6, 0
+; BE-NEXT: ori 3, 6, 0
+; BE-NEXT: b .LBB9_12
+; BE-NEXT: .LBB9_11:
+; BE-NEXT: addi 8, 9, 0
+; BE-NEXT: .LBB9_12:
+; BE-NEXT: std 4, 24(5)
+; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; BE-NEXT: std 3, 0(5)
+; BE-NEXT: std 8, 8(5)
+; BE-NEXT: std 7, 16(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: lshr_32bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: stwu 1, -144(1)
+; LE-32BIT-NEXT: mfcr 12
+; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 12, 68(1)
+; LE-32BIT-NEXT: lwz 0, 28(4)
+; LE-32BIT-NEXT: lwz 11, 4(3)
+; LE-32BIT-NEXT: lwz 6, 0(3)
+; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28
+; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 21, 30, 224
+; LE-32BIT-NEXT: lwz 5, 24(3)
+; LE-32BIT-NEXT: subfic 4, 30, 160
+; LE-32BIT-NEXT: lwz 7, 28(3)
+; LE-32BIT-NEXT: addi 0, 30, -128
+; LE-32BIT-NEXT: lwz 10, 20(3)
+; LE-32BIT-NEXT: subfic 28, 30, 96
+; LE-32BIT-NEXT: lwz 8, 16(3)
+; LE-32BIT-NEXT: addi 29, 30, -64
+; LE-32BIT-NEXT: lwz 27, 12(3)
+; LE-32BIT-NEXT: subfic 12, 30, 32
+; LE-32BIT-NEXT: lwz 9, 8(3)
+; LE-32BIT-NEXT: addi 3, 30, -192
+; LE-32BIT-NEXT: slw 21, 6, 21
+; LE-32BIT-NEXT: srw 16, 11, 3
+; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 20, 7, 30
+; LE-32BIT-NEXT: slw 15, 9, 4
+; LE-32BIT-NEXT: srw 14, 27, 0
+; LE-32BIT-NEXT: slw 31, 8, 28
+; LE-32BIT-NEXT: srw 3, 10, 29
+; LE-32BIT-NEXT: or 21, 16, 21
+; LE-32BIT-NEXT: slw 16, 5, 12
+; LE-32BIT-NEXT: srw 19, 10, 30
+; LE-32BIT-NEXT: or 15, 14, 15
+; LE-32BIT-NEXT: slw 14, 8, 12
+; LE-32BIT-NEXT: or 3, 3, 31
+; LE-32BIT-NEXT: slw 31, 6, 4
+; LE-32BIT-NEXT: or 20, 20, 16
+; LE-32BIT-NEXT: srw 16, 11, 0
+; LE-32BIT-NEXT: stw 7, 60(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: addi 26, 30, -224
+; LE-32BIT-NEXT: mr 7, 10
+; LE-32BIT-NEXT: mr 10, 12
+; LE-32BIT-NEXT: or 19, 19, 14
+; LE-32BIT-NEXT: slw 14, 6, 28
+; LE-32BIT-NEXT: or 16, 16, 31
+; LE-32BIT-NEXT: srw 31, 11, 29
+; LE-32BIT-NEXT: addi 23, 30, -160
+; LE-32BIT-NEXT: srw 18, 27, 30
+; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: mr 12, 28
+; LE-32BIT-NEXT: or 14, 31, 14
+; LE-32BIT-NEXT: mr 28, 9
+; LE-32BIT-NEXT: slw 31, 9, 10
+; LE-32BIT-NEXT: srw 0, 6, 26
+; LE-32BIT-NEXT: addi 25, 30, -96
+; LE-32BIT-NEXT: srw 17, 11, 30
+; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 18, 18, 31
+; LE-32BIT-NEXT: slw 31, 6, 10
+; LE-32BIT-NEXT: or 4, 21, 0
+; LE-32BIT-NEXT: srw 0, 28, 23
+; LE-32BIT-NEXT: or 17, 17, 31
+; LE-32BIT-NEXT: addi 31, 30, -32
+; LE-32BIT-NEXT: or 0, 15, 0
+; LE-32BIT-NEXT: srw 15, 8, 25
+; LE-32BIT-NEXT: or 3, 3, 15
+; LE-32BIT-NEXT: srw 15, 5, 31
+; LE-32BIT-NEXT: or 20, 20, 15
+; LE-32BIT-NEXT: srw 15, 8, 31
+; LE-32BIT-NEXT: stw 3, 24(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 3, 19, 15
+; LE-32BIT-NEXT: srw 23, 6, 23
+; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 15, 30, 64
+; LE-32BIT-NEXT: or 3, 16, 23
+; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 3, 15, 32
+; LE-32BIT-NEXT: slw 16, 28, 15
+; LE-32BIT-NEXT: srw 22, 27, 3
+; LE-32BIT-NEXT: stw 4, 32(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 4, 16, 22
+; LE-32BIT-NEXT: subfic 16, 30, 128
+; LE-32BIT-NEXT: stw 5, 28(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 5, 16, 32
+; LE-32BIT-NEXT: stw 4, 20(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: slw 4, 6, 16
+; LE-32BIT-NEXT: srw 24, 11, 5
+; LE-32BIT-NEXT: stw 29, 52(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: mr 29, 27
+; LE-32BIT-NEXT: or 22, 4, 24
+; LE-32BIT-NEXT: slw 24, 28, 16
+; LE-32BIT-NEXT: srw 27, 27, 5
+; LE-32BIT-NEXT: or 27, 24, 27
+; LE-32BIT-NEXT: slw 24, 8, 15
+; LE-32BIT-NEXT: srw 26, 7, 3
+; LE-32BIT-NEXT: or 26, 24, 26
+; LE-32BIT-NEXT: subfic 24, 30, 192
+; LE-32BIT-NEXT: mr 9, 10
+; LE-32BIT-NEXT: mr 10, 28
+; LE-32BIT-NEXT: subfic 28, 24, 32
+; LE-32BIT-NEXT: srw 28, 11, 28
+; LE-32BIT-NEXT: slw 19, 6, 24
+; LE-32BIT-NEXT: or 28, 19, 28
+; LE-32BIT-NEXT: srw 19, 6, 25
+; LE-32BIT-NEXT: or 19, 14, 19
+; LE-32BIT-NEXT: srw 14, 10, 31
+; LE-32BIT-NEXT: lwz 4, 64(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 18, 18, 14
+; LE-32BIT-NEXT: srw 3, 11, 3
+; LE-32BIT-NEXT: slw 14, 6, 15
+; LE-32BIT-NEXT: cmplwi 5, 30, 64
+; LE-32BIT-NEXT: cmplwi 1, 30, 128
+; LE-32BIT-NEXT: slw 24, 11, 24
+; LE-32BIT-NEXT: mr 21, 8
+; LE-32BIT-NEXT: or 8, 14, 3
+; LE-32BIT-NEXT: srw 14, 6, 31
+; LE-32BIT-NEXT: crnand 28, 4, 20
+; LE-32BIT-NEXT: srw 31, 6, 30
+; LE-32BIT-NEXT: or 24, 0, 24
+; LE-32BIT-NEXT: slw 0, 7, 15
+; LE-32BIT-NEXT: mr 23, 7
+; LE-32BIT-NEXT: or 17, 17, 14
+; LE-32BIT-NEXT: bc 12, 28, .LBB9_2
+; LE-32BIT-NEXT: # %bb.1:
+; LE-32BIT-NEXT: ori 14, 31, 0
+; LE-32BIT-NEXT: b .LBB9_3
+; LE-32BIT-NEXT: .LBB9_2:
+; LE-32BIT-NEXT: li 14, 0
+; LE-32BIT-NEXT: .LBB9_3:
+; LE-32BIT-NEXT: or 20, 20, 0
+; LE-32BIT-NEXT: subfic 0, 16, 64
+; LE-32BIT-NEXT: lwz 7, 20(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: srw 31, 29, 0
+; LE-32BIT-NEXT: stw 14, 0(4)
+; LE-32BIT-NEXT: subfic 14, 0, 32
+; LE-32BIT-NEXT: slw 14, 10, 14
+; LE-32BIT-NEXT: or 14, 31, 14
+; LE-32BIT-NEXT: slw 31, 29, 9
+; LE-32BIT-NEXT: lwz 3, 36(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 7, 7, 31
+; LE-32BIT-NEXT: slw 31, 11, 12
+; LE-32BIT-NEXT: stw 7, 20(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 7, 22, 31
+; LE-32BIT-NEXT: slw 31, 29, 12
+; LE-32BIT-NEXT: or 27, 27, 31
+; LE-32BIT-NEXT: slw 31, 23, 9
+; LE-32BIT-NEXT: or 26, 26, 31
+; LE-32BIT-NEXT: slw 31, 11, 3
+; LE-32BIT-NEXT: or 28, 28, 31
+; LE-32BIT-NEXT: slw 31, 11, 15
+; LE-32BIT-NEXT: lwz 22, 28(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 18, 18, 31
+; LE-32BIT-NEXT: lwz 31, 40(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: srw 0, 10, 0
+; LE-32BIT-NEXT: or 7, 7, 0
+; LE-32BIT-NEXT: srw 0, 22, 30
+; LE-32BIT-NEXT: slw 25, 11, 9
+; LE-32BIT-NEXT: or 26, 0, 26
+; LE-32BIT-NEXT: srw 0, 10, 31
+; LE-32BIT-NEXT: or 3, 8, 25
+; LE-32BIT-NEXT: or 28, 0, 28
+; LE-32BIT-NEXT: srw 0, 10, 30
+; LE-32BIT-NEXT: srw 5, 10, 5
+; LE-32BIT-NEXT: or 3, 0, 3
+; LE-32BIT-NEXT: bc 12, 28, .LBB9_5
+; LE-32BIT-NEXT: # %bb.4:
+; LE-32BIT-NEXT: ori 0, 17, 0
+; LE-32BIT-NEXT: b .LBB9_6
+; LE-32BIT-NEXT: .LBB9_5:
+; LE-32BIT-NEXT: li 0, 0
+; LE-32BIT-NEXT: .LBB9_6:
+; LE-32BIT-NEXT: lwz 8, 32(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 5, 14, 5
+; LE-32BIT-NEXT: mr 14, 4
+; LE-32BIT-NEXT: stw 0, 4(4)
+; LE-32BIT-NEXT: slw 0, 11, 16
+; LE-32BIT-NEXT: lwz 4, 52(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: cmplwi 6, 31, 64
+; LE-32BIT-NEXT: mr 9, 21
+; LE-32BIT-NEXT: or 5, 0, 5
+; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 24, .LBB9_8
+; LE-32BIT-NEXT: # %bb.7:
+; LE-32BIT-NEXT: ori 25, 8, 0
+; LE-32BIT-NEXT: b .LBB9_9
+; LE-32BIT-NEXT: .LBB9_8:
+; LE-32BIT-NEXT: addi 25, 24, 0
+; LE-32BIT-NEXT: .LBB9_9:
+; LE-32BIT-NEXT: bc 12, 20, .LBB9_11
+; LE-32BIT-NEXT: # %bb.10:
+; LE-32BIT-NEXT: ori 24, 19, 0
+; LE-32BIT-NEXT: b .LBB9_12
+; LE-32BIT-NEXT: .LBB9_11:
+; LE-32BIT-NEXT: addi 24, 18, 0
+; LE-32BIT-NEXT: .LBB9_12:
+; LE-32BIT-NEXT: srw 19, 9, 4
+; LE-32BIT-NEXT: srw 17, 6, 4
+; LE-32BIT-NEXT: lwz 4, 20(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: srw 30, 21, 30
+; LE-32BIT-NEXT: lwz 8, 24(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: slw 21, 29, 16
+; LE-32BIT-NEXT: cmplwi 7, 16, 64
+; LE-32BIT-NEXT: cmplwi 3, 16, 0
+; LE-32BIT-NEXT: li 16, 0
+; LE-32BIT-NEXT: srw 18, 6, 0
+; LE-32BIT-NEXT: bc 12, 28, .LBB9_14
+; LE-32BIT-NEXT: # %bb.13:
+; LE-32BIT-NEXT: ori 0, 16, 0
+; LE-32BIT-NEXT: b .LBB9_15
+; LE-32BIT-NEXT: .LBB9_14:
+; LE-32BIT-NEXT: addi 0, 21, 0
+; LE-32BIT-NEXT: .LBB9_15:
+; LE-32BIT-NEXT: lwz 21, 60(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 28, .LBB9_16
+; LE-32BIT-NEXT: b .LBB9_17
+; LE-32BIT-NEXT: .LBB9_16:
+; LE-32BIT-NEXT: addi 4, 7, 0
+; LE-32BIT-NEXT: .LBB9_17:
+; LE-32BIT-NEXT: bc 12, 20, .LBB9_18
+; LE-32BIT-NEXT: b .LBB9_19
+; LE-32BIT-NEXT: .LBB9_18:
+; LE-32BIT-NEXT: addi 8, 20, 0
+; LE-32BIT-NEXT: .LBB9_19:
+; LE-32BIT-NEXT: mr 12, 29
+; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: slw 20, 29, 15
+; LE-32BIT-NEXT: srw 29, 6, 31
+; LE-32BIT-NEXT: bc 12, 2, .LBB9_20
+; LE-32BIT-NEXT: b .LBB9_21
+; LE-32BIT-NEXT: .LBB9_20:
+; LE-32BIT-NEXT: addi 8, 21, 0
+; LE-32BIT-NEXT: .LBB9_21:
+; LE-32BIT-NEXT: cmplwi 2, 31, 0
+; LE-32BIT-NEXT: bc 12, 20, .LBB9_23
+; LE-32BIT-NEXT: # %bb.22:
+; LE-32BIT-NEXT: ori 26, 19, 0
+; LE-32BIT-NEXT: ori 3, 17, 0
+; LE-32BIT-NEXT: b .LBB9_23
+; LE-32BIT-NEXT: .LBB9_23:
+; LE-32BIT-NEXT: or 8, 8, 0
+; LE-32BIT-NEXT: bc 12, 20, .LBB9_25
+; LE-32BIT-NEXT: # %bb.24:
+; LE-32BIT-NEXT: ori 0, 16, 0
+; LE-32BIT-NEXT: b .LBB9_26
+; LE-32BIT-NEXT: .LBB9_25:
+; LE-32BIT-NEXT: addi 0, 30, 0
+; LE-32BIT-NEXT: .LBB9_26:
+; LE-32BIT-NEXT: bc 12, 24, .LBB9_28
+; LE-32BIT-NEXT: # %bb.27:
+; LE-32BIT-NEXT: ori 30, 16, 0
+; LE-32BIT-NEXT: b .LBB9_29
+; LE-32BIT-NEXT: .LBB9_28:
+; LE-32BIT-NEXT: addi 30, 29, 0
+; LE-32BIT-NEXT: .LBB9_29:
+; LE-32BIT-NEXT: bc 12, 20, .LBB9_31
+; LE-32BIT-NEXT: # %bb.30:
+; LE-32BIT-NEXT: ori 29, 16, 0
+; LE-32BIT-NEXT: b .LBB9_32
+; LE-32BIT-NEXT: .LBB9_31:
+; LE-32BIT-NEXT: addi 29, 7, 0
+; LE-32BIT-NEXT: .LBB9_32:
+; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 10, .LBB9_33
+; LE-32BIT-NEXT: b .LBB9_34
+; LE-32BIT-NEXT: .LBB9_33:
+; LE-32BIT-NEXT: addi 25, 12, 0
+; LE-32BIT-NEXT: .LBB9_34:
+; LE-32BIT-NEXT: bc 12, 14, .LBB9_35
+; LE-32BIT-NEXT: b .LBB9_36
+; LE-32BIT-NEXT: .LBB9_35:
+; LE-32BIT-NEXT: addi 4, 6, 0
+; LE-32BIT-NEXT: .LBB9_36:
+; LE-32BIT-NEXT: bc 12, 2, .LBB9_38
+; LE-32BIT-NEXT: # %bb.37:
+; LE-32BIT-NEXT: ori 6, 26, 0
+; LE-32BIT-NEXT: b .LBB9_39
+; LE-32BIT-NEXT: .LBB9_38:
+; LE-32BIT-NEXT: addi 6, 22, 0
+; LE-32BIT-NEXT: .LBB9_39:
+; LE-32BIT-NEXT: li 26, 0
+; LE-32BIT-NEXT: bc 12, 2, .LBB9_40
+; LE-32BIT-NEXT: b .LBB9_41
+; LE-32BIT-NEXT: .LBB9_40:
+; LE-32BIT-NEXT: addi 3, 10, 0
+; LE-32BIT-NEXT: .LBB9_41:
+; LE-32BIT-NEXT: bc 12, 28, .LBB9_43
+; LE-32BIT-NEXT: # %bb.42:
+; LE-32BIT-NEXT: ori 5, 20, 0
+; LE-32BIT-NEXT: b .LBB9_43
+; LE-32BIT-NEXT: .LBB9_43:
+; LE-32BIT-NEXT: bc 12, 4, .LBB9_45
+; LE-32BIT-NEXT: # %bb.44:
+; LE-32BIT-NEXT: ori 8, 25, 0
+; LE-32BIT-NEXT: b .LBB9_45
+; LE-32BIT-NEXT: .LBB9_45:
+; LE-32BIT-NEXT: bc 12, 24, .LBB9_47
+; LE-32BIT-NEXT: # %bb.46:
+; LE-32BIT-NEXT: ori 28, 18, 0
+; LE-32BIT-NEXT: b .LBB9_47
+; LE-32BIT-NEXT: .LBB9_47:
+; LE-32BIT-NEXT: bc 12, 28, .LBB9_49
+; LE-32BIT-NEXT: # %bb.48:
+; LE-32BIT-NEXT: ori 27, 16, 0
+; LE-32BIT-NEXT: b .LBB9_49
+; LE-32BIT-NEXT: .LBB9_49:
+; LE-32BIT-NEXT: bc 12, 2, .LBB9_51
+; LE-32BIT-NEXT: # %bb.50:
+; LE-32BIT-NEXT: ori 12, 24, 0
+; LE-32BIT-NEXT: b .LBB9_51
+; LE-32BIT-NEXT: .LBB9_51:
+; LE-32BIT-NEXT: bc 12, 4, .LBB9_53
+; LE-32BIT-NEXT: # %bb.52:
+; LE-32BIT-NEXT: ori 3, 26, 0
+; LE-32BIT-NEXT: b .LBB9_53
+; LE-32BIT-NEXT: .LBB9_53:
+; LE-32BIT-NEXT: bc 12, 14, .LBB9_54
+; LE-32BIT-NEXT: b .LBB9_55
+; LE-32BIT-NEXT: .LBB9_54:
+; LE-32BIT-NEXT: addi 5, 11, 0
+; LE-32BIT-NEXT: .LBB9_55:
+; LE-32BIT-NEXT: bc 12, 10, .LBB9_56
+; LE-32BIT-NEXT: b .LBB9_57
+; LE-32BIT-NEXT: .LBB9_56:
+; LE-32BIT-NEXT: addi 28, 10, 0
+; LE-32BIT-NEXT: .LBB9_57:
+; LE-32BIT-NEXT: or 6, 6, 27
+; LE-32BIT-NEXT: stw 3, 8(14)
+; LE-32BIT-NEXT: or 3, 0, 4
+; LE-32BIT-NEXT: bc 12, 2, .LBB9_59
+; LE-32BIT-NEXT: # %bb.58:
+; LE-32BIT-NEXT: ori 4, 8, 0
+; LE-32BIT-NEXT: b .LBB9_60
+; LE-32BIT-NEXT: .LBB9_59:
+; LE-32BIT-NEXT: addi 4, 21, 0
+; LE-32BIT-NEXT: .LBB9_60:
+; LE-32BIT-NEXT: bc 12, 24, .LBB9_62
+; LE-32BIT-NEXT: # %bb.61:
+; LE-32BIT-NEXT: ori 24, 16, 0
+; LE-32BIT-NEXT: b .LBB9_63
+; LE-32BIT-NEXT: .LBB9_62:
+; LE-32BIT-NEXT: addi 24, 7, 0
+; LE-32BIT-NEXT: .LBB9_63:
+; LE-32BIT-NEXT: bc 12, 4, .LBB9_65
+; LE-32BIT-NEXT: # %bb.64:
+; LE-32BIT-NEXT: ori 3, 30, 0
+; LE-32BIT-NEXT: ori 6, 28, 0
+; LE-32BIT-NEXT: ori 12, 16, 0
+; LE-32BIT-NEXT: b .LBB9_65
+; LE-32BIT-NEXT: .LBB9_65:
+; LE-32BIT-NEXT: stw 4, 28(14)
+; LE-32BIT-NEXT: or 4, 29, 5
+; LE-32BIT-NEXT: bc 12, 4, .LBB9_67
+; LE-32BIT-NEXT: # %bb.66:
+; LE-32BIT-NEXT: ori 4, 24, 0
+; LE-32BIT-NEXT: b .LBB9_67
+; LE-32BIT-NEXT: .LBB9_67:
+; LE-32BIT-NEXT: bc 12, 2, .LBB9_69
+; LE-32BIT-NEXT: # %bb.68:
+; LE-32BIT-NEXT: ori 5, 6, 0
+; LE-32BIT-NEXT: b .LBB9_70
+; LE-32BIT-NEXT: .LBB9_69:
+; LE-32BIT-NEXT: addi 3, 9, 0
+; LE-32BIT-NEXT: addi 5, 22, 0
+; LE-32BIT-NEXT: .LBB9_70:
+; LE-32BIT-NEXT: stw 12, 12(14)
+; LE-32BIT-NEXT: stw 3, 16(14)
+; LE-32BIT-NEXT: bc 12, 2, .LBB9_72
+; LE-32BIT-NEXT: # %bb.71:
+; LE-32BIT-NEXT: ori 3, 4, 0
+; LE-32BIT-NEXT: b .LBB9_73
+; LE-32BIT-NEXT: .LBB9_72:
+; LE-32BIT-NEXT: addi 3, 23, 0
+; LE-32BIT-NEXT: .LBB9_73:
+; LE-32BIT-NEXT: stw 5, 24(14)
+; LE-32BIT-NEXT: stw 3, 20(14)
+; LE-32BIT-NEXT: lwz 12, 68(1)
+; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: mtcrf 32, 12 # cr2
+; LE-32BIT-NEXT: mtcrf 16, 12 # cr3
+; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: addi 1, 1, 144
+; LE-32BIT-NEXT: blr
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = lshr i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; LE-64BIT-LABEL: shl_32bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 7, 24(3)
+; LE-64BIT-NEXT: ld 8, 16(3)
+; LE-64BIT-NEXT: ld 9, 8(3)
+; LE-64BIT-NEXT: li 6, 0
+; LE-64BIT-NEXT: ld 3, 0(3)
+; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: rlwinm. 4, 4, 3, 0, 28
+; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: subfic 28, 4, 64
+; LE-64BIT-NEXT: subfic 11, 4, 192
+; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: addi 0, 4, -128
+; LE-64BIT-NEXT: sld 29, 9, 4
+; LE-64BIT-NEXT: addi 27, 4, -64
+; LE-64BIT-NEXT: subfic 25, 4, 128
+; LE-64BIT-NEXT: srd 24, 8, 28
+; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: srd 21, 9, 28
+; LE-64BIT-NEXT: srd 28, 3, 28
+; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: sld 10, 7, 4
+; LE-64BIT-NEXT: addi 30, 4, -192
+; LE-64BIT-NEXT: subfic 22, 25, 64
+; LE-64BIT-NEXT: srd 11, 3, 11
+; LE-64BIT-NEXT: sld 26, 9, 0
+; LE-64BIT-NEXT: or 29, 29, 28
+; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: or 10, 10, 24
+; LE-64BIT-NEXT: sld 28, 3, 27
+; LE-64BIT-NEXT: sld 30, 3, 30
+; LE-64BIT-NEXT: or 11, 26, 11
+; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: sld 23, 8, 27
+; LE-64BIT-NEXT: sld 27, 9, 22
+; LE-64BIT-NEXT: or 29, 29, 28
+; LE-64BIT-NEXT: or 11, 11, 30
+; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: srd 28, 3, 25
+; LE-64BIT-NEXT: or 10, 10, 23
+; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: srd 9, 9, 25
+; LE-64BIT-NEXT: or 30, 28, 27
+; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: cmplwi 1, 4, 128
+; LE-64BIT-NEXT: sld 12, 8, 4
+; LE-64BIT-NEXT: or 9, 10, 9
+; LE-64BIT-NEXT: or 30, 30, 21
+; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: sld 10, 3, 0
+; LE-64BIT-NEXT: isel 9, 9, 11, 4
+; LE-64BIT-NEXT: or 11, 12, 30
+; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: iseleq 7, 7, 9
+; LE-64BIT-NEXT: sld 3, 3, 4
+; LE-64BIT-NEXT: isel 9, 11, 10, 4
+; LE-64BIT-NEXT: std 7, 24(5)
+; LE-64BIT-NEXT: isel 0, 29, 6, 4
+; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: iseleq 4, 8, 9
+; LE-64BIT-NEXT: std 0, 8(5)
+; LE-64BIT-NEXT: isel 3, 3, 6, 4
+; LE-64BIT-NEXT: std 4, 16(5)
+; LE-64BIT-NEXT: std 3, 0(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: shl_32bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 28(4)
+; BE-NEXT: ld 7, 8(3)
+; BE-NEXT: ld 8, 0(3)
+; BE-NEXT: ld 9, 16(3)
+; BE-NEXT: ld 3, 24(3)
+; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; BE-NEXT: li 6, 0
+; BE-NEXT: rlwinm. 4, 4, 3, 0, 28
+; BE-NEXT: subfic 10, 4, 192
+; BE-NEXT: addi 11, 4, -128
+; BE-NEXT: addi 12, 4, -192
+; BE-NEXT: subfic 30, 4, 64
+; BE-NEXT: srd 10, 3, 10
+; BE-NEXT: sld 27, 9, 11
+; BE-NEXT: sld 0, 8, 4
+; BE-NEXT: addi 29, 4, -64
+; BE-NEXT: subfic 28, 4, 128
+; BE-NEXT: sld 12, 3, 12
+; BE-NEXT: or 10, 27, 10
+; BE-NEXT: srd 27, 7, 30
+; BE-NEXT: or 10, 10, 12
+; BE-NEXT: or 0, 0, 27
+; BE-NEXT: sld 27, 7, 29
+; BE-NEXT: subfic 12, 28, 64
+; BE-NEXT: or 0, 0, 27
+; BE-NEXT: srd 27, 3, 28
+; BE-NEXT: sld 12, 9, 12
+; BE-NEXT: srd 28, 9, 28
+; BE-NEXT: cmplwi 1, 4, 128
+; BE-NEXT: or 12, 27, 12
+; BE-NEXT: or 28, 0, 28
+; BE-NEXT: srd 0, 9, 30
+; BE-NEXT: sld 9, 9, 4
+; BE-NEXT: sld 11, 3, 11
+; BE-NEXT: bc 12, 4, .LBB10_1
+; BE-NEXT: b .LBB10_2
+; BE-NEXT: .LBB10_1:
+; BE-NEXT: addi 10, 28, 0
+; BE-NEXT: .LBB10_2:
+; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; BE-NEXT: or 12, 12, 0
+; BE-NEXT: sld 0, 7, 4
+; BE-NEXT: or 12, 0, 12
+; BE-NEXT: srd 0, 3, 30
+; BE-NEXT: sld 30, 3, 29
+; BE-NEXT: bc 12, 4, .LBB10_3
+; BE-NEXT: b .LBB10_4
+; BE-NEXT: .LBB10_3:
+; BE-NEXT: addi 11, 12, 0
+; BE-NEXT: .LBB10_4:
+; BE-NEXT: sld 3, 3, 4
+; BE-NEXT: bc 12, 2, .LBB10_6
+; BE-NEXT: # %bb.5:
+; BE-NEXT: ori 4, 10, 0
+; BE-NEXT: b .LBB10_7
+; BE-NEXT: .LBB10_6:
+; BE-NEXT: addi 4, 8, 0
+; BE-NEXT: .LBB10_7:
+; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; BE-NEXT: or 9, 9, 0
+; BE-NEXT: or 9, 9, 30
+; BE-NEXT: bc 12, 2, .LBB10_9
+; BE-NEXT: # %bb.8:
+; BE-NEXT: ori 7, 11, 0
+; BE-NEXT: b .LBB10_9
+; BE-NEXT: .LBB10_9:
+; BE-NEXT: bc 12, 4, .LBB10_11
+; BE-NEXT: # %bb.10:
+; BE-NEXT: ori 8, 6, 0
+; BE-NEXT: ori 3, 6, 0
+; BE-NEXT: b .LBB10_12
+; BE-NEXT: .LBB10_11:
+; BE-NEXT: addi 8, 9, 0
+; BE-NEXT: .LBB10_12:
+; BE-NEXT: std 4, 0(5)
+; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; BE-NEXT: std 3, 24(5)
+; BE-NEXT: std 8, 16(5)
+; BE-NEXT: std 7, 8(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: shl_32bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: stwu 1, -144(1)
+; LE-32BIT-NEXT: mfcr 12
+; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 12, 68(1)
+; LE-32BIT-NEXT: lwz 0, 28(4)
+; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: lwz 6, 24(3)
+; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28
+; LE-32BIT-NEXT: lwz 5, 28(3)
+; LE-32BIT-NEXT: subfic 21, 30, 224
+; LE-32BIT-NEXT: lwz 7, 4(3)
+; LE-32BIT-NEXT: subfic 0, 30, 160
+; LE-32BIT-NEXT: lwz 9, 0(3)
+; LE-32BIT-NEXT: addi 4, 30, -128
+; LE-32BIT-NEXT: lwz 10, 8(3)
+; LE-32BIT-NEXT: subfic 28, 30, 96
+; LE-32BIT-NEXT: lwz 8, 12(3)
+; LE-32BIT-NEXT: addi 29, 30, -64
+; LE-32BIT-NEXT: lwz 12, 16(3)
+; LE-32BIT-NEXT: subfic 25, 30, 32
+; LE-32BIT-NEXT: lwz 11, 20(3)
+; LE-32BIT-NEXT: addi 3, 30, -192
+; LE-32BIT-NEXT: srw 21, 5, 21
+; LE-32BIT-NEXT: slw 16, 6, 3
+; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: slw 20, 9, 30
+; LE-32BIT-NEXT: srw 15, 11, 0
+; LE-32BIT-NEXT: slw 14, 12, 4
+; LE-32BIT-NEXT: srw 31, 8, 28
+; LE-32BIT-NEXT: slw 3, 10, 29
+; LE-32BIT-NEXT: or 21, 16, 21
+; LE-32BIT-NEXT: srw 16, 7, 25
+; LE-32BIT-NEXT: slw 19, 10, 30
+; LE-32BIT-NEXT: or 15, 14, 15
+; LE-32BIT-NEXT: srw 14, 8, 25
+; LE-32BIT-NEXT: or 3, 3, 31
+; LE-32BIT-NEXT: srw 31, 5, 0
+; LE-32BIT-NEXT: or 20, 20, 16
+; LE-32BIT-NEXT: slw 16, 6, 4
+; LE-32BIT-NEXT: addi 27, 30, -224
+; LE-32BIT-NEXT: or 19, 19, 14
+; LE-32BIT-NEXT: srw 14, 5, 28
+; LE-32BIT-NEXT: or 16, 16, 31
+; LE-32BIT-NEXT: slw 31, 6, 29
+; LE-32BIT-NEXT: addi 23, 30, -160
+; LE-32BIT-NEXT: slw 18, 12, 30
+; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 14, 31, 14
+; LE-32BIT-NEXT: srw 31, 11, 25
+; LE-32BIT-NEXT: slw 0, 5, 27
+; LE-32BIT-NEXT: addi 26, 30, -96
+; LE-32BIT-NEXT: slw 17, 6, 30
+; LE-32BIT-NEXT: or 18, 18, 31
+; LE-32BIT-NEXT: srw 31, 5, 25
+; LE-32BIT-NEXT: or 21, 21, 0
+; LE-32BIT-NEXT: slw 0, 11, 23
+; LE-32BIT-NEXT: or 17, 17, 31
+; LE-32BIT-NEXT: addi 31, 30, -32
+; LE-32BIT-NEXT: or 0, 15, 0
+; LE-32BIT-NEXT: slw 15, 8, 26
+; LE-32BIT-NEXT: stw 29, 52(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 29, 3, 15
+; LE-32BIT-NEXT: slw 15, 7, 31
+; LE-32BIT-NEXT: or 20, 20, 15
+; LE-32BIT-NEXT: slw 15, 8, 31
+; LE-32BIT-NEXT: or 3, 19, 15
+; LE-32BIT-NEXT: subfic 15, 30, 128
+; LE-32BIT-NEXT: slw 23, 5, 23
+; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 3, 16, 23
+; LE-32BIT-NEXT: subfic 16, 15, 32
+; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 3, 11, 15
+; LE-32BIT-NEXT: slw 22, 12, 16
+; LE-32BIT-NEXT: or 23, 3, 22
+; LE-32BIT-NEXT: subfic 22, 30, 64
+; LE-32BIT-NEXT: stw 9, 60(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: mr 9, 10
+; LE-32BIT-NEXT: subfic 3, 22, 32
+; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 4, 8, 22
+; LE-32BIT-NEXT: slw 24, 9, 3
+; LE-32BIT-NEXT: or 4, 4, 24
+; LE-32BIT-NEXT: subfic 24, 30, 192
+; LE-32BIT-NEXT: subfic 27, 24, 32
+; LE-32BIT-NEXT: mr 10, 26
+; LE-32BIT-NEXT: slw 27, 6, 27
+; LE-32BIT-NEXT: srw 26, 5, 24
+; LE-32BIT-NEXT: stw 28, 24(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 27, 26, 27
+; LE-32BIT-NEXT: srw 26, 11, 22
+; LE-32BIT-NEXT: slw 28, 12, 3
+; LE-32BIT-NEXT: or 28, 26, 28
+; LE-32BIT-NEXT: srw 26, 5, 15
+; LE-32BIT-NEXT: slw 19, 6, 16
+; LE-32BIT-NEXT: or 26, 26, 19
+; LE-32BIT-NEXT: slw 19, 5, 10
+; LE-32BIT-NEXT: stw 7, 32(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: mr 7, 9
+; LE-32BIT-NEXT: or 19, 14, 19
+; LE-32BIT-NEXT: slw 14, 11, 31
+; LE-32BIT-NEXT: lwz 9, 64(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 18, 18, 14
+; LE-32BIT-NEXT: slw 3, 6, 3
+; LE-32BIT-NEXT: srw 14, 5, 22
+; LE-32BIT-NEXT: cmplwi 5, 30, 64
+; LE-32BIT-NEXT: cmplwi 1, 30, 128
+; LE-32BIT-NEXT: srw 24, 6, 24
+; LE-32BIT-NEXT: or 10, 14, 3
+; LE-32BIT-NEXT: slw 14, 5, 31
+; LE-32BIT-NEXT: crnand 28, 4, 20
+; LE-32BIT-NEXT: slw 31, 5, 30
+; LE-32BIT-NEXT: or 24, 0, 24
+; LE-32BIT-NEXT: mr 3, 7
+; LE-32BIT-NEXT: stw 7, 28(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 0, 7, 22
+; LE-32BIT-NEXT: lwz 7, 24(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 17, 17, 14
+; LE-32BIT-NEXT: bc 12, 28, .LBB10_2
+; LE-32BIT-NEXT: # %bb.1:
+; LE-32BIT-NEXT: ori 14, 31, 0
+; LE-32BIT-NEXT: b .LBB10_3
+; LE-32BIT-NEXT: .LBB10_2:
+; LE-32BIT-NEXT: li 14, 0
+; LE-32BIT-NEXT: .LBB10_3:
+; LE-32BIT-NEXT: or 20, 20, 0
+; LE-32BIT-NEXT: subfic 0, 15, 64
+; LE-32BIT-NEXT: stw 14, 28(9)
+; LE-32BIT-NEXT: subfic 14, 0, 32
+; LE-32BIT-NEXT: srw 14, 11, 14
+; LE-32BIT-NEXT: slw 31, 12, 0
+; LE-32BIT-NEXT: or 14, 31, 14
+; LE-32BIT-NEXT: srw 31, 12, 7
+; LE-32BIT-NEXT: or 23, 23, 31
+; LE-32BIT-NEXT: srw 31, 3, 25
+; LE-32BIT-NEXT: lwz 3, 40(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 4, 4, 31
+; LE-32BIT-NEXT: slw 0, 11, 0
+; LE-32BIT-NEXT: cmplwi 3, 15, 0
+; LE-32BIT-NEXT: srw 31, 6, 3
+; LE-32BIT-NEXT: or 27, 27, 31
+; LE-32BIT-NEXT: srw 31, 12, 25
+; LE-32BIT-NEXT: or 28, 28, 31
+; LE-32BIT-NEXT: srw 31, 6, 7
+; LE-32BIT-NEXT: or 26, 26, 31
+; LE-32BIT-NEXT: srw 31, 6, 22
+; LE-32BIT-NEXT: or 18, 18, 31
+; LE-32BIT-NEXT: lwz 31, 36(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: srw 25, 6, 25
+; LE-32BIT-NEXT: or 3, 10, 25
+; LE-32BIT-NEXT: or 26, 26, 0
+; LE-32BIT-NEXT: cmplwi 6, 31, 64
+; LE-32BIT-NEXT: slw 0, 11, 30
+; LE-32BIT-NEXT: bc 12, 24, .LBB10_5
+; LE-32BIT-NEXT: # %bb.4:
+; LE-32BIT-NEXT: ori 25, 21, 0
+; LE-32BIT-NEXT: b .LBB10_6
+; LE-32BIT-NEXT: .LBB10_5:
+; LE-32BIT-NEXT: addi 25, 24, 0
+; LE-32BIT-NEXT: .LBB10_6:
+; LE-32BIT-NEXT: slw 24, 11, 16
+; LE-32BIT-NEXT: lwz 10, 32(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 3, 0, 3
+; LE-32BIT-NEXT: bc 12, 28, .LBB10_8
+; LE-32BIT-NEXT: # %bb.7:
+; LE-32BIT-NEXT: ori 0, 17, 0
+; LE-32BIT-NEXT: b .LBB10_9
+; LE-32BIT-NEXT: .LBB10_8:
+; LE-32BIT-NEXT: li 0, 0
+; LE-32BIT-NEXT: .LBB10_9:
+; LE-32BIT-NEXT: or 24, 14, 24
+; LE-32BIT-NEXT: stw 0, 24(9)
+; LE-32BIT-NEXT: srw 0, 6, 15
+; LE-32BIT-NEXT: or 24, 0, 24
+; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: slw 21, 10, 30
+; LE-32BIT-NEXT: bc 12, 20, .LBB10_11
+; LE-32BIT-NEXT: # %bb.10:
+; LE-32BIT-NEXT: ori 7, 29, 0
+; LE-32BIT-NEXT: b .LBB10_12
+; LE-32BIT-NEXT: .LBB10_11:
+; LE-32BIT-NEXT: addi 7, 20, 0
+; LE-32BIT-NEXT: .LBB10_12:
+; LE-32BIT-NEXT: or 4, 21, 4
+; LE-32BIT-NEXT: slw 21, 11, 31
+; LE-32BIT-NEXT: srw 20, 12, 15
+; LE-32BIT-NEXT: cmplwi 7, 15, 64
+; LE-32BIT-NEXT: li 15, 0
+; LE-32BIT-NEXT: or 27, 21, 27
+; LE-32BIT-NEXT: bc 12, 20, .LBB10_14
+; LE-32BIT-NEXT: # %bb.13:
+; LE-32BIT-NEXT: ori 21, 19, 0
+; LE-32BIT-NEXT: b .LBB10_15
+; LE-32BIT-NEXT: .LBB10_14:
+; LE-32BIT-NEXT: addi 21, 18, 0
+; LE-32BIT-NEXT: .LBB10_15:
+; LE-32BIT-NEXT: mr 16, 9
+; LE-32BIT-NEXT: lwz 9, 52(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: slw 18, 5, 0
+; LE-32BIT-NEXT: bc 12, 28, .LBB10_17
+; LE-32BIT-NEXT: # %bb.16:
+; LE-32BIT-NEXT: ori 0, 15, 0
+; LE-32BIT-NEXT: b .LBB10_18
+; LE-32BIT-NEXT: .LBB10_17:
+; LE-32BIT-NEXT: addi 0, 20, 0
+; LE-32BIT-NEXT: .LBB10_18:
+; LE-32BIT-NEXT: lwz 20, 60(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: slw 30, 8, 30
+; LE-32BIT-NEXT: slw 19, 8, 9
+; LE-32BIT-NEXT: slw 17, 5, 9
+; LE-32BIT-NEXT: bc 12, 2, .LBB10_20
+; LE-32BIT-NEXT: # %bb.19:
+; LE-32BIT-NEXT: ori 9, 7, 0
+; LE-32BIT-NEXT: b .LBB10_21
+; LE-32BIT-NEXT: .LBB10_20:
+; LE-32BIT-NEXT: addi 9, 20, 0
+; LE-32BIT-NEXT: .LBB10_21:
+; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: slw 29, 5, 31
+; LE-32BIT-NEXT: or 9, 9, 0
+; LE-32BIT-NEXT: bc 12, 20, .LBB10_23
+; LE-32BIT-NEXT: # %bb.22:
+; LE-32BIT-NEXT: ori 0, 15, 0
+; LE-32BIT-NEXT: b .LBB10_24
+; LE-32BIT-NEXT: .LBB10_23:
+; LE-32BIT-NEXT: addi 0, 30, 0
+; LE-32BIT-NEXT: .LBB10_24:
+; LE-32BIT-NEXT: bc 12, 24, .LBB10_26
+; LE-32BIT-NEXT: # %bb.25:
+; LE-32BIT-NEXT: ori 30, 15, 0
+; LE-32BIT-NEXT: b .LBB10_27
+; LE-32BIT-NEXT: .LBB10_26:
+; LE-32BIT-NEXT: addi 30, 29, 0
+; LE-32BIT-NEXT: .LBB10_27:
+; LE-32BIT-NEXT: bc 12, 28, .LBB10_28
+; LE-32BIT-NEXT: b .LBB10_29
+; LE-32BIT-NEXT: .LBB10_28:
+; LE-32BIT-NEXT: addi 28, 26, 0
+; LE-32BIT-NEXT: .LBB10_29:
+; LE-32BIT-NEXT: bc 12, 20, .LBB10_31
+; LE-32BIT-NEXT: # %bb.30:
+; LE-32BIT-NEXT: ori 3, 17, 0
+; LE-32BIT-NEXT: b .LBB10_31
+; LE-32BIT-NEXT: .LBB10_31:
+; LE-32BIT-NEXT: srw 22, 12, 22
+; LE-32BIT-NEXT: bc 12, 20, .LBB10_33
+; LE-32BIT-NEXT: # %bb.32:
+; LE-32BIT-NEXT: ori 29, 15, 0
+; LE-32BIT-NEXT: b .LBB10_34
+; LE-32BIT-NEXT: .LBB10_33:
+; LE-32BIT-NEXT: addi 29, 7, 0
+; LE-32BIT-NEXT: .LBB10_34:
+; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 20, .LBB10_36
+; LE-32BIT-NEXT: # %bb.35:
+; LE-32BIT-NEXT: ori 4, 19, 0
+; LE-32BIT-NEXT: b .LBB10_36
+; LE-32BIT-NEXT: .LBB10_36:
+; LE-32BIT-NEXT: bc 12, 14, .LBB10_38
+; LE-32BIT-NEXT: # %bb.37:
+; LE-32BIT-NEXT: ori 5, 28, 0
+; LE-32BIT-NEXT: b .LBB10_38
+; LE-32BIT-NEXT: .LBB10_38:
+; LE-32BIT-NEXT: li 28, 0
+; LE-32BIT-NEXT: bc 12, 2, .LBB10_39
+; LE-32BIT-NEXT: b .LBB10_40
+; LE-32BIT-NEXT: .LBB10_39:
+; LE-32BIT-NEXT: addi 3, 11, 0
+; LE-32BIT-NEXT: .LBB10_40:
+; LE-32BIT-NEXT: cmplwi 2, 31, 0
+; LE-32BIT-NEXT: bc 12, 24, .LBB10_42
+; LE-32BIT-NEXT: # %bb.41:
+; LE-32BIT-NEXT: ori 27, 18, 0
+; LE-32BIT-NEXT: b .LBB10_42
+; LE-32BIT-NEXT: .LBB10_42:
+; LE-32BIT-NEXT: bc 12, 28, .LBB10_44
+; LE-32BIT-NEXT: # %bb.43:
+; LE-32BIT-NEXT: ori 26, 22, 0
+; LE-32BIT-NEXT: b .LBB10_45
+; LE-32BIT-NEXT: .LBB10_44:
+; LE-32BIT-NEXT: addi 26, 24, 0
+; LE-32BIT-NEXT: .LBB10_45:
+; LE-32BIT-NEXT: bc 12, 2, .LBB10_46
+; LE-32BIT-NEXT: b .LBB10_47
+; LE-32BIT-NEXT: .LBB10_46:
+; LE-32BIT-NEXT: addi 4, 10, 0
+; LE-32BIT-NEXT: .LBB10_47:
+; LE-32BIT-NEXT: bc 12, 4, .LBB10_49
+; LE-32BIT-NEXT: # %bb.48:
+; LE-32BIT-NEXT: ori 3, 28, 0
+; LE-32BIT-NEXT: b .LBB10_49
+; LE-32BIT-NEXT: .LBB10_49:
+; LE-32BIT-NEXT: bc 12, 10, .LBB10_50
+; LE-32BIT-NEXT: b .LBB10_51
+; LE-32BIT-NEXT: .LBB10_50:
+; LE-32BIT-NEXT: addi 25, 12, 0
+; LE-32BIT-NEXT: .LBB10_51:
+; LE-32BIT-NEXT: or 5, 0, 5
+; LE-32BIT-NEXT: bc 12, 24, .LBB10_53
+; LE-32BIT-NEXT: # %bb.52:
+; LE-32BIT-NEXT: ori 24, 15, 0
+; LE-32BIT-NEXT: b .LBB10_54
+; LE-32BIT-NEXT: .LBB10_53:
+; LE-32BIT-NEXT: addi 24, 7, 0
+; LE-32BIT-NEXT: .LBB10_54:
+; LE-32BIT-NEXT: bc 12, 28, .LBB10_56
+; LE-32BIT-NEXT: # %bb.55:
+; LE-32BIT-NEXT: ori 7, 15, 0
+; LE-32BIT-NEXT: b .LBB10_57
+; LE-32BIT-NEXT: .LBB10_56:
+; LE-32BIT-NEXT: addi 7, 23, 0
+; LE-32BIT-NEXT: .LBB10_57:
+; LE-32BIT-NEXT: bc 12, 10, .LBB10_58
+; LE-32BIT-NEXT: b .LBB10_59
+; LE-32BIT-NEXT: .LBB10_58:
+; LE-32BIT-NEXT: addi 27, 11, 0
+; LE-32BIT-NEXT: .LBB10_59:
+; LE-32BIT-NEXT: stw 3, 20(16)
+; LE-32BIT-NEXT: or 3, 4, 7
+; LE-32BIT-NEXT: bc 12, 4, .LBB10_61
+; LE-32BIT-NEXT: # %bb.60:
+; LE-32BIT-NEXT: ori 3, 27, 0
+; LE-32BIT-NEXT: ori 9, 25, 0
+; LE-32BIT-NEXT: b .LBB10_61
+; LE-32BIT-NEXT: .LBB10_61:
+; LE-32BIT-NEXT: bc 12, 14, .LBB10_63
+; LE-32BIT-NEXT: # %bb.62:
+; LE-32BIT-NEXT: ori 6, 26, 0
+; LE-32BIT-NEXT: b .LBB10_63
+; LE-32BIT-NEXT: .LBB10_63:
+; LE-32BIT-NEXT: bc 12, 2, .LBB10_65
+; LE-32BIT-NEXT: # %bb.64:
+; LE-32BIT-NEXT: ori 12, 21, 0
+; LE-32BIT-NEXT: b .LBB10_65
+; LE-32BIT-NEXT: .LBB10_65:
+; LE-32BIT-NEXT: bc 12, 4, .LBB10_67
+; LE-32BIT-NEXT: # %bb.66:
+; LE-32BIT-NEXT: ori 5, 30, 0
+; LE-32BIT-NEXT: b .LBB10_67
+; LE-32BIT-NEXT: .LBB10_67:
+; LE-32BIT-NEXT: bc 12, 2, .LBB10_69
+; LE-32BIT-NEXT: # %bb.68:
+; LE-32BIT-NEXT: ori 4, 9, 0
+; LE-32BIT-NEXT: b .LBB10_70
+; LE-32BIT-NEXT: .LBB10_69:
+; LE-32BIT-NEXT: addi 3, 10, 0
+; LE-32BIT-NEXT: addi 4, 20, 0
+; LE-32BIT-NEXT: .LBB10_70:
+; LE-32BIT-NEXT: bc 12, 4, .LBB10_72
+; LE-32BIT-NEXT: # %bb.71:
+; LE-32BIT-NEXT: ori 12, 15, 0
+; LE-32BIT-NEXT: b .LBB10_72
+; LE-32BIT-NEXT: .LBB10_72:
+; LE-32BIT-NEXT: bc 12, 2, .LBB10_73
+; LE-32BIT-NEXT: b .LBB10_74
+; LE-32BIT-NEXT: .LBB10_73:
+; LE-32BIT-NEXT: addi 5, 8, 0
+; LE-32BIT-NEXT: .LBB10_74:
+; LE-32BIT-NEXT: stw 3, 4(16)
+; LE-32BIT-NEXT: lwz 3, 28(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: stw 4, 0(16)
+; LE-32BIT-NEXT: or 4, 29, 6
+; LE-32BIT-NEXT: bc 12, 4, .LBB10_76
+; LE-32BIT-NEXT: # %bb.75:
+; LE-32BIT-NEXT: ori 4, 24, 0
+; LE-32BIT-NEXT: b .LBB10_76
+; LE-32BIT-NEXT: .LBB10_76:
+; LE-32BIT-NEXT: stw 12, 16(16)
+; LE-32BIT-NEXT: bc 12, 2, .LBB10_78
+; LE-32BIT-NEXT: # %bb.77:
+; LE-32BIT-NEXT: ori 3, 4, 0
+; LE-32BIT-NEXT: b .LBB10_78
+; LE-32BIT-NEXT: .LBB10_78:
+; LE-32BIT-NEXT: stw 5, 12(16)
+; LE-32BIT-NEXT: stw 3, 8(16)
+; LE-32BIT-NEXT: lwz 12, 68(1)
+; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: mtcrf 32, 12 # cr2
+; LE-32BIT-NEXT: mtcrf 16, 12 # cr3
+; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: addi 1, 1, 144
+; LE-32BIT-NEXT: blr
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = shl i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; 256-bit (32-byte) arithmetic right shift whose amount is supplied in bytes
+; LE-64BIT-LABEL: ashr_32bytes:
+; LE-64BIT: # %bb.0:
+; LE-64BIT-NEXT: lwz 4, 0(4)
+; LE-64BIT-NEXT: ld 6, 24(3)
+; LE-64BIT-NEXT: ld 8, 16(3)
+; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: rlwinm. 4, 4, 3, 0, 28
+; LE-64BIT-NEXT: sradi 9, 6, 63
+; LE-64BIT-NEXT: subfic 10, 4, 192
+; LE-64BIT-NEXT: addi 11, 4, -128
+; LE-64BIT-NEXT: addi 30, 4, -192
+; LE-64BIT-NEXT: sld 10, 6, 10
+; LE-64BIT-NEXT: srd 29, 8, 11
+; LE-64BIT-NEXT: subfic 28, 4, 64
+; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: ld 7, 0(3)
+; LE-64BIT-NEXT: ld 3, 8(3)
+; LE-64BIT-NEXT: srd 0, 8, 4
+; LE-64BIT-NEXT: srad 27, 6, 30
+; LE-64BIT-NEXT: or 10, 29, 10
+; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; LE-64BIT-NEXT: cmpwi 1, 30, 1
+; LE-64BIT-NEXT: sld 26, 6, 28
+; LE-64BIT-NEXT: addi 30, 4, -64
+; LE-64BIT-NEXT: isel 10, 10, 27, 4
+; LE-64BIT-NEXT: or 27, 0, 26
+; LE-64BIT-NEXT: subfic 0, 4, 128
+; LE-64BIT-NEXT: srd 12, 7, 4
+; LE-64BIT-NEXT: sld 26, 3, 28
+; LE-64BIT-NEXT: subfic 25, 0, 64
+; LE-64BIT-NEXT: srad 29, 6, 30
+; LE-64BIT-NEXT: cmpwi 1, 30, 1
+; LE-64BIT-NEXT: or 12, 12, 26
+; LE-64BIT-NEXT: srd 30, 3, 30
+; LE-64BIT-NEXT: sld 28, 8, 28
+; LE-64BIT-NEXT: srd 26, 8, 25
+; LE-64BIT-NEXT: sld 8, 8, 0
+; LE-64BIT-NEXT: or 12, 12, 30
+; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: sld 0, 6, 0
+; LE-64BIT-NEXT: isel 29, 27, 29, 4
+; LE-64BIT-NEXT: or 8, 12, 8
+; LE-64BIT-NEXT: or 0, 0, 26
+; LE-64BIT-NEXT: cmplwi 1, 4, 128
+; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: srd 27, 3, 4
+; LE-64BIT-NEXT: or 0, 0, 28
+; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: srad 11, 6, 11
+; LE-64BIT-NEXT: isel 8, 8, 10, 4
+; LE-64BIT-NEXT: or 10, 27, 0
+; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: iseleq 7, 7, 8
+; LE-64BIT-NEXT: srad 4, 6, 4
+; LE-64BIT-NEXT: isel 8, 10, 11, 4
+; LE-64BIT-NEXT: std 7, 0(5)
+; LE-64BIT-NEXT: isel 12, 29, 9, 4
+; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; LE-64BIT-NEXT: iseleq 3, 3, 8
+; LE-64BIT-NEXT: std 12, 16(5)
+; LE-64BIT-NEXT: isel 4, 4, 9, 4
+; LE-64BIT-NEXT: std 3, 8(5)
+; LE-64BIT-NEXT: std 4, 24(5)
+; LE-64BIT-NEXT: blr
+;
+; BE-LABEL: ashr_32bytes:
+; BE: # %bb.0:
+; BE-NEXT: lwz 4, 28(4)
+; BE-NEXT: ld 6, 16(3)
+; BE-NEXT: ld 7, 24(3)
+; BE-NEXT: ld 8, 8(3)
+; BE-NEXT: ld 3, 0(3)
+; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; BE-NEXT: rlwinm. 4, 4, 3, 0, 28
+; BE-NEXT: subfic 9, 4, 192
+; BE-NEXT: addi 10, 4, -128
+; BE-NEXT: addi 11, 4, -192
+; BE-NEXT: subfic 0, 4, 64
+; BE-NEXT: sld 9, 3, 9
+; BE-NEXT: srd 27, 8, 10
+; BE-NEXT: srd 12, 7, 4
+; BE-NEXT: subfic 29, 4, 128
+; BE-NEXT: cmpwi 1, 11, 1
+; BE-NEXT: srad 11, 3, 11
+; BE-NEXT: or 9, 27, 9
+; BE-NEXT: sld 27, 6, 0
+; BE-NEXT: addi 30, 4, -64
+; BE-NEXT: srd 28, 8, 4
+; BE-NEXT: or 12, 12, 27
+; BE-NEXT: sld 27, 3, 0
+; BE-NEXT: bc 12, 4, .LBB11_2
+; BE-NEXT: # %bb.1:
+; BE-NEXT: ori 9, 11, 0
+; BE-NEXT: b .LBB11_2
+; BE-NEXT: .LBB11_2:
+; BE-NEXT: subfic 11, 29, 64
+; BE-NEXT: or 28, 28, 27
+; BE-NEXT: srd 27, 6, 30
+; BE-NEXT: sld 0, 8, 0
+; BE-NEXT: srd 11, 8, 11
+; BE-NEXT: sld 8, 8, 29
+; BE-NEXT: sld 29, 3, 29
+; BE-NEXT: cmplwi 1, 4, 128
+; BE-NEXT: or 12, 12, 27
+; BE-NEXT: or 11, 29, 11
+; BE-NEXT: or 8, 12, 8
+; BE-NEXT: srd 12, 6, 4
+; BE-NEXT: or 11, 11, 0
+; BE-NEXT: srad 10, 3, 10
+; BE-NEXT: srad 29, 3, 30
+; BE-NEXT: or 11, 12, 11
+; BE-NEXT: cmpwi 5, 30, 1
+; BE-NEXT: bc 12, 20, .LBB11_4
+; BE-NEXT: # %bb.3:
+; BE-NEXT: ori 12, 29, 0
+; BE-NEXT: b .LBB11_5
+; BE-NEXT: .LBB11_4:
+; BE-NEXT: addi 12, 28, 0
+; BE-NEXT: .LBB11_5:
+; BE-NEXT: bc 12, 4, .LBB11_7
+; BE-NEXT: # %bb.6:
+; BE-NEXT: ori 8, 9, 0
+; BE-NEXT: ori 9, 10, 0
+; BE-NEXT: b .LBB11_8
+; BE-NEXT: .LBB11_7:
+; BE-NEXT: addi 9, 11, 0
+; BE-NEXT: .LBB11_8:
+; BE-NEXT: sradi 10, 3, 63
+; BE-NEXT: srad 3, 3, 4
+; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; BE-NEXT: bc 12, 2, .LBB11_10
+; BE-NEXT: # %bb.9:
+; BE-NEXT: ori 4, 8, 0
+; BE-NEXT: ori 6, 9, 0
+; BE-NEXT: b .LBB11_11
+; BE-NEXT: .LBB11_10:
+; BE-NEXT: addi 4, 7, 0
+; BE-NEXT: .LBB11_11:
+; BE-NEXT: bc 12, 4, .LBB11_13
+; BE-NEXT: # %bb.12:
+; BE-NEXT: ori 7, 10, 0
+; BE-NEXT: ori 3, 10, 0
+; BE-NEXT: b .LBB11_14
+; BE-NEXT: .LBB11_13:
+; BE-NEXT: addi 7, 12, 0
+; BE-NEXT: .LBB11_14:
+; BE-NEXT: std 3, 0(5)
+; BE-NEXT: std 7, 8(5)
+; BE-NEXT: std 4, 24(5)
+; BE-NEXT: std 6, 16(5)
+; BE-NEXT: blr
+;
+; LE-32BIT-LABEL: ashr_32bytes:
+; LE-32BIT: # %bb.0:
+; LE-32BIT-NEXT: stwu 1, -144(1)
+; LE-32BIT-NEXT: mfcr 12
+; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: stw 12, 68(1)
+; LE-32BIT-NEXT: lwz 0, 28(4)
+; LE-32BIT-NEXT: lwz 29, 4(3)
+; LE-32BIT-NEXT: lwz 12, 0(3)
+; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28
+; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 23, 30, 224
+; LE-32BIT-NEXT: lwz 5, 24(3)
+; LE-32BIT-NEXT: addi 21, 30, -224
+; LE-32BIT-NEXT: lwz 8, 28(3)
+; LE-32BIT-NEXT: subfic 4, 30, 160
+; LE-32BIT-NEXT: lwz 10, 20(3)
+; LE-32BIT-NEXT: addi 11, 30, -128
+; LE-32BIT-NEXT: lwz 9, 16(3)
+; LE-32BIT-NEXT: subfic 25, 30, 96
+; LE-32BIT-NEXT: lwz 26, 12(3)
+; LE-32BIT-NEXT: addi 0, 30, -64
+; LE-32BIT-NEXT: lwz 7, 8(3)
+; LE-32BIT-NEXT: addi 3, 30, -192
+; LE-32BIT-NEXT: subfic 27, 30, 32
+; LE-32BIT-NEXT: slw 23, 12, 23
+; LE-32BIT-NEXT: srw 16, 29, 3
+; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 20, 8, 30
+; LE-32BIT-NEXT: sraw 15, 12, 21
+; LE-32BIT-NEXT: cmpwi 1, 21, 1
+; LE-32BIT-NEXT: slw 21, 7, 4
+; LE-32BIT-NEXT: srw 14, 26, 11
+; LE-32BIT-NEXT: slw 31, 9, 25
+; LE-32BIT-NEXT: srw 3, 10, 0
+; LE-32BIT-NEXT: or 23, 16, 23
+; LE-32BIT-NEXT: slw 16, 5, 27
+; LE-32BIT-NEXT: srw 19, 10, 30
+; LE-32BIT-NEXT: or 21, 14, 21
+; LE-32BIT-NEXT: slw 14, 9, 27
+; LE-32BIT-NEXT: or 3, 3, 31
+; LE-32BIT-NEXT: slw 31, 12, 4
+; LE-32BIT-NEXT: or 20, 20, 16
+; LE-32BIT-NEXT: srw 16, 29, 11
+; LE-32BIT-NEXT: or 19, 19, 14
+; LE-32BIT-NEXT: slw 14, 12, 25
+; LE-32BIT-NEXT: or 16, 16, 31
+; LE-32BIT-NEXT: srw 31, 29, 0
+; LE-32BIT-NEXT: addi 24, 30, -160
+; LE-32BIT-NEXT: srw 18, 26, 30
+; LE-32BIT-NEXT: or 14, 31, 14
+; LE-32BIT-NEXT: slw 31, 7, 27
+; LE-32BIT-NEXT: addi 28, 30, -96
+; LE-32BIT-NEXT: srw 17, 29, 30
+; LE-32BIT-NEXT: stw 4, 32(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 18, 18, 31
+; LE-32BIT-NEXT: slw 31, 12, 27
+; LE-32BIT-NEXT: bc 12, 4, .LBB11_2
+; LE-32BIT-NEXT: # %bb.1:
+; LE-32BIT-NEXT: ori 4, 15, 0
+; LE-32BIT-NEXT: b .LBB11_3
+; LE-32BIT-NEXT: .LBB11_2:
+; LE-32BIT-NEXT: addi 4, 23, 0
+; LE-32BIT-NEXT: .LBB11_3:
+; LE-32BIT-NEXT: srw 15, 7, 24
+; LE-32BIT-NEXT: or 17, 17, 31
+; LE-32BIT-NEXT: addi 31, 30, -32
+; LE-32BIT-NEXT: or 21, 21, 15
+; LE-32BIT-NEXT: srw 15, 9, 28
+; LE-32BIT-NEXT: or 3, 3, 15
+; LE-32BIT-NEXT: srw 15, 5, 31
+; LE-32BIT-NEXT: or 20, 20, 15
+; LE-32BIT-NEXT: srw 15, 9, 31
+; LE-32BIT-NEXT: stw 3, 28(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: or 3, 19, 15
+; LE-32BIT-NEXT: subfic 15, 30, 64
+; LE-32BIT-NEXT: stw 4, 24(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: cmpwi 1, 24, 1
+; LE-32BIT-NEXT: sraw 24, 12, 24
+; LE-32BIT-NEXT: subfic 4, 15, 32
+; LE-32BIT-NEXT: stw 0, 52(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 0, 26, 4
+; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: bc 12, 4, .LBB11_5
+; LE-32BIT-NEXT: # %bb.4:
+; LE-32BIT-NEXT: ori 3, 24, 0
+; LE-32BIT-NEXT: b .LBB11_6
+; LE-32BIT-NEXT: .LBB11_5:
+; LE-32BIT-NEXT: addi 3, 16, 0
+; LE-32BIT-NEXT: .LBB11_6:
+; LE-32BIT-NEXT: slw 16, 7, 15
+; LE-32BIT-NEXT: or 0, 16, 0
+; LE-32BIT-NEXT: subfic 16, 30, 128
+; LE-32BIT-NEXT: stw 5, 36(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: subfic 5, 16, 32
+; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: slw 3, 12, 16
+; LE-32BIT-NEXT: srw 22, 29, 5
+; LE-32BIT-NEXT: stw 8, 60(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: mr 8, 10
+; LE-32BIT-NEXT: mr 10, 27
+; LE-32BIT-NEXT: or 23, 3, 22
+; LE-32BIT-NEXT: slw 22, 7, 16
+; LE-32BIT-NEXT: srw 27, 26, 5
+; LE-32BIT-NEXT: stw 11, 40(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: mr 6, 26
+; LE-32BIT-NEXT: or 11, 22, 27
+; LE-32BIT-NEXT: slw 22, 9, 15
+; LE-32BIT-NEXT: srw 26, 8, 4
+; LE-32BIT-NEXT: subfic 3, 30, 192
+; LE-32BIT-NEXT: or 26, 22, 26
+; LE-32BIT-NEXT: cmpwi 1, 28, 1
+; LE-32BIT-NEXT: sraw 22, 12, 28
+; LE-32BIT-NEXT: subfic 19, 3, 32
+; LE-32BIT-NEXT: srw 4, 29, 4
+; LE-32BIT-NEXT: slw 28, 12, 15
+; LE-32BIT-NEXT: stw 9, 20(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: srw 19, 29, 19
+; LE-32BIT-NEXT: slw 24, 12, 3
+; LE-32BIT-NEXT: or 9, 28, 4
+; LE-32BIT-NEXT: lwz 4, 64(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 24, 24, 19
+; LE-32BIT-NEXT: bc 12, 4, .LBB11_7
+; LE-32BIT-NEXT: b .LBB11_8
+; LE-32BIT-NEXT: .LBB11_7:
+; LE-32BIT-NEXT: addi 22, 14, 0
+; LE-32BIT-NEXT: .LBB11_8:
+; LE-32BIT-NEXT: srw 19, 7, 31
+; LE-32BIT-NEXT: cmplwi 5, 30, 64
+; LE-32BIT-NEXT: cmplwi 1, 30, 128
+; LE-32BIT-NEXT: slw 3, 29, 3
+; LE-32BIT-NEXT: or 19, 18, 19
+; LE-32BIT-NEXT: cmpwi 6, 31, 1
+; LE-32BIT-NEXT: sraw 18, 12, 31
+; LE-32BIT-NEXT: crand 28, 4, 20
+; LE-32BIT-NEXT: srawi 14, 12, 31
+; LE-32BIT-NEXT: sraw 31, 12, 30
+; LE-32BIT-NEXT: or 3, 21, 3
+; LE-32BIT-NEXT: slw 21, 8, 15
+; LE-32BIT-NEXT: bc 12, 24, .LBB11_10
+; LE-32BIT-NEXT: # %bb.9:
+; LE-32BIT-NEXT: ori 28, 18, 0
+; LE-32BIT-NEXT: b .LBB11_11
+; LE-32BIT-NEXT: .LBB11_10:
+; LE-32BIT-NEXT: addi 28, 17, 0
+; LE-32BIT-NEXT: .LBB11_11:
+; LE-32BIT-NEXT: bc 12, 28, .LBB11_13
+; LE-32BIT-NEXT: # %bb.12:
+; LE-32BIT-NEXT: ori 18, 14, 0
+; LE-32BIT-NEXT: b .LBB11_14
+; LE-32BIT-NEXT: .LBB11_13:
+; LE-32BIT-NEXT: addi 18, 31, 0
+; LE-32BIT-NEXT: .LBB11_14:
+; LE-32BIT-NEXT: or 21, 20, 21
+; LE-32BIT-NEXT: subfic 20, 16, 64
+; LE-32BIT-NEXT: stw 18, 0(4)
+; LE-32BIT-NEXT: subfic 18, 20, 32
+; LE-32BIT-NEXT: slw 18, 7, 18
+; LE-32BIT-NEXT: srw 17, 6, 20
+; LE-32BIT-NEXT: or 18, 17, 18
+; LE-32BIT-NEXT: slw 17, 6, 10
+; LE-32BIT-NEXT: or 27, 0, 17
+; LE-32BIT-NEXT: slw 0, 29, 25
+; LE-32BIT-NEXT: mr 31, 8
+; LE-32BIT-NEXT: or 8, 23, 0
+; LE-32BIT-NEXT: slw 0, 6, 25
+; LE-32BIT-NEXT: or 11, 11, 0
+; LE-32BIT-NEXT: stw 11, 16(1) # 4-byte Folded Spill
+; LE-32BIT-NEXT: slw 0, 31, 10
+; LE-32BIT-NEXT: lwz 11, 32(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 0, 26, 0
+; LE-32BIT-NEXT: slw 25, 29, 10
+; LE-32BIT-NEXT: or 23, 9, 25
+; LE-32BIT-NEXT: slw 26, 29, 11
+; LE-32BIT-NEXT: or 26, 24, 26
+; LE-32BIT-NEXT: slw 24, 29, 15
+; LE-32BIT-NEXT: or 24, 19, 24
+; LE-32BIT-NEXT: lwz 19, 40(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: srw 25, 7, 20
+; LE-32BIT-NEXT: lwz 9, 24(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 8, 8, 25
+; LE-32BIT-NEXT: cmplwi 6, 19, 64
+; LE-32BIT-NEXT: srw 5, 7, 5
+; LE-32BIT-NEXT: bc 12, 24, .LBB11_16
+; LE-32BIT-NEXT: # %bb.15:
+; LE-32BIT-NEXT: ori 3, 9, 0
+; LE-32BIT-NEXT: b .LBB11_16
+; LE-32BIT-NEXT: .LBB11_16:
+; LE-32BIT-NEXT: lwz 9, 28(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 5, 18, 5
+; LE-32BIT-NEXT: lwz 17, 20(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: mr 18, 4
+; LE-32BIT-NEXT: bc 12, 20, .LBB11_18
+; LE-32BIT-NEXT: # %bb.17:
+; LE-32BIT-NEXT: ori 10, 9, 0
+; LE-32BIT-NEXT: b .LBB11_19
+; LE-32BIT-NEXT: .LBB11_18:
+; LE-32BIT-NEXT: addi 10, 21, 0
+; LE-32BIT-NEXT: .LBB11_19:
+; LE-32BIT-NEXT: lwz 9, 36(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 20, .LBB11_21
+; LE-32BIT-NEXT: # %bb.20:
+; LE-32BIT-NEXT: ori 24, 22, 0
+; LE-32BIT-NEXT: b .LBB11_21
+; LE-32BIT-NEXT: .LBB11_21:
+; LE-32BIT-NEXT: cmplwi 2, 19, 0
+; LE-32BIT-NEXT: bc 12, 10, .LBB11_22
+; LE-32BIT-NEXT: b .LBB11_23
+; LE-32BIT-NEXT: .LBB11_22:
+; LE-32BIT-NEXT: addi 3, 6, 0
+; LE-32BIT-NEXT: .LBB11_23:
+; LE-32BIT-NEXT: cmplwi 3, 16, 0
+; LE-32BIT-NEXT: srw 25, 9, 30
+; LE-32BIT-NEXT: or 25, 25, 0
+; LE-32BIT-NEXT: srw 0, 7, 19
+; LE-32BIT-NEXT: or 26, 0, 26
+; LE-32BIT-NEXT: srw 0, 7, 30
+; LE-32BIT-NEXT: or 11, 0, 23
+; LE-32BIT-NEXT: bc 12, 28, .LBB11_25
+; LE-32BIT-NEXT: # %bb.24:
+; LE-32BIT-NEXT: ori 0, 14, 0
+; LE-32BIT-NEXT: b .LBB11_26
+; LE-32BIT-NEXT: .LBB11_25:
+; LE-32BIT-NEXT: addi 0, 28, 0
+; LE-32BIT-NEXT: .LBB11_26:
+; LE-32BIT-NEXT: slw 28, 6, 16
+; LE-32BIT-NEXT: stw 0, 4(4)
+; LE-32BIT-NEXT: slw 0, 29, 16
+; LE-32BIT-NEXT: lwz 4, 52(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: or 5, 0, 5
+; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: cmplwi 7, 16, 64
+; LE-32BIT-NEXT: slw 23, 6, 15
+; LE-32BIT-NEXT: srw 22, 17, 4
+; LE-32BIT-NEXT: li 15, 0
+; LE-32BIT-NEXT: sraw 21, 12, 0
+; LE-32BIT-NEXT: bc 12, 28, .LBB11_28
+; LE-32BIT-NEXT: # %bb.27:
+; LE-32BIT-NEXT: ori 0, 15, 0
+; LE-32BIT-NEXT: b .LBB11_29
+; LE-32BIT-NEXT: .LBB11_28:
+; LE-32BIT-NEXT: addi 0, 28, 0
+; LE-32BIT-NEXT: .LBB11_29:
+; LE-32BIT-NEXT: bc 12, 20, .LBB11_31
+; LE-32BIT-NEXT: # %bb.30:
+; LE-32BIT-NEXT: ori 28, 22, 0
+; LE-32BIT-NEXT: b .LBB11_32
+; LE-32BIT-NEXT: .LBB11_31:
+; LE-32BIT-NEXT: addi 28, 25, 0
+; LE-32BIT-NEXT: .LBB11_32:
+; LE-32BIT-NEXT: bc 12, 2, .LBB11_34
+; LE-32BIT-NEXT: # %bb.33:
+; LE-32BIT-NEXT: ori 22, 24, 0
+; LE-32BIT-NEXT: b .LBB11_35
+; LE-32BIT-NEXT: .LBB11_34:
+; LE-32BIT-NEXT: addi 22, 6, 0
+; LE-32BIT-NEXT: .LBB11_35:
+; LE-32BIT-NEXT: lwz 6, 48(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: sraw 20, 12, 4
+; LE-32BIT-NEXT: lwz 16, 60(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 20, .LBB11_37
+; LE-32BIT-NEXT: # %bb.36:
+; LE-32BIT-NEXT: ori 4, 20, 0
+; LE-32BIT-NEXT: b .LBB11_38
+; LE-32BIT-NEXT: .LBB11_37:
+; LE-32BIT-NEXT: addi 4, 11, 0
+; LE-32BIT-NEXT: .LBB11_38:
+; LE-32BIT-NEXT: srw 30, 17, 30
+; LE-32BIT-NEXT: bc 12, 20, .LBB11_40
+; LE-32BIT-NEXT: # %bb.39:
+; LE-32BIT-NEXT: ori 25, 15, 0
+; LE-32BIT-NEXT: b .LBB11_41
+; LE-32BIT-NEXT: .LBB11_40:
+; LE-32BIT-NEXT: addi 25, 6, 0
+; LE-32BIT-NEXT: .LBB11_41:
+; LE-32BIT-NEXT: lwz 6, 44(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 28, .LBB11_43
+; LE-32BIT-NEXT: # %bb.42:
+; LE-32BIT-NEXT: ori 8, 27, 0
+; LE-32BIT-NEXT: ori 5, 23, 0
+; LE-32BIT-NEXT: b .LBB11_43
+; LE-32BIT-NEXT: .LBB11_43:
+; LE-32BIT-NEXT: bc 12, 2, .LBB11_44
+; LE-32BIT-NEXT: b .LBB11_45
+; LE-32BIT-NEXT: .LBB11_44:
+; LE-32BIT-NEXT: addi 4, 7, 0
+; LE-32BIT-NEXT: .LBB11_45:
+; LE-32BIT-NEXT: sraw 19, 12, 19
+; LE-32BIT-NEXT: bc 12, 2, .LBB11_46
+; LE-32BIT-NEXT: b .LBB11_47
+; LE-32BIT-NEXT: .LBB11_46:
+; LE-32BIT-NEXT: addi 10, 16, 0
+; LE-32BIT-NEXT: .LBB11_47:
+; LE-32BIT-NEXT: bc 12, 24, .LBB11_49
+; LE-32BIT-NEXT: # %bb.48:
+; LE-32BIT-NEXT: ori 26, 21, 0
+; LE-32BIT-NEXT: b .LBB11_49
+; LE-32BIT-NEXT: .LBB11_49:
+; LE-32BIT-NEXT: bc 12, 14, .LBB11_50
+; LE-32BIT-NEXT: b .LBB11_51
+; LE-32BIT-NEXT: .LBB11_50:
+; LE-32BIT-NEXT: addi 5, 29, 0
+; LE-32BIT-NEXT: .LBB11_51:
+; LE-32BIT-NEXT: bc 12, 4, .LBB11_53
+; LE-32BIT-NEXT: # %bb.52:
+; LE-32BIT-NEXT: ori 4, 14, 0
+; LE-32BIT-NEXT: b .LBB11_53
+; LE-32BIT-NEXT: .LBB11_53:
+; LE-32BIT-NEXT: or 10, 10, 0
+; LE-32BIT-NEXT: bc 12, 24, .LBB11_55
+; LE-32BIT-NEXT: # %bb.54:
+; LE-32BIT-NEXT: ori 24, 14, 0
+; LE-32BIT-NEXT: b .LBB11_56
+; LE-32BIT-NEXT: .LBB11_55:
+; LE-32BIT-NEXT: addi 24, 6, 0
+; LE-32BIT-NEXT: .LBB11_56:
+; LE-32BIT-NEXT: lwz 6, 16(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: bc 12, 4, .LBB11_57
+; LE-32BIT-NEXT: b .LBB11_58
+; LE-32BIT-NEXT: .LBB11_57:
+; LE-32BIT-NEXT: addi 3, 10, 0
+; LE-32BIT-NEXT: .LBB11_58:
+; LE-32BIT-NEXT: bc 12, 20, .LBB11_60
+; LE-32BIT-NEXT: # %bb.59:
+; LE-32BIT-NEXT: ori 0, 15, 0
+; LE-32BIT-NEXT: b .LBB11_61
+; LE-32BIT-NEXT: .LBB11_60:
+; LE-32BIT-NEXT: addi 0, 30, 0
+; LE-32BIT-NEXT: .LBB11_61:
+; LE-32BIT-NEXT: bc 12, 24, .LBB11_63
+; LE-32BIT-NEXT: # %bb.62:
+; LE-32BIT-NEXT: ori 30, 14, 0
+; LE-32BIT-NEXT: b .LBB11_64
+; LE-32BIT-NEXT: .LBB11_63:
+; LE-32BIT-NEXT: addi 30, 19, 0
+; LE-32BIT-NEXT: .LBB11_64:
+; LE-32BIT-NEXT: bc 12, 2, .LBB11_65
+; LE-32BIT-NEXT: b .LBB11_66
+; LE-32BIT-NEXT: .LBB11_65:
+; LE-32BIT-NEXT: addi 3, 16, 0
+; LE-32BIT-NEXT: .LBB11_66:
+; LE-32BIT-NEXT: stw 4, 8(18)
+; LE-32BIT-NEXT: bc 12, 28, .LBB11_68
+; LE-32BIT-NEXT: # %bb.67:
+; LE-32BIT-NEXT: ori 27, 15, 0
+; LE-32BIT-NEXT: b .LBB11_69
+; LE-32BIT-NEXT: .LBB11_68:
+; LE-32BIT-NEXT: addi 27, 6, 0
+; LE-32BIT-NEXT: .LBB11_69:
+; LE-32BIT-NEXT: bc 12, 14, .LBB11_71
+; LE-32BIT-NEXT: # %bb.70:
+; LE-32BIT-NEXT: ori 6, 8, 0
+; LE-32BIT-NEXT: b .LBB11_72
+; LE-32BIT-NEXT: .LBB11_71:
+; LE-32BIT-NEXT: addi 6, 12, 0
+; LE-32BIT-NEXT: .LBB11_72:
+; LE-32BIT-NEXT: bc 12, 2, .LBB11_74
+; LE-32BIT-NEXT: # %bb.73:
+; LE-32BIT-NEXT: ori 8, 28, 0
+; LE-32BIT-NEXT: b .LBB11_75
+; LE-32BIT-NEXT: .LBB11_74:
+; LE-32BIT-NEXT: addi 8, 9, 0
+; LE-32BIT-NEXT: .LBB11_75:
+; LE-32BIT-NEXT: bc 12, 10, .LBB11_77
+; LE-32BIT-NEXT: # %bb.76:
+; LE-32BIT-NEXT: ori 28, 26, 0
+; LE-32BIT-NEXT: b .LBB11_78
+; LE-32BIT-NEXT: .LBB11_77:
+; LE-32BIT-NEXT: addi 28, 7, 0
+; LE-32BIT-NEXT: .LBB11_78:
+; LE-32BIT-NEXT: stw 3, 28(18)
+; LE-32BIT-NEXT: or 7, 8, 27
+; LE-32BIT-NEXT: or 4, 0, 6
+; LE-32BIT-NEXT: or 3, 25, 5
+; LE-32BIT-NEXT: bc 12, 4, .LBB11_80
+; LE-32BIT-NEXT: # %bb.79:
+; LE-32BIT-NEXT: ori 6, 28, 0
+; LE-32BIT-NEXT: ori 4, 30, 0
+; LE-32BIT-NEXT: ori 3, 24, 0
+; LE-32BIT-NEXT: ori 12, 14, 0
+; LE-32BIT-NEXT: b .LBB11_81
+; LE-32BIT-NEXT: .LBB11_80:
+; LE-32BIT-NEXT: addi 6, 7, 0
+; LE-32BIT-NEXT: addi 12, 22, 0
+; LE-32BIT-NEXT: .LBB11_81:
+; LE-32BIT-NEXT: bc 12, 2, .LBB11_83
+; LE-32BIT-NEXT: # %bb.82:
+; LE-32BIT-NEXT: ori 5, 6, 0
+; LE-32BIT-NEXT: b .LBB11_84
+; LE-32BIT-NEXT: .LBB11_83:
+; LE-32BIT-NEXT: addi 5, 9, 0
+; LE-32BIT-NEXT: addi 4, 17, 0
+; LE-32BIT-NEXT: addi 3, 31, 0
+; LE-32BIT-NEXT: .LBB11_84:
+; LE-32BIT-NEXT: stw 12, 12(18)
+; LE-32BIT-NEXT: stw 5, 24(18)
+; LE-32BIT-NEXT: stw 4, 16(18)
+; LE-32BIT-NEXT: stw 3, 20(18)
+; LE-32BIT-NEXT: lwz 12, 68(1)
+; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: mtcrf 32, 12 # cr2
+; LE-32BIT-NEXT: mtcrf 16, 12 # cr3
+; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload
+; LE-32BIT-NEXT: addi 1, 1, 144
+; LE-32BIT-NEXT: blr
+ %src = load i256, ptr %src.ptr, align 1 ; value to be shifted; align 1 means no alignment is assumed for the pointer
+ %byteOff = load i256, ptr %byteOff.ptr, align 1 ; shift amount in bytes, loaded at the same i256 width as %src
+ %bitOff = shl i256 %byteOff, 3 ; bytes -> bits (multiply by 8)
+ %res = ashr i256 %src, %bitOff ; the wide arithmetic right shift whose lowering the assertions above pin down
+ store i256 %res, ptr %dst, align 1 ; write the shifted value to %dst, again with align 1
+ ret void ; result is returned only through memory
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LE: {{.*}}
diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll
new file mode 100644
index 0000000000000..c6d3a5f7a90bd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll
@@ -0,0 +1,8030 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-NO-SHLD,X64-NO-BMI2-NO-SHLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-NO-BMI2,X64-SHLD,X64-NO-BMI2-HAVE-SHLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-NO-SHLD,X64-HAVE-BMI2-NO-SHLD
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-BMI2,X64-SHLD,X64-HAVE-BMI2-HAVE-SHLD
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-NO-SHLD,X32-NO-BMI2-NO-SHLD
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-NO-BMI2,X32-SHLD,X32-NO-BMI2-HAVE-SHLD
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-BMI2,X32-NO-SHLD,X32-HAVE-BMI2-NO-SHLD
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X32,X32-BMI2,X32-SHLD,X32-HAVE-BMI2-HAVE-SHLD
+
+define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: lshr_4bytes:
+; X64-NO-BMI2: # %bb.0:
+; X64-NO-BMI2-NEXT: movl (%rdi), %eax
+; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-NEXT: shlb $3, %cl
+; X64-NO-BMI2-NEXT: shrl %cl, %eax
+; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
+; X64-NO-BMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: lshr_4bytes:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movzbl (%rsi), %eax
+; X64-BMI2-NEXT: shlb $3, %al
+; X64-BMI2-NEXT: shrxl %eax, (%rdi), %eax
+; X64-BMI2-NEXT: movl %eax, (%rdx)
+; X64-BMI2-NEXT: retq
+;
+; X32-NO-BMI2-LABEL: lshr_4bytes:
+; X32-NO-BMI2: # %bb.0:
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NEXT: movl (%edx), %edx
+; X32-NO-BMI2-NEXT: movzbl (%ecx), %ecx
+; X32-NO-BMI2-NEXT: shlb $3, %cl
+; X32-NO-BMI2-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NEXT: movl %edx, (%eax)
+; X32-NO-BMI2-NEXT: retl
+;
+; X32-BMI2-LABEL: lshr_4bytes:
+; X32-BMI2: # %bb.0:
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-BMI2-NEXT: movzbl (%edx), %edx
+; X32-BMI2-NEXT: shlb $3, %dl
+; X32-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
+; X32-BMI2-NEXT: movl %ecx, (%eax)
+; X32-BMI2-NEXT: retl
+ %src = load i32, ptr %src.ptr, align 1 ; test body - logical right shift of an i32 by a memory-supplied amount; this loads the value to shift (align 1, so no alignment is assumed)
+ %byteOff = load i32, ptr %byteOff.ptr, align 1 ; raw shift amount, before scaling
+ %bitOff = shl i32 %byteOff, 3 ; scale the amount by 8 (value names suggest a byte offset becoming a bit offset); the checks above do this as an 8-bit shlb $3
+ %res = lshr i32 %src, %bitOff ; logical right shift by the scaled amount
+ store i32 %res, ptr %dst, align 1 ; write the 4-byte result to %dst, again with align 1
+ ret void
+}
+define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: shl_4bytes:
+; X64-NO-BMI2: # %bb.0:
+; X64-NO-BMI2-NEXT: movl (%rdi), %eax
+; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-NEXT: shlb $3, %cl
+; X64-NO-BMI2-NEXT: shll %cl, %eax
+; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
+; X64-NO-BMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: shl_4bytes:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movzbl (%rsi), %eax
+; X64-BMI2-NEXT: shlb $3, %al
+; X64-BMI2-NEXT: shlxl %eax, (%rdi), %eax
+; X64-BMI2-NEXT: movl %eax, (%rdx)
+; X64-BMI2-NEXT: retq
+;
+; X32-NO-BMI2-LABEL: shl_4bytes:
+; X32-NO-BMI2: # %bb.0:
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NEXT: movl (%edx), %edx
+; X32-NO-BMI2-NEXT: movzbl (%ecx), %ecx
+; X32-NO-BMI2-NEXT: shlb $3, %cl
+; X32-NO-BMI2-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NEXT: movl %edx, (%eax)
+; X32-NO-BMI2-NEXT: retl
+;
+; X32-BMI2-LABEL: shl_4bytes:
+; X32-BMI2: # %bb.0:
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-BMI2-NEXT: movzbl (%edx), %edx
+; X32-BMI2-NEXT: shlb $3, %dl
+; X32-BMI2-NEXT: shlxl %edx, (%ecx), %ecx
+; X32-BMI2-NEXT: movl %ecx, (%eax)
+; X32-BMI2-NEXT: retl
+ %src = load i32, ptr %src.ptr, align 1 ; test body - left shift of an i32 by a memory-supplied amount; this loads the value to shift (align 1, so no alignment is assumed)
+ %byteOff = load i32, ptr %byteOff.ptr, align 1 ; raw shift amount, before scaling
+ %bitOff = shl i32 %byteOff, 3 ; scale the amount by 8 (value names suggest a byte offset becoming a bit offset); the checks above do this as an 8-bit shlb $3
+ %res = shl i32 %src, %bitOff ; left shift by the scaled amount
+ store i32 %res, ptr %dst, align 1 ; write the 4-byte result to %dst, again with align 1
+ ret void
+}
+define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: ashr_4bytes:
+; X64-NO-BMI2: # %bb.0:
+; X64-NO-BMI2-NEXT: movl (%rdi), %eax
+; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-NEXT: shlb $3, %cl
+; X64-NO-BMI2-NEXT: sarl %cl, %eax
+; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
+; X64-NO-BMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: ashr_4bytes:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movzbl (%rsi), %eax
+; X64-BMI2-NEXT: shlb $3, %al
+; X64-BMI2-NEXT: sarxl %eax, (%rdi), %eax
+; X64-BMI2-NEXT: movl %eax, (%rdx)
+; X64-BMI2-NEXT: retq
+;
+; X32-NO-BMI2-LABEL: ashr_4bytes:
+; X32-NO-BMI2: # %bb.0:
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NEXT: movl (%edx), %edx
+; X32-NO-BMI2-NEXT: movzbl (%ecx), %ecx
+; X32-NO-BMI2-NEXT: shlb $3, %cl
+; X32-NO-BMI2-NEXT: sarl %cl, %edx
+; X32-NO-BMI2-NEXT: movl %edx, (%eax)
+; X32-NO-BMI2-NEXT: retl
+;
+; X32-BMI2-LABEL: ashr_4bytes:
+; X32-BMI2: # %bb.0:
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-BMI2-NEXT: movzbl (%edx), %edx
+; X32-BMI2-NEXT: shlb $3, %dl
+; X32-BMI2-NEXT: sarxl %edx, (%ecx), %ecx
+; X32-BMI2-NEXT: movl %ecx, (%eax)
+; X32-BMI2-NEXT: retl
+ %src = load i32, ptr %src.ptr, align 1 ; test body - arithmetic right shift of an i32 by a memory-supplied amount; this loads the value to shift (align 1, so no alignment is assumed)
+ %byteOff = load i32, ptr %byteOff.ptr, align 1 ; raw shift amount, before scaling
+ %bitOff = shl i32 %byteOff, 3 ; scale the amount by 8 (value names suggest a byte offset becoming a bit offset); the checks above do this as an 8-bit shlb $3
+ %res = ashr i32 %src, %bitOff ; arithmetic (sign-preserving) right shift by the scaled amount
+ store i32 %res, ptr %dst, align 1 ; write the 4-byte result to %dst, again with align 1
+ ret void
+}
+
+define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: lshr_8bytes:
+; X64-NO-BMI2: # %bb.0:
+; X64-NO-BMI2-NEXT: movq (%rdi), %rax
+; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-NEXT: shlb $3, %cl
+; X64-NO-BMI2-NEXT: shrq %cl, %rax
+; X64-NO-BMI2-NEXT: movq %rax, (%rdx)
+; X64-NO-BMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: lshr_8bytes:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movzbl (%rsi), %eax
+; X64-BMI2-NEXT: shlb $3, %al
+; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rax
+; X64-BMI2-NEXT: movq %rax, (%rdx)
+; X64-BMI2-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: lshr_8bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_8bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_8bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%edx), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_8bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i64, ptr %src.ptr, align 1 ; test body - logical right shift of an i64 by a memory-supplied amount; this loads the value to shift (align 1, so no alignment is assumed)
+ %byteOff = load i64, ptr %byteOff.ptr, align 1 ; raw shift amount, before scaling
+ %bitOff = shl i64 %byteOff, 3 ; scale the amount by 8 (value names suggest a byte offset becoming a bit offset); the checks above do this as an 8-bit shlb $3
+ %res = lshr i64 %src, %bitOff ; logical right shift; the i686 checks above build it from two 32-bit halves, choose between them with testb $32 plus cmov, and zero the vacated high half
+ store i64 %res, ptr %dst, align 1 ; write the 8-byte result to %dst, again with align 1
+ ret void
+}
+define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: shl_8bytes:
+; X64-NO-BMI2: # %bb.0:
+; X64-NO-BMI2-NEXT: movq (%rdi), %rax
+; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-NEXT: shlb $3, %cl
+; X64-NO-BMI2-NEXT: shlq %cl, %rax
+; X64-NO-BMI2-NEXT: movq %rax, (%rdx)
+; X64-NO-BMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: shl_8bytes:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movzbl (%rsi), %eax
+; X64-BMI2-NEXT: shlb $3, %al
+; X64-BMI2-NEXT: shlxq %rax, (%rdi), %rax
+; X64-BMI2-NEXT: movq %rax, (%rdx)
+; X64-BMI2-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: shl_8bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_8bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_8bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, 4(%edx), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_8bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%edx), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i64, ptr %src.ptr, align 1 ; test body - left shift of an i64 by a memory-supplied amount; this loads the value to shift (align 1, so no alignment is assumed)
+ %byteOff = load i64, ptr %byteOff.ptr, align 1 ; raw shift amount, before scaling
+ %bitOff = shl i64 %byteOff, 3 ; scale the amount by 8 (value names suggest a byte offset becoming a bit offset); the checks above do this as an 8-bit shlb $3
+ %res = shl i64 %src, %bitOff ; left shift; the i686 checks above build it from two 32-bit halves, choose between them with testb $32 plus cmov, and zero the vacated low half
+ store i64 %res, ptr %dst, align 1 ; write the 8-byte result to %dst, again with align 1
+ ret void
+}
+define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-LABEL: ashr_8bytes:
+; X64-NO-BMI2: # %bb.0:
+; X64-NO-BMI2-NEXT: movq (%rdi), %rax
+; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-NEXT: shlb $3, %cl
+; X64-NO-BMI2-NEXT: sarq %cl, %rax
+; X64-NO-BMI2-NEXT: movq %rax, (%rdx)
+; X64-NO-BMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: ashr_8bytes:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movzbl (%rsi), %eax
+; X64-BMI2-NEXT: shlb $3, %al
+; X64-BMI2-NEXT: sarxq %rax, (%rdi), %rax
+; X64-BMI2-NEXT: movq %rax, (%rdx)
+; X64-BMI2-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: ashr_8bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%edx)
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_8bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_8bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%edx), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esi), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %edx, %ecx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_8bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esi), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i64, ptr %src.ptr, align 1 ; test body - arithmetic right shift of an i64 by a memory-supplied amount; this loads the value to shift (align 1, so no alignment is assumed)
+ %byteOff = load i64, ptr %byteOff.ptr, align 1 ; raw shift amount, before scaling
+ %bitOff = shl i64 %byteOff, 3 ; scale the amount by 8 (value names suggest a byte offset becoming a bit offset); the checks above do this as an 8-bit shlb $3
+ %res = ashr i64 %src, %bitOff ; arithmetic right shift; the i686 checks above build it from two 32-bit halves, choose between them with testb $32 plus cmov, and fill the vacated high half via sarl $31
+ store i64 %res, ptr %dst, align 1 ; write the 8-byte result to %dst, again with align 1
+ ret void
+}
+
+define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rcx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, 8(%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes:
+; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes:
+; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rcx, (%rdi), %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rsi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rcx, %rax, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes:
+; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: subl $32, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%eax), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: addl $32, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ecx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = lshr i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: shl_16bytes:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rcx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, 8(%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes:
+; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes:
+; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rcx, 8(%rdi), %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rcx, %rax, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rdi, %rax, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rsi, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes:
+; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rax, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: shl_16bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esi), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edi), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%eax), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%ecx)
+; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ebp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = shl i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: sarq $63, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rsi
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx)
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes:
+; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes:
+; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rcx, (%rdi), %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rsi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %rcx, %rax, %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rsi, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes:
+; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %rdi, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%edi), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %dh
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %edx, %esi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%eax), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i128, ptr %src.ptr, align 1
+ %byteOff = load i128, ptr %byteOff.ptr, align 1
+ %bitOff = shl i128 %byteOff, 3
+ %res = ashr i128 %src, %bitOff
+ store i128 %res, ptr %dst, align 1
+ ret void
+}
+
+define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10
+; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes:
+; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes:
+; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rcx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r9d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r9b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r10, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rax, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r13d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r12, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rbx, %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r9b, %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rbx, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r9), %r11d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r12d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r12, %r14, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %rbx, %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r10, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r9b, %r9b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes:
+; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %r11, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r11, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: subl $136, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebx), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: addl $136, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $120, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb (%eax), %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%ebp), %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%esi), %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $120, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $160, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esi), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %ecx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%esi), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 20(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $160, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $128, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %esi, %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $128, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = lshr i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
+define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: shl_32bytes:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %r11
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r14, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdx,%rdx), %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbx, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r11
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, (%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 8(%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 24(%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 16(%r9)
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes:
+; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %rbp
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rbp
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbp, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r12, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, (%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, 16(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp
+; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes:
+; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rcx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r8d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rax, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rcx, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r13d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r12, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r14, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r8b, %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r15, %rbp, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r9, %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r10, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r8), %ebx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %r12d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r14, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r10, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r9, %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %bl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r8b, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 24(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 16(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes:
+; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r8, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r10, %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbp, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r12, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r10, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbx, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, (%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 16(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: shl_32bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: subl $140, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%ecx), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebx), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebx), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%edi), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, (%esp) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edi), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ah # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %edx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %dh
+; X32-NO-BMI2-NO-SHLD-NEXT: subb %ch, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dh
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%esi), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ah # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %edx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %al # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 12(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 28(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 16(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: addl $140, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $116, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%edx), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%esi), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%esi), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 24(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 28(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%eax)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $116, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $164, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%edx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edx), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%ecx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%eax), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 24(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 16(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 20(%ecx)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $164, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $128, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%ebp), %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ebp), %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebx), %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebp, %edi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%esi), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%esi), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %ch
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %ch
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %ch, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %esi, %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 24(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $128, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = shl i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
+define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
+; X64-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes:
+; X64-NO-BMI2-NO-SHLD: # %bb.0:
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r10
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r11
+; X64-NO-BMI2-NO-SHLD-NEXT: sarq $63, %r11
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r12,%r12), %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdx, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdx, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %rdx
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9
+; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rdx, %rdi
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r12, %r8
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r10)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r10)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r10)
+; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r10)
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15
+; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp
+; X64-NO-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes:
+; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx)
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15
+; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp
+; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes:
+; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r8d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %rcx, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r11
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rsi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r8, %r10, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r12, %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %rbx, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %ebp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bpl
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r13, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %r14, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r15, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rax, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al
+; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r8b, %al
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %r12d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rbp, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r10, %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %al
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r14, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r13, %rax
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r8), %r11d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r13d
+; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r15, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r14, %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r11, %r10, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r8b, %r8b
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r9
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 24(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp
+; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
+;
+; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes:
+; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r12d, %r12d
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rax, %r11, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r13, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %r11, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbp, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx)
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
+;
+; X32-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes:
+; X32-NO-BMI2-NO-SHLD: # %bb.0:
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: subl $144, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebp), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebp), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebp), %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: subb %ah, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %bh, %bh
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx
+; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax)
+; X32-NO-BMI2-NO-SHLD-NEXT: addl $144, %esp
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes:
+; X32-NO-BMI2-HAVE-SHLD: # %bb.0:
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $124, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb (%eax), %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dh, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dh, %dh
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax
+; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx)
+; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $124, %esp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-NO-BMI2-HAVE-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes:
+; X32-HAVE-BMI2-NO-SHLD: # %bb.0:
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $168, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%edx), %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%edx), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %esi, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %eax, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %edx, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %esi, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edi), %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl def $ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ecx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 20(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax)
+; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $168, %esp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-NO-SHLD-NEXT: retl
+;
+; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes:
+; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0:
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $132, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %esi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %eax, %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edi, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%ecx)
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $132, %esp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
+; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+ %src = load i256, ptr %src.ptr, align 1
+ %byteOff = load i256, ptr %byteOff.ptr, align 1
+ %bitOff = shl i256 %byteOff, 3
+ %res = ashr i256 %src, %bitOff
+ store i256 %res, ptr %dst, align 1
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ALL: {{.*}}
+; X32: {{.*}}
+; X32-NO-SHLD: {{.*}}
+; X32-SHLD: {{.*}}
+; X64: {{.*}}
+; X64-NO-SHLD: {{.*}}
+; X64-SHLD: {{.*}}
More information about the llvm-commits mailing list