[llvm] 1cf9b24 - [DAG] Enable ISD::FSHL/R SimplifyMultipleUseDemandedBits handling inside SimplifyDemandedBits
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 12 11:26:01 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-12T19:25:20+01:00
New Revision: 1cf9b24da3b8eb8e7636de2209f192ded5ce20cd
URL: https://github.com/llvm/llvm-project/commit/1cf9b24da3b8eb8e7636de2209f192ded5ce20cd
DIFF: https://github.com/llvm/llvm-project/commit/1cf9b24da3b8eb8e7636de2209f192ded5ce20cd.diff
LOG: [DAG] Enable ISD::FSHL/R SimplifyMultipleUseDemandedBits handling inside SimplifyDemandedBits
This patch allows SimplifyDemandedBits to call SimplifyMultipleUseDemandedBits in cases where the funnel-shift operands have other uses, enabling us to peek through the shifted values when we don't demand all of their bits/elts.
This helps with several of the regressions from D125836.
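As a rough illustration of the kind of case this now catches (a hypothetical IR-level sketch, not one of the patch's test cases; the function name and constants are made up, and the actual transform runs on the SelectionDAG nodes after lowering): with a constant shift amount, only some bits of each funnel-shift operand reach the demanded bits of the result, so a masked operand can be looked through even though the mask node itself has another use.

  define i64 @fshl_low_byte(i32 %a, i32 %b) {
    %hi = and i32 %b, 4278190080        ; keep only the top byte of %b (0xFF000000)
    %r = call i32 @llvm.fshl.i32(i32 %a, i32 %hi, i32 8)
    %lo = trunc i32 %r to i8            ; only the low 8 bits of the fshl are demanded
    %lo.ext = zext i8 %lo to i64
    %hi.ext = zext i32 %hi to i64       ; second use of %hi, so it previously blocked simplification
    %sum = add i64 %lo.ext, %hi.ext
    ret i64 %sum
  }
  declare i32 @llvm.fshl.i32(i32, i32, i32)

Since fshl(%a, %hi, 8) == (%a << 8) | (%hi >> 24), the demanded low byte depends only on the top byte of %hi, which the AND mask preserves. SimplifyMultipleUseDemandedBits can therefore hand back %b to use in place of %hi for the funnel shift without modifying the multi-use AND node.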
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
llvm/test/CodeGen/X86/funnel-shift.ll
llvm/test/CodeGen/X86/rotate-extract.ll
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/shift-mask.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 57539f3d6b97f..03fc1728ac44a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1877,6 +1877,22 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
+ !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
+ DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
+ DemandedOp1, Op2);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
diff --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
index 83fa3fba63bd1..27593cec1d6a6 100644
--- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
@@ -211,36 +211,36 @@ define i1 @test_urem_oversized(i66 %X) nounwind {
; PPC-NEXT: lis 6, -12795
; PPC-NEXT: ori 6, 6, 40665
; PPC-NEXT: mulhwu 7, 5, 6
-; PPC-NEXT: lis 8, 12057
-; PPC-NEXT: ori 8, 8, 37186
-; PPC-NEXT: mullw 10, 4, 6
-; PPC-NEXT: addc 7, 10, 7
+; PPC-NEXT: lis 9, 12057
+; PPC-NEXT: ori 9, 9, 37186
+; PPC-NEXT: mullw 11, 4, 6
+; PPC-NEXT: addc 7, 11, 7
+; PPC-NEXT: lis 11, -5526
+; PPC-NEXT: ori 11, 11, 61135
+; PPC-NEXT: mulhwu 8, 4, 6
+; PPC-NEXT: addze 8, 8
+; PPC-NEXT: mulhwu 10, 5, 9
+; PPC-NEXT: mullw 4, 4, 9
+; PPC-NEXT: mullw 9, 5, 9
+; PPC-NEXT: addc 7, 9, 7
+; PPC-NEXT: addze 9, 10
+; PPC-NEXT: rotlwi 10, 7, 31
; PPC-NEXT: mullw 3, 3, 6
-; PPC-NEXT: mullw 11, 5, 6
-; PPC-NEXT: mulhwu 6, 4, 6
-; PPC-NEXT: addze 6, 6
-; PPC-NEXT: slwi 4, 4, 1
-; PPC-NEXT: mulhwu 9, 5, 8
-; PPC-NEXT: mullw 8, 5, 8
-; PPC-NEXT: addc 7, 8, 7
-; PPC-NEXT: addze 9, 9
+; PPC-NEXT: mullw 6, 5, 6
; PPC-NEXT: slwi 5, 5, 1
-; PPC-NEXT: add 6, 6, 9
; PPC-NEXT: add 3, 5, 3
-; PPC-NEXT: rotlwi 8, 11, 31
-; PPC-NEXT: sub 4, 6, 4
-; PPC-NEXT: lis 5, -5526
-; PPC-NEXT: rlwimi 8, 7, 31, 0, 0
-; PPC-NEXT: rotlwi 7, 7, 31
+; PPC-NEXT: rotlwi 5, 6, 31
+; PPC-NEXT: rlwimi 5, 7, 31, 0, 0
+; PPC-NEXT: add 7, 8, 9
+; PPC-NEXT: add 4, 4, 7
; PPC-NEXT: add 3, 4, 3
-; PPC-NEXT: ori 5, 5, 61135
-; PPC-NEXT: rlwimi 7, 3, 31, 0, 0
-; PPC-NEXT: cmplw 8, 5
-; PPC-NEXT: cmplwi 1, 7, 13
+; PPC-NEXT: rlwimi 10, 3, 31, 0, 0
+; PPC-NEXT: cmplw 5, 11
+; PPC-NEXT: cmplwi 1, 10, 13
; PPC-NEXT: rlwinm 3, 3, 31, 31, 31
; PPC-NEXT: crand 20, 6, 0
; PPC-NEXT: crandc 21, 4, 6
-; PPC-NEXT: rlwimi. 3, 11, 1, 30, 30
+; PPC-NEXT: rlwimi. 3, 6, 1, 30, 30
; PPC-NEXT: cror 20, 20, 21
; PPC-NEXT: crnand 20, 2, 20
; PPC-NEXT: li 3, 1
diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 24a710f734ae8..229a94bb399f1 100644
--- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -801,48 +801,48 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; AVX-NEXT: pushq %rbx
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %r9
-; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: movq %r9, %r8
; AVX-NEXT: shrq $56, %r8
; AVX-NEXT: andl $15, %r8d
; AVX-NEXT: movq %r9, %r10
; AVX-NEXT: shrq $48, %r10
; AVX-NEXT: andl $15, %r10d
-; AVX-NEXT: movq %rcx, %rdx
-; AVX-NEXT: shldq $24, %r9, %rdx
-; AVX-NEXT: andl $15, %edx
+; AVX-NEXT: movq %r9, %rsi
+; AVX-NEXT: shrq $40, %rsi
+; AVX-NEXT: andl $15, %esi
; AVX-NEXT: movq %r9, %r11
; AVX-NEXT: shrq $32, %r11
; AVX-NEXT: andl $15, %r11d
-; AVX-NEXT: movq %rcx, %rdi
+; AVX-NEXT: movq %rdx, %rdi
; AVX-NEXT: shrq $56, %rdi
; AVX-NEXT: andl $15, %edi
-; AVX-NEXT: movq %rcx, %rsi
-; AVX-NEXT: shrq $48, %rsi
-; AVX-NEXT: andl $15, %esi
-; AVX-NEXT: movq %rcx, %rax
-; AVX-NEXT: shrq $40, %rax
+; AVX-NEXT: movq %rdx, %rax
+; AVX-NEXT: shrq $48, %rax
; AVX-NEXT: andl $15, %eax
-; AVX-NEXT: movq %rcx, %rbx
+; AVX-NEXT: movq %rdx, %rcx
+; AVX-NEXT: shrq $40, %rcx
+; AVX-NEXT: andl $15, %ecx
+; AVX-NEXT: movq %rdx, %rbx
; AVX-NEXT: shrq $32, %rbx
; AVX-NEXT: andl $15, %ebx
; AVX-NEXT: shlq $32, %rbx
-; AVX-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; AVX-NEXT: orq %rbx, %rcx
-; AVX-NEXT: shlq $40, %rax
+; AVX-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; AVX-NEXT: orq %rbx, %rdx
+; AVX-NEXT: shlq $40, %rcx
+; AVX-NEXT: orq %rdx, %rcx
+; AVX-NEXT: shlq $48, %rax
; AVX-NEXT: orq %rcx, %rax
-; AVX-NEXT: shlq $48, %rsi
-; AVX-NEXT: orq %rax, %rsi
; AVX-NEXT: shlq $56, %rdi
-; AVX-NEXT: orq %rsi, %rdi
+; AVX-NEXT: orq %rax, %rdi
; AVX-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shlq $32, %r11
; AVX-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F
; AVX-NEXT: orq %r11, %r9
-; AVX-NEXT: shlq $40, %rdx
-; AVX-NEXT: orq %r9, %rdx
+; AVX-NEXT: shlq $40, %rsi
+; AVX-NEXT: orq %r9, %rsi
; AVX-NEXT: shlq $48, %r10
-; AVX-NEXT: orq %rdx, %r10
+; AVX-NEXT: orq %rsi, %r10
; AVX-NEXT: shlq $56, %r8
; AVX-NEXT: orq %r10, %r8
; AVX-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
@@ -982,96 +982,96 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX1-NEXT: movq %rax, %r8
+; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: movq %rax, %rdi
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: andl $15, %ecx
-; AVX1-NEXT: shlq $32, %rcx
-; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: shrq $40, %rdi
+; AVX1-NEXT: shrq $32, %rdi
; AVX1-NEXT: andl $15, %edi
-; AVX1-NEXT: shlq $40, %rdi
-; AVX1-NEXT: orq %rax, %rdi
-; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX1-NEXT: shrq $48, %rsi
+; AVX1-NEXT: shlq $32, %rdi
+; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; AVX1-NEXT: orq %rdi, %rax
+; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
+; AVX1-NEXT: shrq $40, %rsi
; AVX1-NEXT: andl $15, %esi
-; AVX1-NEXT: shlq $48, %rsi
-; AVX1-NEXT: orq %rdi, %rsi
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $56, %rdx
+; AVX1-NEXT: shlq $40, %rsi
+; AVX1-NEXT: orq %rax, %rsi
+; AVX1-NEXT: movq %rdi, %rax
+; AVX1-NEXT: shrq $48, %rdx
; AVX1-NEXT: andl $15, %edx
-; AVX1-NEXT: shlq $56, %rdx
+; AVX1-NEXT: shlq $48, %rdx
; AVX1-NEXT: orq %rsi, %rdx
-; AVX1-NEXT: movq %rax, %rsi
-; AVX1-NEXT: shldq $24, %rax, %r8
-; AVX1-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: shrq $32, %rdx
-; AVX1-NEXT: andl $15, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX1-NEXT: orq %rdx, %rax
-; AVX1-NEXT: andl $15, %r8d
-; AVX1-NEXT: shlq $40, %r8
-; AVX1-NEXT: orq %rax, %r8
-; AVX1-NEXT: shrq $48, %rsi
-; AVX1-NEXT: andl $15, %esi
-; AVX1-NEXT: shlq $48, %rsi
-; AVX1-NEXT: orq %r8, %rsi
+; AVX1-NEXT: movq %rdi, %rsi
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: andl $15, %ecx
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: shlq $56, %rcx
-; AVX1-NEXT: orq %rsi, %rcx
-; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: orq %rdx, %rcx
+; AVX1-NEXT: movq %rdi, %rdx
; AVX1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: vmovd %eax, %xmm1
-; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $24, %ecx
-; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: movq %rdi, %rcx
; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $40, %rcx
-; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $48, %rcx
-; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX1-NEXT: andl $15, %ecx
+; AVX1-NEXT: shlq $32, %rcx
+; AVX1-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
+; AVX1-NEXT: orq %rcx, %rdi
+; AVX1-NEXT: shrq $40, %rdx
+; AVX1-NEXT: andl $15, %edx
+; AVX1-NEXT: shlq $40, %rdx
+; AVX1-NEXT: orq %rdi, %rdx
+; AVX1-NEXT: shrq $48, %rsi
+; AVX1-NEXT: andl $15, %esi
+; AVX1-NEXT: shlq $48, %rsi
+; AVX1-NEXT: orq %rdx, %rsi
; AVX1-NEXT: shrq $56, %rax
-; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
+; AVX1-NEXT: andl $15, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: shlq $56, %rax
+; AVX1-NEXT: orq %rsi, %rax
+; AVX1-NEXT: vmovq %xmm0, %rcx
+; AVX1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $8, %eax
-; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %ecx, %xmm1
+; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $24, %eax
-; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $32, %rax
-; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $40, %rax
-; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: shrq $56, %rcx
-; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: shrl $8, %ecx
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: shrl $16, %ecx
+; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: shrl $24, %ecx
+; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $32, %rcx
+; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $40, %rcx
+; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $48, %rcx
+; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: shrq $56, %rax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -1080,96 +1080,96 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: movq %rax, %rdi
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: shlq $32, %rcx
-; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX2-NEXT: orq %rcx, %rax
-; AVX2-NEXT: shrq $40, %rdi
+; AVX2-NEXT: shrq $32, %rdi
; AVX2-NEXT: andl $15, %edi
-; AVX2-NEXT: shlq $40, %rdi
-; AVX2-NEXT: orq %rax, %rdi
-; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX2-NEXT: shrq $48, %rsi
+; AVX2-NEXT: shlq $32, %rdi
+; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; AVX2-NEXT: orq %rdi, %rax
+; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
+; AVX2-NEXT: shrq $40, %rsi
; AVX2-NEXT: andl $15, %esi
-; AVX2-NEXT: shlq $48, %rsi
-; AVX2-NEXT: orq %rdi, %rsi
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $56, %rdx
+; AVX2-NEXT: shlq $40, %rsi
+; AVX2-NEXT: orq %rax, %rsi
+; AVX2-NEXT: movq %rdi, %rax
+; AVX2-NEXT: shrq $48, %rdx
; AVX2-NEXT: andl $15, %edx
-; AVX2-NEXT: shlq $56, %rdx
+; AVX2-NEXT: shlq $48, %rdx
; AVX2-NEXT: orq %rsi, %rdx
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shldq $24, %rax, %r8
-; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: shrq $32, %rdx
-; AVX2-NEXT: andl $15, %edx
-; AVX2-NEXT: shlq $32, %rdx
-; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX2-NEXT: orq %rdx, %rax
-; AVX2-NEXT: andl $15, %r8d
-; AVX2-NEXT: shlq $40, %r8
-; AVX2-NEXT: orq %rax, %r8
-; AVX2-NEXT: shrq $48, %rsi
-; AVX2-NEXT: andl $15, %esi
-; AVX2-NEXT: shlq $48, %rsi
-; AVX2-NEXT: orq %r8, %rsi
+; AVX2-NEXT: movq %rdi, %rsi
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: shlq $56, %rcx
-; AVX2-NEXT: orq %rsi, %rcx
-; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: orq %rdx, %rcx
+; AVX2-NEXT: movq %rdi, %rdx
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $8, %ecx
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $16, %ecx
-; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $24, %ecx
-; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: movq %rdi, %rcx
; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $40, %rcx
-; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $48, %rcx
-; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT: andl $15, %ecx
+; AVX2-NEXT: shlq $32, %rcx
+; AVX2-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
+; AVX2-NEXT: orq %rcx, %rdi
+; AVX2-NEXT: shrq $40, %rdx
+; AVX2-NEXT: andl $15, %edx
+; AVX2-NEXT: shlq $40, %rdx
+; AVX2-NEXT: orq %rdi, %rdx
+; AVX2-NEXT: shrq $48, %rsi
+; AVX2-NEXT: andl $15, %esi
+; AVX2-NEXT: shlq $48, %rsi
+; AVX2-NEXT: orq %rdx, %rsi
; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
+; AVX2-NEXT: andl $15, %eax
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: shlq $56, %rax
+; AVX2-NEXT: orq %rsi, %rax
+; AVX2-NEXT: vmovq %xmm0, %rcx
+; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vmovd %ecx, %xmm1
+; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $24, %eax
-; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $32, %rax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $40, %rax
-; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: shrq $56, %rcx
-; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: shrl $8, %ecx
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: shrl $16, %ecx
+; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: shrl $24, %ecx
+; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $32, %rcx
+; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $40, %rcx
+; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $48, %rcx
+; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: shrq $56, %rax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
index d04819a526bae..8f4a716d71cae 100644
--- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
@@ -1996,13 +1996,12 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
; X86-LABEL: test_i64_140737488289792_mask_shl_15:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shll $16, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $32767, %edx # imm = 0x7FFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shldl $15, %ecx, %edx
-; X86-NEXT: shll $31, %eax
+; X86-NEXT: shldl $15, %eax, %edx
+; X86-NEXT: andl $65536, %eax # imm = 0x10000
+; X86-NEXT: shll $15, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_i64_140737488289792_mask_shl_15:
@@ -2018,8 +2017,7 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
; X86-LABEL: test_i64_140737488289792_mask_shl_16:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shll $16, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl $32767, %edx # imm = 0x7FFF
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shldl $16, %eax, %edx
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 820b66f6179c2..a2dab38acb9a4 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -978,35 +978,34 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounw
define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
; X86-SSE2-LABEL: PR45265:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT: leal (%eax,%eax,2), %edi
-; X86-SSE2-NEXT: movzwl 8(%esi,%edi,4), %ebx
-; X86-SSE2-NEXT: movsbl 10(%esi,%edi,4), %ecx
-; X86-SSE2-NEXT: movl %ecx, %edx
-; X86-SSE2-NEXT: shll $16, %edx
-; X86-SSE2-NEXT: orl %ebx, %edx
-; X86-SSE2-NEXT: movl 4(%esi,%edi,4), %esi
-; X86-SSE2-NEXT: shrdl $8, %edx, %esi
-; X86-SSE2-NEXT: xorl %eax, %esi
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT: leal (%eax,%eax,2), %edx
+; X86-SSE2-NEXT: movzwl 8(%ecx,%edx,4), %esi
+; X86-SSE2-NEXT: movl 4(%ecx,%edx,4), %edi
+; X86-SSE2-NEXT: shrdl $8, %esi, %edi
+; X86-SSE2-NEXT: xorl %eax, %edi
; X86-SSE2-NEXT: sarl $31, %eax
+; X86-SSE2-NEXT: movzbl 10(%ecx,%edx,4), %ecx
+; X86-SSE2-NEXT: shll $16, %ecx
+; X86-SSE2-NEXT: orl %esi, %ecx
+; X86-SSE2-NEXT: shll $8, %ecx
+; X86-SSE2-NEXT: movl %ecx, %edx
+; X86-SSE2-NEXT: sarl $8, %edx
; X86-SSE2-NEXT: sarl $31, %ecx
; X86-SSE2-NEXT: shldl $24, %edx, %ecx
; X86-SSE2-NEXT: xorl %eax, %ecx
-; X86-SSE2-NEXT: orl %ecx, %esi
+; X86-SSE2-NEXT: orl %ecx, %edi
; X86-SSE2-NEXT: jne .LBB46_1
; X86-SSE2-NEXT: # %bb.2:
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
-; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: jmp _Z3foov # TAILCALL
; X86-SSE2-NEXT: .LBB46_1:
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
-; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: PR45265:
diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll
index f8e465e0ea785..54de5c2cafc3e 100644
--- a/llvm/test/CodeGen/X86/rotate-extract.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract.ll
@@ -13,11 +13,12 @@ define i64 @rolq_extract_shl(i64 %i) nounwind {
; X86-LABEL: rolq_extract_shl:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shldl $3, %edx, %ecx
+; X86-NEXT: shll $3, %eax
; X86-NEXT: shll $3, %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: shldl $7, %ecx, %eax
+; X86-NEXT: shrdl $25, %edx, %eax
; X86-NEXT: shrdl $25, %ecx, %edx
; X86-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 9b964b147d553..fa89da14508c6 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -843,56 +843,56 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl 24(%ebp), %edx
; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: leal {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: sarl $31, %ecx
; X86-NEXT: addl %edx, %edx
-; X86-NEXT: adcl %ecx, %ecx
-; X86-NEXT: andl $1, %ecx
-; X86-NEXT: negl %ecx
+; X86-NEXT: adcl %eax, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shldl $31, %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: shldl $31, %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $31, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %eax
; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: pushl %esi
; X86-NEXT: pushl %edx
; X86-NEXT: pushl %ebx
; X86-NEXT: calll __modti3
; X86-NEXT: addl $32, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl 36(%ebp), %edx
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl %esi, %edi
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: adcl %esi, %esi
-; X86-NEXT: andl $1, %esi
-; X86-NEXT: negl %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: adcl %edx, %edx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: shldl $31, %ecx, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $1, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %edx
-; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %eax
@@ -902,28 +902,28 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl 28(%ebp), %ebx
; X86-NEXT: movl %ebx, %edx
; X86-NEXT: sarl $31, %edx
-; X86-NEXT: movl 12(%ebp), %esi
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: adcl %esi, %esi
; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: addl %esi, %esi
-; X86-NEXT: adcl %ecx, %ecx
-; X86-NEXT: andl $1, %ecx
-; X86-NEXT: negl %ecx
+; X86-NEXT: shldl $31, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shldl $31, %esi, %eax
+; X86-NEXT: shll $31, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll $31, %esi
+; X86-NEXT: andl $1, %esi
+; X86-NEXT: negl %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: pushl %edx
; X86-NEXT: pushl %edx
; X86-NEXT: pushl %edx
; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %esi
; X86-NEXT: pushl %edi
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
@@ -936,12 +936,12 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: addl %ecx, %ecx
; X86-NEXT: adcl %ebx, %ebx
-; X86-NEXT: andl $1, %ebx
-; X86-NEXT: negl %ebx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shldl $31, %ecx, %edi
; X86-NEXT: shll $31, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $1, %ebx
+; X86-NEXT: negl %ebx
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %esi
@@ -999,30 +999,30 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: subl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl $0, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl $0, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: testl %edi, %edi
-; X86-NEXT: sets %cl
+; X86-NEXT: sets %al
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: sets %ch
-; X86-NEXT: xorb %cl, %ch
+; X86-NEXT: sets %ah
+; X86-NEXT: xorb %al, %ah
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: setne %cl
-; X86-NEXT: testb %ch, %cl
-; X86-NEXT: cmovel %esi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %edi, %ecx
+; X86-NEXT: setne %al
+; X86-NEXT: testb %ah, %al
+; X86-NEXT: cmovel %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1036,8 +1036,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: subl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %ecx
; X86-NEXT: sbbl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
@@ -1052,14 +1052,14 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: sets %bh
; X86-NEXT: xorb %bl, %bh
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: setne %al
; X86-NEXT: testb %bh, %al
-; X86-NEXT: cmovel %edi, %ecx
+; X86-NEXT: cmovel %esi, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll
index e51f9faeff013..05cb78c1083ea 100644
--- a/llvm/test/CodeGen/X86/shift-mask.ll
+++ b/llvm/test/CodeGen/X86/shift-mask.ll
@@ -578,8 +578,10 @@ define i64 @test_i64_lshr_lshr_2(i64 %a0) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shldl $5, %eax, %edx
-; X86-NEXT: shll $5, %eax
-; X86-NEXT: shrdl $3, %edx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: shrl $27, %eax
+; X86-NEXT: shldl $29, %ecx, %eax
; X86-NEXT: shrl $3, %edx
; X86-NEXT: retl
;