[llvm] 1cf9b24 - [DAG] Enable ISD::FSHL/R SimplifyMultipleUseDemandedBits handling inside SimplifyDemandedBits

Simon Pilgrim via llvm-commits <llvm-commits@lists.llvm.org>
Sun Jun 12 11:26:01 PDT 2022


Author: Simon Pilgrim
Date: 2022-06-12T19:25:20+01:00
New Revision: 1cf9b24da3b8eb8e7636de2209f192ded5ce20cd

URL: https://github.com/llvm/llvm-project/commit/1cf9b24da3b8eb8e7636de2209f192ded5ce20cd
DIFF: https://github.com/llvm/llvm-project/commit/1cf9b24da3b8eb8e7636de2209f192ded5ce20cd.diff

LOG: [DAG] Enable ISD::FSHL/R SimplifyMultipleUseDemandedBits handling inside SimplifyDemandedBits

This patch allows SimplifyDemandedBits to call SimplifyMultipleUseDemandedBits on the ISD::FSHL/FSHR source operands even when they have other uses, enabling us to peek through the shifted values if we don't demand all of their bits/elements.

This helps with several of the regressions from D125836.
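
As a rough illustration (the function name, constants, and IR below are invented for this note, not taken from the updated tests; it is written as IR for readability, while the combine itself runs on the equivalent ISD::FSHL DAG node), consider a funnel shift whose result only has its low bits demanded while its first operand has another use:

    define i32 @fshl_low_bits_demo(i32 %a, i32 %b) {
      %x = or i32 %a, 65535                               ; %x also feeds the add below
      %f = call i32 @llvm.fshl.i32(i32 %x, i32 %b, i32 4)
      %lo = and i32 %f, 255                               ; only the low 8 bits of %f are demanded
      %r = add i32 %x, %lo                                ; second use keeps %x multi-use
      ret i32 %r
    }
    declare i32 @llvm.fshl.i32(i32, i32, i32)

Previously SimplifyDemandedBits would give up on the fshl because %x has another use; with this change SimplifyMultipleUseDemandedBits can, in cases like this, hand the fshl a simplified view of %x covering just the demanded low bits, without rewriting %x itself.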

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
    llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
    llvm/test/CodeGen/X86/funnel-shift.ll
    llvm/test/CodeGen/X86/rotate-extract.ll
    llvm/test/CodeGen/X86/sdiv_fix_sat.ll
    llvm/test/CodeGen/X86/shift-mask.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 57539f3d6b97f..03fc1728ac44a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1877,6 +1877,22 @@ bool TargetLowering::SimplifyDemandedBits(
       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
       Known.One |= Known2.One;
       Known.Zero |= Known2.Zero;
+
+      // Attempt to avoid multi-use ops if we don't need anything from them.
+      if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
+          !DemandedElts.isAllOnes()) {
+        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+            Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
+        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+            Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
+        if (DemandedOp0 || DemandedOp1) {
+          DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
+          DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
+          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
+                                          DemandedOp1, Op2);
+          return TLO.CombineTo(Op, NewOp);
+        }
+      }
     }
 
     // For pow-2 bitwidths we only demand the bottom modulo amt bits.

diff --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
index 83fa3fba63bd1..27593cec1d6a6 100644
--- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
@@ -211,36 +211,36 @@ define i1 @test_urem_oversized(i66 %X) nounwind {
 ; PPC-NEXT:    lis 6, -12795
 ; PPC-NEXT:    ori 6, 6, 40665
 ; PPC-NEXT:    mulhwu 7, 5, 6
-; PPC-NEXT:    lis 8, 12057
-; PPC-NEXT:    ori 8, 8, 37186
-; PPC-NEXT:    mullw 10, 4, 6
-; PPC-NEXT:    addc 7, 10, 7
+; PPC-NEXT:    lis 9, 12057
+; PPC-NEXT:    ori 9, 9, 37186
+; PPC-NEXT:    mullw 11, 4, 6
+; PPC-NEXT:    addc 7, 11, 7
+; PPC-NEXT:    lis 11, -5526
+; PPC-NEXT:    ori 11, 11, 61135
+; PPC-NEXT:    mulhwu 8, 4, 6
+; PPC-NEXT:    addze 8, 8
+; PPC-NEXT:    mulhwu 10, 5, 9
+; PPC-NEXT:    mullw 4, 4, 9
+; PPC-NEXT:    mullw 9, 5, 9
+; PPC-NEXT:    addc 7, 9, 7
+; PPC-NEXT:    addze 9, 10
+; PPC-NEXT:    rotlwi 10, 7, 31
 ; PPC-NEXT:    mullw 3, 3, 6
-; PPC-NEXT:    mullw 11, 5, 6
-; PPC-NEXT:    mulhwu 6, 4, 6
-; PPC-NEXT:    addze 6, 6
-; PPC-NEXT:    slwi 4, 4, 1
-; PPC-NEXT:    mulhwu 9, 5, 8
-; PPC-NEXT:    mullw 8, 5, 8
-; PPC-NEXT:    addc 7, 8, 7
-; PPC-NEXT:    addze 9, 9
+; PPC-NEXT:    mullw 6, 5, 6
 ; PPC-NEXT:    slwi 5, 5, 1
-; PPC-NEXT:    add 6, 6, 9
 ; PPC-NEXT:    add 3, 5, 3
-; PPC-NEXT:    rotlwi 8, 11, 31
-; PPC-NEXT:    sub 4, 6, 4
-; PPC-NEXT:    lis 5, -5526
-; PPC-NEXT:    rlwimi 8, 7, 31, 0, 0
-; PPC-NEXT:    rotlwi 7, 7, 31
+; PPC-NEXT:    rotlwi 5, 6, 31
+; PPC-NEXT:    rlwimi 5, 7, 31, 0, 0
+; PPC-NEXT:    add 7, 8, 9
+; PPC-NEXT:    add 4, 4, 7
 ; PPC-NEXT:    add 3, 4, 3
-; PPC-NEXT:    ori 5, 5, 61135
-; PPC-NEXT:    rlwimi 7, 3, 31, 0, 0
-; PPC-NEXT:    cmplw 8, 5
-; PPC-NEXT:    cmplwi 1, 7, 13
+; PPC-NEXT:    rlwimi 10, 3, 31, 0, 0
+; PPC-NEXT:    cmplw 5, 11
+; PPC-NEXT:    cmplwi 1, 10, 13
 ; PPC-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; PPC-NEXT:    crand 20, 6, 0
 ; PPC-NEXT:    crandc 21, 4, 6
-; PPC-NEXT:    rlwimi. 3, 11, 1, 30, 30
+; PPC-NEXT:    rlwimi. 3, 6, 1, 30, 30
 ; PPC-NEXT:    cror 20, 20, 21
 ; PPC-NEXT:    crnand 20, 2, 20
 ; PPC-NEXT:    li 3, 1

diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 24a710f734ae8..229a94bb399f1 100644
--- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -801,48 +801,48 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
 ; AVX-NEXT:    pushq %rbx
 ; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
-; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
+; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
 ; AVX-NEXT:    movq %r9, %r8
 ; AVX-NEXT:    shrq $56, %r8
 ; AVX-NEXT:    andl $15, %r8d
 ; AVX-NEXT:    movq %r9, %r10
 ; AVX-NEXT:    shrq $48, %r10
 ; AVX-NEXT:    andl $15, %r10d
-; AVX-NEXT:    movq %rcx, %rdx
-; AVX-NEXT:    shldq $24, %r9, %rdx
-; AVX-NEXT:    andl $15, %edx
+; AVX-NEXT:    movq %r9, %rsi
+; AVX-NEXT:    shrq $40, %rsi
+; AVX-NEXT:    andl $15, %esi
 ; AVX-NEXT:    movq %r9, %r11
 ; AVX-NEXT:    shrq $32, %r11
 ; AVX-NEXT:    andl $15, %r11d
-; AVX-NEXT:    movq %rcx, %rdi
+; AVX-NEXT:    movq %rdx, %rdi
 ; AVX-NEXT:    shrq $56, %rdi
 ; AVX-NEXT:    andl $15, %edi
-; AVX-NEXT:    movq %rcx, %rsi
-; AVX-NEXT:    shrq $48, %rsi
-; AVX-NEXT:    andl $15, %esi
-; AVX-NEXT:    movq %rcx, %rax
-; AVX-NEXT:    shrq $40, %rax
+; AVX-NEXT:    movq %rdx, %rax
+; AVX-NEXT:    shrq $48, %rax
 ; AVX-NEXT:    andl $15, %eax
-; AVX-NEXT:    movq %rcx, %rbx
+; AVX-NEXT:    movq %rdx, %rcx
+; AVX-NEXT:    shrq $40, %rcx
+; AVX-NEXT:    andl $15, %ecx
+; AVX-NEXT:    movq %rdx, %rbx
 ; AVX-NEXT:    shrq $32, %rbx
 ; AVX-NEXT:    andl $15, %ebx
 ; AVX-NEXT:    shlq $32, %rbx
-; AVX-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; AVX-NEXT:    orq %rbx, %rcx
-; AVX-NEXT:    shlq $40, %rax
+; AVX-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; AVX-NEXT:    orq %rbx, %rdx
+; AVX-NEXT:    shlq $40, %rcx
+; AVX-NEXT:    orq %rdx, %rcx
+; AVX-NEXT:    shlq $48, %rax
 ; AVX-NEXT:    orq %rcx, %rax
-; AVX-NEXT:    shlq $48, %rsi
-; AVX-NEXT:    orq %rax, %rsi
 ; AVX-NEXT:    shlq $56, %rdi
-; AVX-NEXT:    orq %rsi, %rdi
+; AVX-NEXT:    orq %rax, %rdi
 ; AVX-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    shlq $32, %r11
 ; AVX-NEXT:    andl $252645135, %r9d # imm = 0xF0F0F0F
 ; AVX-NEXT:    orq %r11, %r9
-; AVX-NEXT:    shlq $40, %rdx
-; AVX-NEXT:    orq %r9, %rdx
+; AVX-NEXT:    shlq $40, %rsi
+; AVX-NEXT:    orq %r9, %rsi
 ; AVX-NEXT:    shlq $48, %r10
-; AVX-NEXT:    orq %rdx, %r10
+; AVX-NEXT:    orq %rsi, %r10
 ; AVX-NEXT:    shlq $56, %r8
 ; AVX-NEXT:    orq %r10, %r8
 ; AVX-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
@@ -982,96 +982,96 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; AVX1-NEXT:    movq %rax, %r8
+; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    movq %rax, %rdx
 ; AVX1-NEXT:    movq %rax, %rsi
 ; AVX1-NEXT:    movq %rax, %rdi
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $32, %rcx
-; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    shlq $32, %rcx
-; AVX1-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX1-NEXT:    orq %rcx, %rax
-; AVX1-NEXT:    shrq $40, %rdi
+; AVX1-NEXT:    shrq $32, %rdi
 ; AVX1-NEXT:    andl $15, %edi
-; AVX1-NEXT:    shlq $40, %rdi
-; AVX1-NEXT:    orq %rax, %rdi
-; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; AVX1-NEXT:    shrq $48, %rsi
+; AVX1-NEXT:    shlq $32, %rdi
+; AVX1-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; AVX1-NEXT:    orq %rdi, %rax
+; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
+; AVX1-NEXT:    shrq $40, %rsi
 ; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shlq $48, %rsi
-; AVX1-NEXT:    orq %rdi, %rsi
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $56, %rdx
+; AVX1-NEXT:    shlq $40, %rsi
+; AVX1-NEXT:    orq %rax, %rsi
+; AVX1-NEXT:    movq %rdi, %rax
+; AVX1-NEXT:    shrq $48, %rdx
 ; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shlq $56, %rdx
+; AVX1-NEXT:    shlq $48, %rdx
 ; AVX1-NEXT:    orq %rsi, %rdx
-; AVX1-NEXT:    movq %rax, %rsi
-; AVX1-NEXT:    shldq $24, %rax, %r8
-; AVX1-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shrq $32, %rdx
-; AVX1-NEXT:    andl $15, %edx
-; AVX1-NEXT:    shlq $32, %rdx
-; AVX1-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX1-NEXT:    orq %rdx, %rax
-; AVX1-NEXT:    andl $15, %r8d
-; AVX1-NEXT:    shlq $40, %r8
-; AVX1-NEXT:    orq %rax, %r8
-; AVX1-NEXT:    shrq $48, %rsi
-; AVX1-NEXT:    andl $15, %esi
-; AVX1-NEXT:    shlq $48, %rsi
-; AVX1-NEXT:    orq %r8, %rsi
+; AVX1-NEXT:    movq %rdi, %rsi
 ; AVX1-NEXT:    shrq $56, %rcx
 ; AVX1-NEXT:    andl $15, %ecx
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    shlq $56, %rcx
-; AVX1-NEXT:    orq %rsi, %rcx
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    orq %rdx, %rcx
+; AVX1-NEXT:    movq %rdi, %rdx
 ; AVX1-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $8, %ecx
-; AVX1-NEXT:    vmovd %eax, %xmm1
-; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $16, %ecx
-; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movl %eax, %ecx
-; AVX1-NEXT:    shrl $24, %ecx
-; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
+; AVX1-NEXT:    movq %rdi, %rcx
 ; AVX1-NEXT:    shrq $32, %rcx
-; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $40, %rcx
-; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $48, %rcx
-; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX1-NEXT:    andl $15, %ecx
+; AVX1-NEXT:    shlq $32, %rcx
+; AVX1-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; AVX1-NEXT:    orq %rcx, %rdi
+; AVX1-NEXT:    shrq $40, %rdx
+; AVX1-NEXT:    andl $15, %edx
+; AVX1-NEXT:    shlq $40, %rdx
+; AVX1-NEXT:    orq %rdi, %rdx
+; AVX1-NEXT:    shrq $48, %rsi
+; AVX1-NEXT:    andl $15, %esi
+; AVX1-NEXT:    shlq $48, %rsi
+; AVX1-NEXT:    orq %rdx, %rsi
 ; AVX1-NEXT:    shrq $56, %rax
-; AVX1-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm0
+; AVX1-NEXT:    andl $15, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    shlq $56, %rax
+; AVX1-NEXT:    orq %rsi, %rax
+; AVX1-NEXT:    vmovq %xmm0, %rcx
+; AVX1-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; AVX1-NEXT:    movl %ecx, %eax
 ; AVX1-NEXT:    shrl $8, %eax
-; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %ecx, %xmm1
+; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
 ; AVX1-NEXT:    movl %ecx, %eax
 ; AVX1-NEXT:    shrl $16, %eax
-; AVX1-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
 ; AVX1-NEXT:    movl %ecx, %eax
 ; AVX1-NEXT:    shrl $24, %eax
-; AVX1-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    shrq $32, %rax
-; AVX1-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    shrq $40, %rax
-; AVX1-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    shrq $48, %rax
-; AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    shrq $56, %rcx
-; AVX1-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm0
+; AVX1-NEXT:    movl %eax, %ecx
+; AVX1-NEXT:    shrl $8, %ecx
+; AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    movl %eax, %ecx
+; AVX1-NEXT:    shrl $16, %ecx
+; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    movl %eax, %ecx
+; AVX1-NEXT:    shrl $24, %ecx
+; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    movq %rax, %rcx
+; AVX1-NEXT:    shrq $32, %rcx
+; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    movq %rax, %rcx
+; AVX1-NEXT:    shrq $40, %rcx
+; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    movq %rax, %rcx
+; AVX1-NEXT:    shrq $48, %rcx
+; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX1-NEXT:    shrq $56, %rax
+; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
@@ -1080,96 +1080,96 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; AVX2-NEXT:    movq %rax, %r8
+; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    movq %rax, %rdx
 ; AVX2-NEXT:    movq %rax, %rsi
 ; AVX2-NEXT:    movq %rax, %rdi
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $32, %rcx
-; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    shlq $32, %rcx
-; AVX2-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX2-NEXT:    orq %rcx, %rax
-; AVX2-NEXT:    shrq $40, %rdi
+; AVX2-NEXT:    shrq $32, %rdi
 ; AVX2-NEXT:    andl $15, %edi
-; AVX2-NEXT:    shlq $40, %rdi
-; AVX2-NEXT:    orq %rax, %rdi
-; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; AVX2-NEXT:    shrq $48, %rsi
+; AVX2-NEXT:    shlq $32, %rdi
+; AVX2-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; AVX2-NEXT:    orq %rdi, %rax
+; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
+; AVX2-NEXT:    shrq $40, %rsi
 ; AVX2-NEXT:    andl $15, %esi
-; AVX2-NEXT:    shlq $48, %rsi
-; AVX2-NEXT:    orq %rdi, %rsi
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $56, %rdx
+; AVX2-NEXT:    shlq $40, %rsi
+; AVX2-NEXT:    orq %rax, %rsi
+; AVX2-NEXT:    movq %rdi, %rax
+; AVX2-NEXT:    shrq $48, %rdx
 ; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $56, %rdx
+; AVX2-NEXT:    shlq $48, %rdx
 ; AVX2-NEXT:    orq %rsi, %rdx
-; AVX2-NEXT:    movq %rax, %rsi
-; AVX2-NEXT:    shldq $24, %rax, %r8
-; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shrq $32, %rdx
-; AVX2-NEXT:    andl $15, %edx
-; AVX2-NEXT:    shlq $32, %rdx
-; AVX2-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX2-NEXT:    orq %rdx, %rax
-; AVX2-NEXT:    andl $15, %r8d
-; AVX2-NEXT:    shlq $40, %r8
-; AVX2-NEXT:    orq %rax, %r8
-; AVX2-NEXT:    shrq $48, %rsi
-; AVX2-NEXT:    andl $15, %esi
-; AVX2-NEXT:    shlq $48, %rsi
-; AVX2-NEXT:    orq %r8, %rsi
+; AVX2-NEXT:    movq %rdi, %rsi
 ; AVX2-NEXT:    shrq $56, %rcx
 ; AVX2-NEXT:    andl $15, %ecx
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    shlq $56, %rcx
-; AVX2-NEXT:    orq %rsi, %rcx
-; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    orq %rdx, %rcx
+; AVX2-NEXT:    movq %rdi, %rdx
 ; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $8, %ecx
-; AVX2-NEXT:    vmovd %eax, %xmm1
-; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $16, %ecx
-; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movl %eax, %ecx
-; AVX2-NEXT:    shrl $24, %ecx
-; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
+; AVX2-NEXT:    movq %rdi, %rcx
 ; AVX2-NEXT:    shrq $32, %rcx
-; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $40, %rcx
-; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $48, %rcx
-; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT:    andl $15, %ecx
+; AVX2-NEXT:    shlq $32, %rcx
+; AVX2-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; AVX2-NEXT:    orq %rcx, %rdi
+; AVX2-NEXT:    shrq $40, %rdx
+; AVX2-NEXT:    andl $15, %edx
+; AVX2-NEXT:    shlq $40, %rdx
+; AVX2-NEXT:    orq %rdi, %rdx
+; AVX2-NEXT:    shrq $48, %rsi
+; AVX2-NEXT:    andl $15, %esi
+; AVX2-NEXT:    shlq $48, %rsi
+; AVX2-NEXT:    orq %rdx, %rsi
 ; AVX2-NEXT:    shrq $56, %rax
-; AVX2-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm0
+; AVX2-NEXT:    andl $15, %eax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    shlq $56, %rax
+; AVX2-NEXT:    orq %rsi, %rax
+; AVX2-NEXT:    vmovq %xmm0, %rcx
+; AVX2-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movl %ecx, %eax
 ; AVX2-NEXT:    shrl $8, %eax
-; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %ecx, %xmm1
+; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
 ; AVX2-NEXT:    movl %ecx, %eax
 ; AVX2-NEXT:    shrl $16, %eax
-; AVX2-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
 ; AVX2-NEXT:    movl %ecx, %eax
 ; AVX2-NEXT:    shrl $24, %eax
-; AVX2-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
 ; AVX2-NEXT:    movq %rcx, %rax
 ; AVX2-NEXT:    shrq $32, %rax
-; AVX2-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
 ; AVX2-NEXT:    movq %rcx, %rax
 ; AVX2-NEXT:    shrq $40, %rax
-; AVX2-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
 ; AVX2-NEXT:    movq %rcx, %rax
 ; AVX2-NEXT:    shrq $48, %rax
-; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    shrq $56, %rcx
-; AVX2-NEXT:    vpinsrb $15, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm0
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $8, %ecx
+; AVX2-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $16, %ecx
+; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $24, %ecx
+; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    movq %rax, %rcx
+; AVX2-NEXT:    shrq $32, %rcx
+; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    movq %rax, %rcx
+; AVX2-NEXT:    shrq $40, %rcx
+; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    movq %rax, %rcx
+; AVX2-NEXT:    shrq $48, %rcx
+; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX2-NEXT:    shrq $56, %rax
+; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm1
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
index d04819a526bae..8f4a716d71cae 100644
--- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
+++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll
@@ -1996,13 +1996,12 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) {
 define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
 ; X86-LABEL: test_i64_140737488289792_mask_shl_15:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    shll $16, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl $32767, %edx # imm = 0x7FFF
 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    shldl $15, %ecx, %edx
-; X86-NEXT:    shll $31, %eax
+; X86-NEXT:    shldl $15, %eax, %edx
+; X86-NEXT:    andl $65536, %eax # imm = 0x10000
+; X86-NEXT:    shll $15, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_shl_15:
@@ -2018,8 +2017,7 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) {
 define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) {
 ; X86-LABEL: test_i64_140737488289792_mask_shl_16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shll $16, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl $32767, %edx # imm = 0x7FFF
 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shldl $16, %eax, %edx

diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 820b66f6179c2..a2dab38acb9a4 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -978,35 +978,34 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounw
 define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
 ; X86-SSE2-LABEL: PR45265:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %ebx
 ; X86-SSE2-NEXT:    pushl %edi
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT:    leal (%eax,%eax,2), %edi
-; X86-SSE2-NEXT:    movzwl 8(%esi,%edi,4), %ebx
-; X86-SSE2-NEXT:    movsbl 10(%esi,%edi,4), %ecx
-; X86-SSE2-NEXT:    movl %ecx, %edx
-; X86-SSE2-NEXT:    shll $16, %edx
-; X86-SSE2-NEXT:    orl %ebx, %edx
-; X86-SSE2-NEXT:    movl 4(%esi,%edi,4), %esi
-; X86-SSE2-NEXT:    shrdl $8, %edx, %esi
-; X86-SSE2-NEXT:    xorl %eax, %esi
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    leal (%eax,%eax,2), %edx
+; X86-SSE2-NEXT:    movzwl 8(%ecx,%edx,4), %esi
+; X86-SSE2-NEXT:    movl 4(%ecx,%edx,4), %edi
+; X86-SSE2-NEXT:    shrdl $8, %esi, %edi
+; X86-SSE2-NEXT:    xorl %eax, %edi
 ; X86-SSE2-NEXT:    sarl $31, %eax
+; X86-SSE2-NEXT:    movzbl 10(%ecx,%edx,4), %ecx
+; X86-SSE2-NEXT:    shll $16, %ecx
+; X86-SSE2-NEXT:    orl %esi, %ecx
+; X86-SSE2-NEXT:    shll $8, %ecx
+; X86-SSE2-NEXT:    movl %ecx, %edx
+; X86-SSE2-NEXT:    sarl $8, %edx
 ; X86-SSE2-NEXT:    sarl $31, %ecx
 ; X86-SSE2-NEXT:    shldl $24, %edx, %ecx
 ; X86-SSE2-NEXT:    xorl %eax, %ecx
-; X86-SSE2-NEXT:    orl %ecx, %esi
+; X86-SSE2-NEXT:    orl %ecx, %edi
 ; X86-SSE2-NEXT:    jne .LBB46_1
 ; X86-SSE2-NEXT:  # %bb.2:
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    popl %edi
-; X86-SSE2-NEXT:    popl %ebx
 ; X86-SSE2-NEXT:    jmp _Z3foov # TAILCALL
 ; X86-SSE2-NEXT:  .LBB46_1:
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    popl %edi
-; X86-SSE2-NEXT:    popl %ebx
 ; X86-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: PR45265:

diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll
index f8e465e0ea785..54de5c2cafc3e 100644
--- a/llvm/test/CodeGen/X86/rotate-extract.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract.ll
@@ -13,11 +13,12 @@ define i64 @rolq_extract_shl(i64 %i) nounwind {
 ; X86-LABEL: rolq_extract_shl:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shldl $3, %edx, %ecx
+; X86-NEXT:    shll $3, %eax
 ; X86-NEXT:    shll $3, %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    shldl $7, %ecx, %eax
+; X86-NEXT:    shrdl $25, %edx, %eax
 ; X86-NEXT:    shrdl $25, %ecx, %edx
 ; X86-NEXT:    retl
 ;

diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 9b964b147d553..fa89da14508c6 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -843,56 +843,56 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl 24(%ebp), %edx
 ; X86-NEXT:    movl 40(%ebp), %edi
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    sarl $31, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edx, %ecx
-; X86-NEXT:    sarl $31, %ecx
 ; X86-NEXT:    addl %edx, %edx
-; X86-NEXT:    adcl %ecx, %ecx
-; X86-NEXT:    andl $1, %ecx
-; X86-NEXT:    negl %ecx
+; X86-NEXT:    adcl %eax, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shldl $31, %edx, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    shldl $31, %edx, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    shll $31, %edx
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %ecx
-; X86-NEXT:    pushl %ecx
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %edx
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    calll __modti3
 ; X86-NEXT:    addl $32, %esp
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl 36(%ebp), %edx
-; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl 36(%ebp), %esi
+; X86-NEXT:    movl %esi, %edi
 ; X86-NEXT:    sarl $31, %edi
 ; X86-NEXT:    movl 20(%ebp), %ecx
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
 ; X86-NEXT:    addl %ecx, %ecx
-; X86-NEXT:    adcl %esi, %esi
-; X86-NEXT:    andl $1, %esi
-; X86-NEXT:    negl %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    adcl %edx, %edx
+; X86-NEXT:    movl %edx, %ebx
 ; X86-NEXT:    shldl $31, %ecx, %ebx
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    shll $31, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %edx
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    pushl %edx
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %eax
@@ -902,28 +902,28 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl 28(%ebp), %ebx
 ; X86-NEXT:    movl %ebx, %edx
 ; X86-NEXT:    sarl $31, %edx
-; X86-NEXT:    movl 12(%ebp), %esi
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    adcl %esi, %esi
 ; X86-NEXT:    movl %esi, %ecx
-; X86-NEXT:    sarl $31, %ecx
-; X86-NEXT:    addl %esi, %esi
-; X86-NEXT:    adcl %ecx, %ecx
-; X86-NEXT:    andl $1, %ecx
-; X86-NEXT:    negl %ecx
+; X86-NEXT:    shldl $31, %eax, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    shldl $31, %esi, %eax
+; X86-NEXT:    shll $31, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    shll $31, %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    negl %esi
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    pushl %edx
 ; X86-NEXT:    pushl %edx
 ; X86-NEXT:    pushl %edx
 ; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    calll __divti3
 ; X86-NEXT:    addl $32, %esp
@@ -936,12 +936,12 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    sarl $31, %ebx
 ; X86-NEXT:    addl %ecx, %ecx
 ; X86-NEXT:    adcl %ebx, %ebx
-; X86-NEXT:    andl $1, %ebx
-; X86-NEXT:    negl %ebx
 ; X86-NEXT:    movl %ebx, %edi
 ; X86-NEXT:    shldl $31, %ecx, %edi
 ; X86-NEXT:    shll $31, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    negl %ebx
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    pushl %esi
@@ -999,30 +999,30 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    subl $1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    sbbl $0, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl $0, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl $0, %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl $0, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    testl %edi, %edi
-; X86-NEXT:    sets %cl
+; X86-NEXT:    sets %al
 ; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    sets %ch
-; X86-NEXT:    xorb %cl, %ch
+; X86-NEXT:    sets %ah
+; X86-NEXT:    xorb %al, %ah
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    orl %edi, %edx
-; X86-NEXT:    setne %cl
-; X86-NEXT:    testb %ch, %cl
-; X86-NEXT:    cmovel %esi, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl %edi, %ecx
+; X86-NEXT:    setne %al
+; X86-NEXT:    testb %ah, %al
+; X86-NEXT:    cmovel %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -1036,8 +1036,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    subl $1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %ecx
 ; X86-NEXT:    sbbl $0, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %edx, %eax
@@ -1052,14 +1052,14 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-NEXT:    cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT:    sets %bh
 ; X86-NEXT:    xorb %bl, %bh
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    orl %edi, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    testb %bh, %al
-; X86-NEXT:    cmovel %edi, %ecx
+; X86-NEXT:    cmovel %esi, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload

diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll
index e51f9faeff013..05cb78c1083ea 100644
--- a/llvm/test/CodeGen/X86/shift-mask.ll
+++ b/llvm/test/CodeGen/X86/shift-mask.ll
@@ -578,8 +578,10 @@ define i64 @test_i64_lshr_lshr_2(i64 %a0) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    shldl $5, %eax, %edx
-; X86-NEXT:    shll $5, %eax
-; X86-NEXT:    shrdl $3, %edx, %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $5, %ecx
+; X86-NEXT:    shrl $27, %eax
+; X86-NEXT:    shldl $29, %ecx, %eax
 ; X86-NEXT:    shrl $3, %edx
 ; X86-NEXT:    retl
 ;


        

