[llvm] r361706 - [X86] lowerBuildVectorToBitOp - support build_vector(shift()) -> shift(build_vector(), C)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat May 25 11:02:17 PDT 2019


Author: rksimon
Date: Sat May 25 11:02:17 2019
New Revision: 361706

URL: http://llvm.org/viewvc/llvm-project?rev=361706&view=rev
Log:
[X86] lowerBuildVectorToBitOp - support build_vector(shift()) -> shift(build_vector(),C)

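This pattern commonly occurs in sign-extension cases, where each scalar element is shifted left and then arithmetically shifted right before being inserted into the vector. The fold is limited to shifts where every element uses the same constant shift amount, so the per-element scalar shifts can be replaced by a single vector shift by immediate.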

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
    llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
    llvm/trunk/test/CodeGen/X86/vector-sext.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 25 11:02:17 2019
@@ -8746,9 +8746,15 @@ static SDValue lowerBuildVectorToBitOp(B
       return SDValue();
 
   // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
+  bool IsShift = false;
   switch (Opcode) {
   default:
     return SDValue();
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    IsShift = true;
+    break;
   case ISD::AND:
   case ISD::XOR:
   case ISD::OR:
@@ -8769,10 +8775,24 @@ static SDValue lowerBuildVectorToBitOp(B
     // We expect the canonicalized RHS operand to be the constant.
     if (!isa<ConstantSDNode>(RHS))
       return SDValue();
+
+    // Extend shift amounts.
+    if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
+      if (!IsShift)
+        return SDValue();
+      RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
+    }
+
     LHSElts.push_back(LHS);
     RHSElts.push_back(RHS);
   }
 
+  // Limit to shifts by uniform immediates.
+  // TODO: Only accept vXi8/vXi64 special cases?
+  // TODO: Permit non-uniform XOP/AVX2/MULLO cases?
+  if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
+    return SDValue();
+
   SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
   SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
   return DAG.getNode(Opcode, DL, VT, LHS, RHS);

Modified: llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll Sat May 25 11:02:17 2019
@@ -86,13 +86,12 @@ define <2 x i64> @vrolq_extract_udiv(<2
 ; X64-NEXT:    vpextrq $1, %xmm0, %rax
 ; X64-NEXT:    movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    vmovq %xmm0, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm0
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X64-NEXT:    vprolq $57, %zmm0, %zmm0
 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; X64-NEXT:    vzeroupper
@@ -256,24 +255,22 @@ define <2 x i64> @no_extract_udiv(<2 x i
 ; X64-NEXT:    movabsq $-6148914691236517205, %rdi # imm = 0xAAAAAAAAAAAAAAAB
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    vmovq %xmm0, %rsi
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm0
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X64-NEXT:    movabsq $-6180857105216966645, %rdi # imm = 0xAA392F35DC17F00B
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq $9, %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq $9, %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm2
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-NEXT:    vpsrlq $9, %xmm1, %xmm1
 ; X64-NEXT:    vpsllq $56, %xmm0, %xmm0
 ; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll Sat May 25 11:02:17 2019
@@ -19,7 +19,6 @@ define <2 x i64> @test_div7_2i64(<2 x i6
 ; SSE2-NEXT:    subq %rdx, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    addq %rdx, %rcx
-; SSE2-NEXT:    shrq $2, %rcx
 ; SSE2-NEXT:    movq %rcx, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rcx
@@ -28,9 +27,9 @@ define <2 x i64> @test_div7_2i64(<2 x i6
 ; SSE2-NEXT:    subq %rdx, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    addq %rdx, %rcx
-; SSE2-NEXT:    shrq $2, %rcx
 ; SSE2-NEXT:    movq %rcx, %xmm0
 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    psrlq $2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -43,7 +42,6 @@ define <2 x i64> @test_div7_2i64(<2 x i6
 ; SSE41-NEXT:    subq %rdx, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    addq %rdx, %rcx
-; SSE41-NEXT:    shrq $2, %rcx
 ; SSE41-NEXT:    movq %rcx, %xmm1
 ; SSE41-NEXT:    movq %xmm0, %rcx
 ; SSE41-NEXT:    movq %rcx, %rax
@@ -51,9 +49,9 @@ define <2 x i64> @test_div7_2i64(<2 x i6
 ; SSE41-NEXT:    subq %rdx, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    addq %rdx, %rcx
-; SSE41-NEXT:    shrq $2, %rcx
 ; SSE41-NEXT:    movq %rcx, %xmm0
 ; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    psrlq $2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_div7_2i64:
@@ -65,7 +63,6 @@ define <2 x i64> @test_div7_2i64(<2 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm1
 ; AVX-NEXT:    vmovq %xmm0, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -73,9 +70,9 @@ define <2 x i64> @test_div7_2i64(<2 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vpsrlq $2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %res = udiv <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %res

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll Sat May 25 11:02:17 2019
@@ -10,32 +10,30 @@
 define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX1-LABEL: test_div7_4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
 ; AVX1-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
-; AVX1-NEXT:    vmovq %rcx, %xmm2
-; AVX1-NEXT:    vmovq %xmm1, %rcx
+; AVX1-NEXT:    vmovq %rcx, %xmm1
+; AVX1-NEXT:    vmovq %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
-; AVX1-NEXT:    vmovq %rcx, %xmm1
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vmovq %rcx, %xmm2
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vpsrlq $2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm2
 ; AVX1-NEXT:    vmovq %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
@@ -43,10 +41,10 @@ define <4 x i64> @test_div7_4i64(<4 x i6
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrlq $2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_div7_4i64:
@@ -59,7 +57,6 @@ define <4 x i64> @test_div7_4i64(<4 x i6
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm2
 ; AVX2-NEXT:    vmovq %xmm1, %rcx
 ; AVX2-NEXT:    movq %rcx, %rax
@@ -67,7 +64,6 @@ define <4 x i64> @test_div7_4i64(<4 x i6
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
@@ -76,7 +72,6 @@ define <4 x i64> @test_div7_4i64(<4 x i6
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm2
 ; AVX2-NEXT:    vmovq %xmm0, %rcx
 ; AVX2-NEXT:    movq %rcx, %rax
@@ -84,10 +79,10 @@ define <4 x i64> @test_div7_4i64(<4 x i6
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlq $2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %res = udiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %res

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll Sat May 25 11:02:17 2019
@@ -17,7 +17,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vmovq %xmm1, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -25,7 +24,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm1
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
@@ -35,7 +33,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm2, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -43,7 +40,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
@@ -54,7 +50,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm2, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -62,7 +57,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
@@ -71,7 +65,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm0, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -79,11 +72,11 @@ define <8 x i64> @test_div7_8i64(<8 x i6
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT:    vpsrlq $2, %zmm0, %zmm0
 ; AVX-NEXT:    retq
   %res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %res

Modified: llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll Sat May 25 11:02:17 2019
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; SSE-NEXT:    movzbl (%rdi), %eax
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shlq $62, %rcx
-; SSE-NEXT:    sarq $63, %rcx
-; SSE-NEXT:    movq %rcx, %xmm1
+; SSE-NEXT:    movq %rcx, %xmm0
 ; SSE-NEXT:    shlq $63, %rax
-; SSE-NEXT:    sarq $63, %rax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movq %rax, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; AVX1-NEXT:    movzbl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    shlq $63, %rax
-; AVX1-NEXT:    sarq $63, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; AVX2-NEXT:    movzbl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; X32-SSE2-NEXT:    movzbl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; X32-SSE41-NEXT:    movzbl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $31, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $30, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(
 ; X32-SSE2-NEXT:    movl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm2
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(
 ; X32-SSE41-NEXT:    movl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movl %eax, %edx
 ; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
 ; X32-SSE41-NEXT:    movd %edx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $28, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(
 ; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -5990,22 +5980,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
 ; X32-SSE2-NEXT:    movl 8(%eax), %eax
 ; X32-SSE2-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE2-NEXT:    shll $15, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    movl %edx, %eax
 ; X32-SSE2-NEXT:    shll $13, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE2-NEXT:    shll $15, %ecx
-; X32-SSE2-NEXT:    sarl $15, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    shll $15, %edx
-; X32-SSE2-NEXT:    sarl $15, %edx
 ; X32-SSE2-NEXT:    movd %edx, %xmm2
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $15, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6021,17 +6008,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
 ; X32-SSE41-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE41-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE41-NEXT:    shll $15, %edx
-; X32-SSE41-NEXT:    sarl $15, %edx
 ; X32-SSE41-NEXT:    shll $15, %ecx
-; X32-SSE41-NEXT:    sarl $15, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %edx, %xmm0
 ; X32-SSE41-NEXT:    shll $13, %esi
-; X32-SSE41-NEXT:    sarl $15, %esi
 ; X32-SSE41-NEXT:    pinsrd $2, %esi, %xmm0
 ; X32-SSE41-NEXT:    shll $15, %eax
-; X32-SSE41-NEXT:    sarl $15, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $15, %xmm0
 ; X32-SSE41-NEXT:    popl %esi
 ; X32-SSE41-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE41-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Sat May 25 11:02:17 2019
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; SSE-NEXT:    movzbl (%rdi), %eax
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shlq $62, %rcx
-; SSE-NEXT:    sarq $63, %rcx
-; SSE-NEXT:    movq %rcx, %xmm1
+; SSE-NEXT:    movq %rcx, %xmm0
 ; SSE-NEXT:    shlq $63, %rax
-; SSE-NEXT:    sarq $63, %rax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movq %rax, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; AVX1-NEXT:    movzbl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    shlq $63, %rax
-; AVX1-NEXT:    sarq $63, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; AVX2-NEXT:    movzbl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; X32-SSE2-NEXT:    movzbl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; X32-SSE41-NEXT:    movzbl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $31, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $30, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(
 ; X32-SSE2-NEXT:    movl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm2
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(
 ; X32-SSE41-NEXT:    movl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movl %eax, %edx
 ; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
 ; X32-SSE41-NEXT:    movd %edx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $28, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(
 ; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -6008,22 +5998,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
 ; X32-SSE2-NEXT:    movl 8(%eax), %eax
 ; X32-SSE2-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE2-NEXT:    shll $15, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    movl %edx, %eax
 ; X32-SSE2-NEXT:    shll $13, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE2-NEXT:    shll $15, %ecx
-; X32-SSE2-NEXT:    sarl $15, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    shll $15, %edx
-; X32-SSE2-NEXT:    sarl $15, %edx
 ; X32-SSE2-NEXT:    movd %edx, %xmm2
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $15, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6039,17 +6026,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
 ; X32-SSE41-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE41-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE41-NEXT:    shll $15, %edx
-; X32-SSE41-NEXT:    sarl $15, %edx
 ; X32-SSE41-NEXT:    shll $15, %ecx
-; X32-SSE41-NEXT:    sarl $15, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %edx, %xmm0
 ; X32-SSE41-NEXT:    shll $13, %esi
-; X32-SSE41-NEXT:    sarl $15, %esi
 ; X32-SSE41-NEXT:    pinsrd $2, %esi, %xmm0
 ; X32-SSE41-NEXT:    shll $15, %eax
-; X32-SSE41-NEXT:    sarl $15, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $15, %xmm0
 ; X32-SSE41-NEXT:    popl %esi
 ; X32-SSE41-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE41-NEXT:    retl




More information about the llvm-commits mailing list