[llvm] r346102 - [X86] Add vector shift by immediate to SimplifyDemandedBitsForTargetNode.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Sun Nov 4 09:31:28 PST 2018


Author: ctopper
Date: Sun Nov  4 09:31:27 2018
New Revision: 346102

URL: http://llvm.org/viewvc/llvm-project?rev=346102&view=rev
Log:
[X86] Add vector shift by immediate to SimplifyDemandedBitsForTargetNode.

Summary: This also enables some constant folding from KnownBits propagation. This helps some vXi64 cases in 32-bit mode where constant vectors appear as a vXi32 constant plus a bitcast, which can prevent getNode from constant folding sra/shl/srl.
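
As a minimal standalone sketch of the demanded-bits rule the new VSHLI case applies (plain C++ with uint64_t rather than LLVM's APInt/KnownBits; the helper name demandedInputForShl is made up for illustration and is not code from this commit): if a user of a shift-left-by-immediate only reads certain result bits, then only those bits shifted right by the amount can matter in the shift input, so the input may be simplified against that narrower mask. VSRLI/VSRAI use the mirrored rule (demanded mask shifted left, plus the sign bit for arithmetic shifts).

  // Standalone illustration of the VSHLI demanded-bits rule; mirrors
  // "APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);" in the patch below.
  #include <cassert>
  #include <cstdint>

  // Bits of the shift input that can affect the demanded bits of (x << shAmt).
  uint64_t demandedInputForShl(uint64_t demandedResultBits, unsigned shAmt) {
    return demandedResultBits >> shAmt;
  }

  int main() {
    const unsigned shAmt = 8;
    const uint64_t demanded = 0xFF00;                                // caller reads bits 8..15 only
    const uint64_t inputMask = demandedInputForShl(demanded, shAmt); // 0x00FF

    // Two inputs that agree on inputMask produce identical demanded result
    // bits, so bits of the input outside inputMask are free to simplify away.
    uint64_t a = 0x12345678;
    uint64_t b = (a & inputMask) | 0xAAAA0000;
    assert(((a << shAmt) & demanded) == ((b << shAmt) & demanded));
    return 0;
  }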

Reviewers: RKSimon, spatel

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D54069

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/combine-srl.ll
    llvm/trunk/test/CodeGen/X86/combine-udiv.ll
    llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
    llvm/trunk/test/CodeGen/X86/pr35918.ll
    llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
    llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov  4 09:31:27 2018
@@ -31817,6 +31817,7 @@ bool X86TargetLowering::SimplifyDemanded
 bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     SDValue Op, const APInt &OriginalDemandedBits, KnownBits &Known,
     TargetLoweringOpt &TLO, unsigned Depth) const {
+  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
   unsigned Opc = Op.getOpcode();
   switch(Opc) {
   case X86ISD::PMULDQ:
@@ -31833,6 +31834,42 @@ bool X86TargetLowering::SimplifyDemanded
       return true;
     break;
   }
+  case X86ISD::VSHLI: {
+    if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      if (ShiftImm->getAPIntValue().uge(BitWidth))
+        break;
+
+      KnownBits KnownOp;
+      unsigned ShAmt = ShiftImm->getZExtValue();
+      APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
+      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
+                               Depth + 1))
+        return true;
+    }
+    break;
+  }
+  case X86ISD::VSRAI:
+  case X86ISD::VSRLI: {
+    if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      if (ShiftImm->getAPIntValue().uge(BitWidth))
+        break;
+
+      KnownBits KnownOp;
+      unsigned ShAmt = ShiftImm->getZExtValue();
+      APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      if (Opc == X86ISD::VSRAI &&
+          OriginalDemandedBits.countLeadingZeros() < ShAmt)
+        DemandedMask.setSignBit();
+
+      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
+                               Depth + 1))
+        return true;
+    }
+    break;
+  }
   }
 
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -34861,6 +34898,11 @@ static SDValue combineVectorShiftImm(SDN
     return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
   }
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+                               APInt::getAllOnesValue(NumBitsPerElt), DCI))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 

Modified: llvm/trunk/test/CodeGen/X86/combine-srl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-srl.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-srl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-srl.ll Sun Nov  4 09:31:27 2018
@@ -63,17 +63,7 @@ define <4 x i32> @combine_vec_lshr_known
 define <4 x i32> @combine_vec_lshr_known_zero1(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_lshr_known_zero1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psrld $11, %xmm1
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    psrld $9, %xmm2
-; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psrld $10, %xmm1
-; SSE-NEXT:    psrld $8, %xmm0
-; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_lshr_known_zero1:

Modified: llvm/trunk/test/CodeGen/X86/combine-udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-udiv.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-udiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-udiv.ll Sun Nov  4 09:31:27 2018
@@ -669,20 +669,15 @@ define <16 x i8> @combine_vec_udiv_nonun
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; SSE41-NEXT:    pmullw %xmm0, %xmm2
 ; SSE41-NEXT:    psrlw $8, %xmm2
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    pmullw %xmm3, %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    movdqa %xmm2, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    psllw $1, %xmm3
-; SSE41-NEXT:    psllw $8, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psllw $8, %xmm0
+; SSE41-NEXT:    pxor %xmm3, %xmm3
+; SSE41-NEXT:    packuswb %xmm3, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    psllw $1, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3,4,5,6,7]
 ; SSE41-NEXT:    psrlw $8, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    packuswb %xmm3, %xmm2
 ; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
@@ -693,21 +688,16 @@ define <16 x i8> @combine_vec_udiv_nonun
 ; AVX1-NEXT:    movl $171, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm1
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmullw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw $8, %xmm1, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpsllw $1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
 ; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT:    vpsllw $1, %xmm3, %xmm3
-; AVX1-NEXT:    vpsllw $8, %xmm2, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
-; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Sun Nov  4 09:31:27 2018
@@ -91,17 +91,14 @@ define float @signbits_ashr_extract_sito
 ; X32-LABEL: signbits_ashr_extract_sitofp_1:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %eax
-; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-NEXT:    vpsrlq $63, %xmm1, %xmm2
-; X32-NEXT:    vpsrlq $32, %xmm1, %xmm1
-; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-NEXT:    vpsrlq $63, %xmm0, %xmm2
+; X32-NEXT:    vpsrlq $63, %xmm0, %xmm1
 ; X32-NEXT:    vpsrlq $32, %xmm0, %xmm0
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,32768,0,0,1,0,0,0]
 ; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vmovd %xmm0, %eax
-; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
+; X32-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
 ; X32-NEXT:    vmovss %xmm0, (%esp)
 ; X32-NEXT:    flds (%esp)
 ; X32-NEXT:    popl %eax
@@ -128,18 +125,15 @@ define float @signbits_ashr_shl_extract_
 ; X32-LABEL: signbits_ashr_shl_extract_sitofp:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %eax
-; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-NEXT:    vpsrlq $60, %xmm1, %xmm2
-; X32-NEXT:    vpsrlq $61, %xmm1, %xmm1
-; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-NEXT:    vpsrlq $60, %xmm0, %xmm2
+; X32-NEXT:    vpsrlq $60, %xmm0, %xmm1
 ; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
 ; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vpsllq $20, %xmm0, %xmm0
 ; X32-NEXT:    vmovd %xmm0, %eax
-; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
+; X32-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
 ; X32-NEXT:    vmovss %xmm0, (%esp)
 ; X32-NEXT:    flds (%esp)
 ; X32-NEXT:    popl %eax
@@ -263,13 +257,10 @@ define float @signbits_ashr_sext_sextinr
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
-; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
-; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
-; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
+; X32-NEXT:    vpsrlq $60, %xmm0, %xmm2
 ; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
 ; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
 ; X32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
@@ -281,7 +272,7 @@ define float @signbits_ashr_sext_sextinr
 ; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; X32-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vmovd %xmm0, %eax
-; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
+; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
 ; X32-NEXT:    vmovss %xmm0, (%esp)
 ; X32-NEXT:    flds (%esp)
 ; X32-NEXT:    popl %eax
@@ -320,13 +311,10 @@ define float @signbits_ashr_sextvecinreg
 ; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushl %eax
-; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
-; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
-; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
-; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
+; X32-NEXT:    vpsrlq $60, %xmm0, %xmm2
 ; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
 ; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
 ; X32-NEXT:    vpmovsxdq %xmm1, %xmm1
@@ -334,7 +322,7 @@ define float @signbits_ashr_sextvecinreg
 ; X32-NEXT:    vpor %xmm1, %xmm2, %xmm1
 ; X32-NEXT:    vpxor %xmm0, %xmm1, %xmm0
 ; X32-NEXT:    vmovd %xmm0, %eax
-; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
+; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
 ; X32-NEXT:    vmovss %xmm0, (%esp)
 ; X32-NEXT:    flds (%esp)
 ; X32-NEXT:    popl %eax
@@ -375,22 +363,19 @@ define <4 x float> @signbits_ashr_sext_s
 ; X32-NEXT:    subl $16, %esp
 ; X32-NEXT:    vpmovsxdq 16(%ebp), %xmm3
 ; X32-NEXT:    vpmovsxdq 8(%ebp), %xmm4
-; X32-NEXT:    vmovdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648]
+; X32-NEXT:    vextractf128 $1, %ymm2, %xmm5
 ; X32-NEXT:    vpsrlq $63, %xmm5, %xmm6
 ; X32-NEXT:    vpsrlq $33, %xmm5, %xmm5
 ; X32-NEXT:    vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
-; X32-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; X32-NEXT:    vpsrlq $63, %xmm6, %xmm7
-; X32-NEXT:    vpsrlq $33, %xmm6, %xmm6
-; X32-NEXT:    vpblendw {{.*#+}} xmm6 = xmm6[0,1,2,3],xmm7[4,5,6,7]
-; X32-NEXT:    vpxor %xmm5, %xmm6, %xmm6
-; X32-NEXT:    vpsubq %xmm5, %xmm6, %xmm6
+; X32-NEXT:    vmovdqa {{.*#+}} xmm6 = [0,16384,0,0,1,0,0,0]
+; X32-NEXT:    vpxor %xmm6, %xmm5, %xmm5
+; X32-NEXT:    vpsubq %xmm6, %xmm5, %xmm5
 ; X32-NEXT:    vpsrlq $63, %xmm2, %xmm7
 ; X32-NEXT:    vpsrlq $33, %xmm2, %xmm2
 ; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm7[4,5,6,7]
-; X32-NEXT:    vpxor %xmm5, %xmm2, %xmm2
-; X32-NEXT:    vpsubq %xmm5, %xmm2, %xmm2
-; X32-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; X32-NEXT:    vpxor %xmm6, %xmm2, %xmm2
+; X32-NEXT:    vpsubq %xmm6, %xmm2, %xmm2
+; X32-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
 ; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
 ; X32-NEXT:    vextractf128 $1, %ymm1, %xmm4
 ; X32-NEXT:    vextractf128 $1, %ymm0, %xmm5

Modified: llvm/trunk/test/CodeGen/X86/pr35918.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr35918.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr35918.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr35918.ll Sun Nov  4 09:31:27 2018
@@ -11,9 +11,9 @@ define void @fetch_r16g16_snorm_unorm8(<
 ; X86-SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SKYLAKE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-SKYLAKE-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SKYLAKE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X86-SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-SKYLAKE-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
 ; X86-SKYLAKE-NEXT:    vpsrad $16, %xmm0, %xmm0
+; X86-SKYLAKE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X86-SKYLAKE-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-SKYLAKE-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
 ; X86-SKYLAKE-NEXT:    vpsrld $7, %xmm0, %xmm0
@@ -29,7 +29,7 @@ define void @fetch_r16g16_snorm_unorm8(<
 ; X86-SKX-NEXT:    subl $8, %esp
 ; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SKX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SKX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[u,u],zero,zero,xmm0[u,u]
+; X86-SKX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,0,1,u,u,2,3,u,u,u,u,u,u,u,u]
 ; X86-SKX-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; X86-SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X86-SKX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
@@ -50,9 +50,9 @@ define void @fetch_r16g16_snorm_unorm8(<
 ; X64-SKYLAKE-LABEL: fetch_r16g16_snorm_unorm8:
 ; X64-SKYLAKE:       # %bb.0: # %entry
 ; X64-SKYLAKE-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SKYLAKE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X64-SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-SKYLAKE-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
 ; X64-SKYLAKE-NEXT:    vpsrad $16, %xmm0, %xmm0
+; X64-SKYLAKE-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X64-SKYLAKE-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-SKYLAKE-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
 ; X64-SKYLAKE-NEXT:    vpsrld $7, %xmm0, %xmm0
@@ -65,7 +65,7 @@ define void @fetch_r16g16_snorm_unorm8(<
 ; X64-SKX-LABEL: fetch_r16g16_snorm_unorm8:
 ; X64-SKX:       # %bb.0: # %entry
 ; X64-SKX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SKX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[u,u],zero,zero,xmm0[u,u]
+; X64-SKX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,0,1,u,u,2,3,u,u,u,u,u,u,u,u]
 ; X64-SKX-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; X64-SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X64-SKX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0

Modified: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll Sun Nov  4 09:31:27 2018
@@ -990,15 +990,11 @@ define <2 x i64> @constant_shift_v2i64(<
 ;
 ; X32-SSE-LABEL: constant_shift_v2i64:
 ; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlq $1, %xmm2
-; X32-SSE-NEXT:    psrlq $7, %xmm1
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrlq $1, %xmm2
+; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE-NEXT:    psrlq $1, %xmm1
 ; X32-SSE-NEXT:    psrlq $7, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X32-SSE-NEXT:    movapd {{.*#+}} xmm1 = [2.0E+0,7.2911220195563975E-304]
 ; X32-SSE-NEXT:    xorpd %xmm1, %xmm0
 ; X32-SSE-NEXT:    psubq %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll Sun Nov  4 09:31:27 2018
@@ -1066,25 +1066,20 @@ define <4 x i64> @constant_shift_v4i64(<
 ;
 ; X32-AVX1-LABEL: constant_shift_v4i64:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; X32-AVX1-NEXT:    vpsrlq $62, %xmm1, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $31, %xmm1, %xmm3
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vpsrlq $62, %xmm3, %xmm4
-; X32-AVX1-NEXT:    vpsrlq $31, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
-; X32-AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm1, %xmm3
-; X32-AVX1-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm3
+; X32-AVX1-NEXT:    vpsrlq $31, %xmm1, %xmm1
+; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,1,0,2,0,0,0]
+; X32-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; X32-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
+; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
-; X32-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,16384,0,0,0,256]
+; X32-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; X32-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
 ; X32-AVX2-LABEL: constant_shift_v4i64:

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll Sun Nov  4 09:31:27 2018
@@ -716,26 +716,26 @@ define <8 x i16> @trunc_usat_v8i64_v8i16
 define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) {
 ; SSE2-LABEL: trunc_usat_v8i32_v8i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    movdqa %xmm0, %xmm4
-; SSE2-NEXT:    pxor %xmm3, %xmm4
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
-; SSE2-NEXT:    movdqa %xmm5, %xmm6
-; SSE2-NEXT:    pcmpgtd %xmm4, %xmm6
-; SSE2-NEXT:    pand %xmm6, %xmm0
-; SSE2-NEXT:    pandn %xmm2, %xmm6
-; SSE2-NEXT:    por %xmm6, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pxor %xmm2, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183]
+; SSE2-NEXT:    movdqa %xmm4, %xmm5
 ; SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
-; SSE2-NEXT:    pand %xmm5, %xmm1
-; SSE2-NEXT:    pandn %xmm2, %xmm5
-; SSE2-NEXT:    por %xmm1, %xmm5
-; SSE2-NEXT:    pslld $16, %xmm5
-; SSE2-NEXT:    psrad $16, %xmm5
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    pand %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm3, %xmm5
+; SSE2-NEXT:    por %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
+; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pand %xmm1, %xmm4
+; SSE2-NEXT:    por %xmm3, %xmm4
+; SSE2-NEXT:    pslld $16, %xmm4
+; SSE2-NEXT:    psrad $16, %xmm4
 ; SSE2-NEXT:    pslld $16, %xmm0
 ; SSE2-NEXT:    psrad $16, %xmm0
-; SSE2-NEXT:    packssdw %xmm5, %xmm0
+; SSE2-NEXT:    packssdw %xmm4, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: trunc_usat_v8i32_v8i16:
@@ -826,36 +826,36 @@ define <8 x i16> @trunc_usat_v8i32_v8i16
 define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32> %a0) {
 ; SSE2-LABEL: trunc_usat_v16i32_v16i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [65535,65535,65535,65535]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    movdqa %xmm2, %xmm6
-; SSE2-NEXT:    pxor %xmm7, %xmm6
+; SSE2-NEXT:    movdqa %xmm1, %xmm8
+; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    movdqa %xmm2, %xmm7
+; SSE2-NEXT:    pxor %xmm6, %xmm7
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
 ; SSE2-NEXT:    movdqa %xmm5, %xmm1
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm7, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm7, %xmm7
 ; SSE2-NEXT:    pand %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm8, %xmm1
+; SSE2-NEXT:    pxor %xmm7, %xmm1
 ; SSE2-NEXT:    por %xmm2, %xmm1
-; SSE2-NEXT:    movdqa %xmm3, %xmm6
-; SSE2-NEXT:    pxor %xmm7, %xmm6
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    pxor %xmm6, %xmm4
 ; SSE2-NEXT:    movdqa %xmm5, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm2
 ; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    pandn %xmm8, %xmm2
+; SSE2-NEXT:    pxor %xmm7, %xmm2
 ; SSE2-NEXT:    por %xmm3, %xmm2
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pxor %xmm7, %xmm3
-; SSE2-NEXT:    movdqa %xmm5, %xmm6
-; SSE2-NEXT:    pcmpgtd %xmm3, %xmm6
-; SSE2-NEXT:    pand %xmm6, %xmm0
-; SSE2-NEXT:    pandn %xmm8, %xmm6
-; SSE2-NEXT:    por %xmm6, %xmm0
-; SSE2-NEXT:    pxor %xmm4, %xmm7
-; SSE2-NEXT:    pcmpgtd %xmm7, %xmm5
-; SSE2-NEXT:    pand %xmm5, %xmm4
-; SSE2-NEXT:    pandn %xmm8, %xmm5
-; SSE2-NEXT:    por %xmm4, %xmm5
+; SSE2-NEXT:    pxor %xmm6, %xmm3
+; SSE2-NEXT:    movdqa %xmm5, %xmm4
+; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT:    pand %xmm4, %xmm0
+; SSE2-NEXT:    pxor %xmm7, %xmm4
+; SSE2-NEXT:    por %xmm4, %xmm0
+; SSE2-NEXT:    pxor %xmm8, %xmm6
+; SSE2-NEXT:    pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT:    pxor %xmm5, %xmm7
+; SSE2-NEXT:    pand %xmm8, %xmm5
+; SSE2-NEXT:    por %xmm7, %xmm5
 ; SSE2-NEXT:    pslld $16, %xmm5
 ; SSE2-NEXT:    psrad $16, %xmm5
 ; SSE2-NEXT:    pslld $16, %xmm0
@@ -870,36 +870,36 @@ define <16 x i16> @trunc_usat_v16i32_v16
 ;
 ; SSSE3-LABEL: trunc_usat_v16i32_v16i16:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [65535,65535,65535,65535]
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT:    movdqa %xmm2, %xmm6
-; SSSE3-NEXT:    pxor %xmm7, %xmm6
+; SSSE3-NEXT:    movdqa %xmm1, %xmm8
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm7
+; SSSE3-NEXT:    pxor %xmm6, %xmm7
 ; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
 ; SSSE3-NEXT:    movdqa %xmm5, %xmm1
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm1
+; SSSE3-NEXT:    pcmpgtd %xmm7, %xmm1
+; SSSE3-NEXT:    pcmpeqd %xmm7, %xmm7
 ; SSSE3-NEXT:    pand %xmm1, %xmm2
-; SSSE3-NEXT:    pandn %xmm8, %xmm1
+; SSSE3-NEXT:    pxor %xmm7, %xmm1
 ; SSSE3-NEXT:    por %xmm2, %xmm1
-; SSSE3-NEXT:    movdqa %xmm3, %xmm6
-; SSSE3-NEXT:    pxor %xmm7, %xmm6
+; SSSE3-NEXT:    movdqa %xmm3, %xmm4
+; SSSE3-NEXT:    pxor %xmm6, %xmm4
 ; SSSE3-NEXT:    movdqa %xmm5, %xmm2
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm2
+; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm2
 ; SSSE3-NEXT:    pand %xmm2, %xmm3
-; SSSE3-NEXT:    pandn %xmm8, %xmm2
+; SSSE3-NEXT:    pxor %xmm7, %xmm2
 ; SSSE3-NEXT:    por %xmm3, %xmm2
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm3
-; SSSE3-NEXT:    pxor %xmm7, %xmm3
-; SSSE3-NEXT:    movdqa %xmm5, %xmm6
-; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm6
-; SSSE3-NEXT:    pand %xmm6, %xmm0
-; SSSE3-NEXT:    pandn %xmm8, %xmm6
-; SSSE3-NEXT:    por %xmm6, %xmm0
-; SSSE3-NEXT:    pxor %xmm4, %xmm7
-; SSSE3-NEXT:    pcmpgtd %xmm7, %xmm5
-; SSSE3-NEXT:    pand %xmm5, %xmm4
-; SSSE3-NEXT:    pandn %xmm8, %xmm5
-; SSSE3-NEXT:    por %xmm4, %xmm5
+; SSSE3-NEXT:    pxor %xmm6, %xmm3
+; SSSE3-NEXT:    movdqa %xmm5, %xmm4
+; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm4
+; SSSE3-NEXT:    pand %xmm4, %xmm0
+; SSSE3-NEXT:    pxor %xmm7, %xmm4
+; SSSE3-NEXT:    por %xmm4, %xmm0
+; SSSE3-NEXT:    pxor %xmm8, %xmm6
+; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm5
+; SSSE3-NEXT:    pxor %xmm5, %xmm7
+; SSSE3-NEXT:    pand %xmm8, %xmm5
+; SSSE3-NEXT:    por %xmm7, %xmm5
 ; SSSE3-NEXT:    pslld $16, %xmm5
 ; SSSE3-NEXT:    psrad $16, %xmm5
 ; SSSE3-NEXT:    pslld $16, %xmm0



