[llvm] r244341 - [InstCombine] Fix SSE2/AVX2 vector logical shift by constant

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 7 11:22:50 PDT 2015


Author: rksimon
Date: Fri Aug  7 13:22:50 2015
New Revision: 244341

URL: http://llvm.org/viewvc/llvm-project?rev=244341&view=rev
Log:
[InstCombine] Fix SSE2/AVX2 vector logical shift by constant

This patch fixes the SSE2/AVX2 vector-shift-by-constant InstCombine fold so that it correctly handles the fact that the shift amount is formed from the entire lower 64 bits of the shift operand, and not just from its lowest element as the code previously assumed.

e.g.

%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)

In this case, (V)PSRLD doesn't perform a lshr by 15 but in fact attempts to shift by 64424509455 ((15 << 32) | 15) - giving a zero result.

In addition, this patch recognizes shift-by-zero when the shift amount is a ConstantAggregateZero (PR23821).

Differential Revision: http://reviews.llvm.org/D11760

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=244341&r1=244340&r2=244341&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Fri Aug  7 13:22:50 2015
@@ -200,33 +200,56 @@ Instruction *InstCombiner::SimplifyMemSe
 static Value *SimplifyX86immshift(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder,
                                   bool ShiftLeft) {
-  // Simplify if count is constant. To 0 if >= BitWidth,
-  // otherwise to shl/lshr.
-  auto CDV = dyn_cast<ConstantDataVector>(II.getArgOperand(1));
-  auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(1));
-  if (!CDV && !CInt)
+  // Simplify if count is constant.
+  auto Arg1 = II.getArgOperand(1);
+  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
+  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
+  auto CInt = dyn_cast<ConstantInt>(Arg1);
+  if (!CAZ && !CDV && !CInt)
     return nullptr;
-  ConstantInt *Count;
-  if (CDV)
-    Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
-  else
-    Count = CInt;
+
+  APInt Count(64, 0);
+  if (CDV) {
+    // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
+    // operand to compute the shift amount.
+    auto VT = cast<VectorType>(CDV->getType());
+    unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
+    assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
+    unsigned NumSubElts = 64 / BitWidth;
+
+    // Concatenate the sub-elements to create the 64-bit value.
+    for (unsigned i = 0; i != NumSubElts; ++i) {
+      unsigned SubEltIdx = (NumSubElts - 1) - i;
+      auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
+      Count = Count.shl(BitWidth);
+      Count |= SubElt->getValue().zextOrTrunc(64);
+    }
+  }
+  else if (CInt)
+    Count = CInt->getValue();
 
   auto Vec = II.getArgOperand(0);
   auto VT = cast<VectorType>(Vec->getType());
   auto SVT = VT->getElementType();
-  if (Count->getZExtValue() > (SVT->getPrimitiveSizeInBits() - 1))
-    return ConstantAggregateZero::get(VT);
-
   unsigned VWidth = VT->getNumElements();
+  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
+
+  // If shift-by-zero then just return the original value.
+  if (Count == 0)
+    return Vec;
+
+  // Handle cases when Shift >= BitWidth - just return zero.
+  if (Count.uge(BitWidth))
+    return ConstantAggregateZero::get(VT);
 
   // Get a constant vector of the same type as the first operand.
-  auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
+  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
+  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
 
   if (ShiftLeft)
-    return Builder.CreateShl(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
+    return Builder.CreateShl(Vec, ShiftVec);
 
-  return Builder.CreateLShr(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
+  return Builder.CreateLShr(Vec, ShiftVec);
 }
 
 static Value *SimplifyX86extend(const IntrinsicInst &II,

Modified: llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll?rev=244341&r1=244340&r2=244341&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll Fri Aug  7 13:22:50 2015
@@ -7,132 +7,132 @@ target datalayout = "e-m:e-i64:64-f80:12
 
 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_w_0
-; CHECK: ret <8 x i16> %v
+; CHECK-NEXT: ret <8 x i16> %v
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
   ret <8 x i16> %1
 }
 
 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_w_15
-; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
   ret <8 x i16> %1
 }
 
 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
   ret <8 x i16> %1
 }
 
 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_d_0
-; CHECK: ret <4 x i32> %v
+; CHECK-NEXT: ret <4 x i32> %v
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
   ret <4 x i32> %1
 }
 
 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_d_15
-; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
   ret <4 x i32> %1
 }
 
 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
   ret <4 x i32> %1
 }
 
 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_q_0
-; CHECK: ret <2 x i64> %v
+; CHECK-NEXT: ret <2 x i64> %v
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_q_15
-; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrli_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
   ret <2 x i64> %1
 }
 
 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_w_0
-; CHECK: ret <16 x i16> %v
+; CHECK-NEXT: ret <16 x i16> %v
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
   ret <16 x i16> %1
 }
 
 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_w_15
-; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
   ret <16 x i16> %1
 }
 
 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
   ret <16 x i16> %1
 }
 
 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_d_0
-; CHECK: ret <8 x i32> %v
+; CHECK-NEXT: ret <8 x i32> %v
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
   ret <8 x i32> %1
 }
 
 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_d_15
-; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
   ret <8 x i32> %1
 }
 
 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
   ret <8 x i32> %1
 }
 
 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_q_0
-; CHECK: ret <4 x i64> %v
+; CHECK-NEXT: ret <4 x i64> %v
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
   ret <4 x i64> %1
 }
 
 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_q_15
-; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
   ret <4 x i64> %1
 }
 
 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrli_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
   ret <4 x i64> %1
 }
@@ -143,132 +143,132 @@ define <4 x i64> @avx2_psrli_q_64(<4 x i
 
 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_w_0
-; CHECK: ret <8 x i16> %v
+; CHECK-NEXT: ret <8 x i16> %v
   %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
   ret <8 x i16> %1
 }
 
 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_w_15
-; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
   %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
   ret <8 x i16> %1
 }
 
 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
   %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
   ret <8 x i16> %1
 }
 
 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_d_0
-; CHECK: ret <4 x i32> %v
+; CHECK-NEXT: ret <4 x i32> %v
   %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
   ret <4 x i32> %1
 }
 
 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_d_15
-; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
   %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
   ret <4 x i32> %1
 }
 
 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
   %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
   ret <4 x i32> %1
 }
 
 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_q_0
-; CHECK: ret <2 x i64> %v
+; CHECK-NEXT: ret <2 x i64> %v
   %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_q_15
-; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
   %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_pslli_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
   %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
   ret <2 x i64> %1
 }
 
 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_w_0
-; CHECK: ret <16 x i16> %v
+; CHECK-NEXT: ret <16 x i16> %v
   %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
   ret <16 x i16> %1
 }
 
 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_w_15
-; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
   %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
   ret <16 x i16> %1
 }
 
 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
   %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
   ret <16 x i16> %1
 }
 
 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_d_0
-; CHECK: ret <8 x i32> %v
+; CHECK-NEXT: ret <8 x i32> %v
   %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
   ret <8 x i32> %1
 }
 
 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_d_15
-; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
   %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
   ret <8 x i32> %1
 }
 
 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
   %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
   ret <8 x i32> %1
 }
 
 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_q_0
-; CHECK: ret <4 x i64> %v
+; CHECK-NEXT: ret <4 x i64> %v
   %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
   ret <4 x i64> %1
 }
 
 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_q_15
-; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
   %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
   ret <4 x i64> %1
 }
 
 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_pslli_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
   %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
   ret <4 x i64> %1
 }
@@ -277,92 +277,162 @@ define <4 x i64> @avx2_pslli_q_64(<4 x i
 ; LSHR - Constant Vector
 ;
 
+define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrl_w_15
-; CHECK: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <8 x i16> %1
 }
 
+define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <8 x i16> %1
+}
+
 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrl_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <8 x i16> %1
 }
 
+define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrl_d_15
-; CHECK: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
   ret <4 x i32> %1
 }
 
+define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <4 x i32> %1
+}
+
 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrl_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
   ret <4 x i32> %1
 }
 
+define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psrl_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrl_q_15
-; CHECK: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psrl_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
   ret <2 x i64> %1
 }
 
+define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrl_w_15
-; CHECK: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <16 x i16> %1
 }
 
+define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <16 x i16> %1
+}
+
 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrl_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <16 x i16> %1
 }
 
+define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrl_d_15
-; CHECK: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
   ret <8 x i32> %1
 }
 
+define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <8 x i32> %1
+}
+
 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrl_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
   ret <8 x i32> %1
 }
 
+define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psrl_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrl_q_15
-; CHECK: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
   ret <4 x i64> %1
 }
 
 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psrl_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
   ret <4 x i64> %1
 }
@@ -371,92 +441,162 @@ define <4 x i64> @avx2_psrl_q_64(<4 x i6
 ; SHL - Constant Vector
 ;
 
+define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psll_w_15
-; CHECK: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <8 x i16> %1
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
   %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <8 x i16> %1
 }
 
+define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <8 x i16> %1
+}
+
 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psll_w_64
-; CHECK: ret <8 x i16> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
   %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <8 x i16> %1
 }
 
+define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psll_d_15
-; CHECK: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <4 x i32> %1
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
   %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
   ret <4 x i32> %1
 }
 
+define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <4 x i32> %1
+}
+
 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psll_d_64
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
   %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
   ret <4 x i32> %1
 }
 
+define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @sse2_psll_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psll_q_15
-; CHECK: %1 = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK: ret <2 x i64> %1
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
   %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @sse2_psll_q_64
-; CHECK: ret <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
   %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
   ret <2 x i64> %1
 }
 
+define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psll_w_15
-; CHECK: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK: ret <16 x i16> %1
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <16 x i16> %1
 }
 
+define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <16 x i16> %1
+}
+
 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psll_w_64
-; CHECK: ret <16 x i16> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
   ret <16 x i16> %1
 }
 
+define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psll_d_15
-; CHECK: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK: ret <8 x i32> %1
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
   ret <8 x i32> %1
 }
 
+define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <8 x i32> %1
+}
+
 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psll_d_64
-; CHECK: ret <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
   ret <8 x i32> %1
 }
 
+define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) nounwind readnone uwtable {
+; CHECK-LABEL: @avx2_psll_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psll_q_15
-; CHECK: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK: ret <4 x i64> %1
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
   %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
   ret <4 x i64> %1
 }
 
 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) nounwind readnone uwtable {
 ; CHECK-LABEL: @avx2_psll_q_64
-; CHECK: ret <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
   %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
   ret <4 x i64> %1
 }
@@ -660,6 +800,7 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w
 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+
 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1




More information about the llvm-commits mailing list