[llvm] r268202 - [InstCombine][SSE] Added support for UNDEF mask elements to the PSHUFB-to-shufflevector combine.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun May 1 12:26:22 PDT 2016


Author: rksimon
Date: Sun May  1 14:26:21 2016
New Revision: 268202

URL: http://llvm.org/viewvc/llvm-project?rev=268202&view=rev
Log:
[InstCombine][SSE] Added support for UNDEF mask elements to the PSHUFB-to-shufflevector combine.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=268202&r1=268201&r2=268202&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Sun May  1 14:26:21 2016
@@ -597,21 +597,27 @@ static Value *simplifyX86pshufb(const In
   if (!V)
     return nullptr;
 
-  auto *VTy = cast<VectorType>(V->getType());
-  unsigned NumElts = VTy->getNumElements();
+  auto *VecTy = cast<VectorType>(II.getType());
+  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
+  unsigned NumElts = VecTy->getNumElements();
   assert((NumElts == 16 || NumElts == 32) &&
          "Unexpected number of elements in shuffle mask!");
 
-  // Initialize the resulting shuffle mask to all zeroes.
-  uint32_t Indexes[32] = {0};
+  // Construct a shuffle mask from constant integers or UNDEFs.
+  Constant *Indexes[32] = { NULL };
 
   // Each byte in the shuffle control mask forms an index to permute the
   // corresponding byte in the destination operand.
   for (unsigned I = 0; I < NumElts; ++I) {
     Constant *COp = V->getAggregateElement(I);
-    if (!COp || !isa<ConstantInt>(COp))
+    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
       return nullptr;
 
+    if (isa<UndefValue>(COp)) {
+      Indexes[I] = UndefValue::get(MaskEltTy);
+      continue;
+    }
+
     int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
 
     // If the most significant bit (bit[7]) of each byte of the shuffle
@@ -619,20 +625,15 @@ static Value *simplifyX86pshufb(const In
     // The zero vector is in the right-hand side of the resulting
     // shufflevector.
 
-    // The value of each index is the least significant 4 bits of the
-    // shuffle control byte.
-    Indexes[I] = (Index < 0) ? NumElts : Index & 0xF;
+    // The value of each index for the high 128-bit lane is the least
+    // significant 4 bits of the respective shuffle control byte.
+    Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
+    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
   }
 
-  // The value of each index for the high 128-bit lane is the least
-  // significant 4 bits of the respective shuffle control byte.
-  for (unsigned I = 16; I < NumElts; ++I)
-    Indexes[I] += I & 0xF0;
-
-  auto ShuffleMask =
-      ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, NumElts));
+  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
   auto V1 = II.getArgOperand(0);
-  auto V2 = Constant::getNullValue(II.getType());
+  auto V2 = Constant::getNullValue(VecTy);
   return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll?rev=268202&r1=268201&r2=268202&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll Sun May  1 14:26:21 2016
@@ -288,7 +288,7 @@ define <32 x i8> @permute3_avx2(<32 x i8
 
 define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
 ; CHECK-LABEL: @fold_with_undef_elts(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 16, i32 undef, i32 16, i32 1, i32 16, i32 undef, i32 16, i32 2, i32 16, i32 undef, i32 16, i32 3, i32 16, i32 undef, i32 16>
 ; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 ;
   %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
@@ -297,7 +297,7 @@ define <16 x i8> @fold_with_undef_elts(<
 
 define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
 ; CHECK-LABEL: @fold_with_undef_elts_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 undef, i32 32, i32 1, i32 32, i32 undef, i32 32, i32 2, i32 32, i32 undef, i32 32, i32 3, i32 32, i32 undef, i32 32, i32 16, i32 48, i32 undef, i32 48, i32 17, i32 48, i32 undef, i32 48, i32 18, i32 48, i32 undef, i32 48, i32 19, i32 48, i32 undef, i32 48>
 ; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
 ;
   %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
@@ -306,8 +306,7 @@ define <32 x i8> @fold_with_undef_elts_a
 
 define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
 ; CHECK-LABEL: @fold_with_allundef_elts(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+; CHECK-NEXT:    ret <16 x i8> undef
 ;
   %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
   ret <16 x i8> %1
@@ -315,8 +314,7 @@ define <16 x i8> @fold_with_allundef_elt
 
 define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
 ; CHECK-LABEL: @fold_with_allundef_elts_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
+; CHECK-NEXT:    ret <32 x i8> undef
 ;
   %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
   ret <32 x i8> %1




More information about the llvm-commits mailing list