[llvm] r268206 - [InstCombine][SSE] Added support for UNDEF mask elements in the VPERMD/VPERMPS shuffle combine.

Sun May 1 13:43:02 PDT 2016

Author: rksimon
Date: Sun May  1 15:43:02 2016
New Revision: 268206

URL: http://llvm.org/viewvc/llvm-project?rev=268206&view=rev
Log:
[InstCombine][SSE] Added support for UNDEF mask elements in the VPERMD/VPERMPS shuffle combine.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=268206&r1=268205&r2=268206&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Sun May  1 15:43:02 2016
@@ -696,25 +696,30 @@ static Value *simplifyX86vpermv(const In
   if (!V)
     return nullptr;
 
-  VectorType *VecTy = cast<VectorType>(II.getType());
+  auto *VecTy = cast<VectorType>(II.getType());
+  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
   unsigned Size = VecTy->getNumElements();
   assert(Size == 8 && "Unexpected shuffle mask size");
 
-  // Initialize the resulting shuffle mask to all zeroes.
-  uint32_t Indexes[8] = {0};
+  // Construct a shuffle mask from constant integers or UNDEFs.
+  Constant *Indexes[8] = {NULL};
 
   for (unsigned I = 0; I < Size; ++I) {
     Constant *COp = V->getAggregateElement(I);
-    if (!COp || !isa<ConstantInt>(COp))
+    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
       return nullptr;
 
+    if (isa<UndefValue>(COp)) {
+      Indexes[I] = UndefValue::get(MaskEltTy);
+      continue;
+    }
+
     APInt Index = cast<ConstantInt>(COp)->getValue();
-    Index = Index.getLoBits(3);
-    Indexes[I] = (uint32_t)Index.getZExtValue();
+    Index = Index.zextOrTrunc(32).getLoBits(3);
+    Indexes[I] = ConstantInt::get(MaskEltTy, Index);
   }
 
-  auto ShuffleMask =
-      ConstantDataVector::get(II.getContext(), makeArrayRef(Indexes, Size));
+  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
   auto V1 = II.getArgOperand(0);
   auto V2 = UndefValue::get(VecTy);
   return Builder.CreateShuffleVector(V1, V2, ShuffleMask);

Modified: llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll?rev=268206&r1=268205&r2=268206&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll Sun May  1 15:43:02 2016
@@ -61,12 +61,12 @@ define <8 x float> @shuffle_test_vpermps
   ret <8 x float> %a
 }
 
-; FIXME: Verify that instcombine is able to fold constant shuffles with undef mask elements.
+; Verify that instcombine is able to fold constant shuffles with undef mask elements.
 
 define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
 ; CHECK-LABEL: @undef_test_vpermd(
-; CHECK-NEXT:    [[A:%.*]] = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A:%.*]]0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-; CHECK-NEXT:    ret <8 x i32> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
   ret <8 x i32> %a
@@ -74,8 +74,8 @@ define <8 x i32> @undef_test_vpermd(<8 x
 
 define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
 ; CHECK-LABEL: @undef_test_vpermps(
-; CHECK-NEXT:    [[A:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A:%.*]]0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-; CHECK-NEXT:    ret <8 x float> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
   %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
   ret <8 x float> %a