[llvm-commits] [llvm] r136453 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx-vpermil.ll

Bruno Cardoso Lopes bruno.cardoso at gmail.com
Thu Jul 28 18:31:15 PDT 2011


Author: bruno
Date: Thu Jul 28 20:31:15 2011
New Revision: 136453

URL: http://llvm.org/viewvc/llvm-project?rev=136453&view=rev
Log:
Match VPERMIL masks more strictly and update the target-specific mask
generation to always catch the weird cases.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx-vpermil.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=136453&r1=136452&r2=136453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 28 20:31:15 2011
@@ -3465,11 +3465,14 @@
     return false;
 
   // The mask on the high lane should be the same as the low. Actually,
-  // they can differ if any of the corresponding index in a lane is undef.
+  // they can differ if either of the corresponding indices is undef
+  // and the other stays in range.
   int LaneSize = NumElts/NumLanes;
   for (int i = 0; i < LaneSize; ++i) {
     int HighElt = i+LaneSize;
-    if (Mask[i] < 0 || Mask[HighElt] < 0)
+    if (Mask[i] < 0 && (isUndefOrInRange(Mask[HighElt], LaneSize, NumElts)))
+      continue;
+    if (Mask[HighElt] < 0 && (isUndefOrInRange(Mask[i], 0, LaneSize)))
       continue;
     if (Mask[HighElt]-Mask[i] != LaneSize)
       return false;
@@ -3486,13 +3489,20 @@
 
   int NumElts = VT.getVectorNumElements();
   int NumLanes = VT.getSizeInBits()/128;
+  int LaneSize = NumElts/NumLanes;
 
+  // Although the mask is equal for both lanes, do it twice to get the cases
+  // where a mask will match because the same mask element is undef on the
+  // first half but valid on the second. This handles pathological cases
+  // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
   unsigned Mask = 0;
-  for (int i = 0; i < NumElts/NumLanes /* lane size */; ++i) {
-    int MaskElt = SVOp->getMaskElt(i);
-    if (MaskElt < 0)
-      continue;
-    Mask |= MaskElt << (i*2);
+  for (int l = 0; l < NumLanes; ++l) {
+    for (int i = 0; i < LaneSize; ++i) {
+      int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
+      if (MaskElt < 0)
+        continue;
+      Mask |= MaskElt << (i*2);
+    }
   }
 
   return Mask;

Modified: llvm/trunk/test/CodeGen/X86/avx-vpermil.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vpermil.ll?rev=136453&r1=136452&r2=136453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vpermil.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vpermil.ll Thu Jul 28 20:31:15 2011
@@ -27,3 +27,13 @@
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
   ret <4 x i64> %shuffle
 }
+
+; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
+; target-specific mask was correctly generated.
+; CHECK: vpermilps $-100
+define <8 x float> @funcA(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 3, i32 1, i32 2, i32 4, i32 8, i32 5, i32 6>
+  ret <8 x float> %shuffle
+}
+





More information about the llvm-commits mailing list