[llvm] r345018 - [X86][SSE] Update raw mask shuffle decoders to handle UNDEF mask elts

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 23 04:33:38 PDT 2018


Author: rksimon
Date: Tue Oct 23 04:33:38 2018
New Revision: 345018

URL: http://llvm.org/viewvc/llvm-project?rev=345018&view=rev
Log:
[X86][SSE] Update raw mask shuffle decoders to handle UNDEF mask elts

Matches the approach taken in the constant pool shuffle decoders, and uses an UndefElts mask instead of uint64_t(-1) raw mask values, which doesn't work safely for i32/i64 shuffle mask sizes (as the -1 value is legal).

This allows us to remove the constant pool shuffle decoders from most of the getTargetShuffleMask variable shuffle cases (X86ISD::VPERMV3 will be handled in a future commit).

Modified:
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=345018&r1=345017&r2=345018&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Tue Oct 23 04:33:38 2018
@@ -304,12 +304,12 @@ void DecodeVPERM2X128Mask(unsigned NumEl
   }
 }
 
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask) {
   for (int i = 0, e = RawMask.size(); i < e; ++i) {
     uint64_t M = RawMask[i];
-    if (M == (uint64_t)SM_SentinelUndef) {
-      ShuffleMask.push_back(M);
+    if (UndefElts[i]) {
+      ShuffleMask.push_back(SM_SentinelUndef);
       continue;
     }
     // For 256/512-bit vectors the base of the shuffle is the 128-bit
@@ -336,7 +336,7 @@ void DecodeBLENDMask(unsigned NumElts, u
   }
 }
 
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask) {
   assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
 
@@ -354,12 +354,12 @@ void DecodeVPPERMMask(ArrayRef<uint64_t>
   // 6 - Most significant bit of source byte replicated in all bit positions.
   // 7 - Invert most significant bit of source byte and replicate in all bit positions.
   for (int i = 0, e = RawMask.size(); i < e; ++i) {
-    uint64_t M = RawMask[i];
-    if (M == (uint64_t)SM_SentinelUndef) {
-      ShuffleMask.push_back(M);
+    if (UndefElts[i]) {
+      ShuffleMask.push_back(SM_SentinelUndef);
       continue;
     }
 
+    uint64_t M = RawMask[i];
     uint64_t PermuteOp = (M >> 5) & 0x7;
     if (PermuteOp == 4) {
       ShuffleMask.push_back(SM_SentinelZero);
@@ -490,7 +490,7 @@ void DecodeINSERTQIMask(unsigned NumElts
 }
 
 void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
-                        ArrayRef<uint64_t> RawMask,
+                        ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                         SmallVectorImpl<int> &ShuffleMask) {
   unsigned VecSize = NumElts * ScalarBits;
   unsigned NumLanes = VecSize / 128;
@@ -500,6 +500,10 @@ void DecodeVPERMILPMask(unsigned NumElts
   assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
 
   for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+    if (UndefElts[i]) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+      continue;
+    }
     uint64_t M = RawMask[i];
     M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
     unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
@@ -508,7 +512,7 @@ void DecodeVPERMILPMask(unsigned NumElts
 }
 
 void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
-                         ArrayRef<uint64_t> RawMask,
+                         ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                          SmallVectorImpl<int> &ShuffleMask) {
   unsigned VecSize = NumElts * ScalarBits;
   unsigned NumLanes = VecSize / 128;
@@ -518,6 +522,11 @@ void DecodeVPERMIL2PMask(unsigned NumElt
   assert((NumElts == RawMask.size()) && "Unexpected mask size");
 
   for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+    if (UndefElts[i]) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+      continue;
+    }
+
     // VPERMIL2 Operation.
     // Bits[3] - Match Bit.
     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
@@ -548,19 +557,29 @@ void DecodeVPERMIL2PMask(unsigned NumElt
   }
 }
 
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask) {
   uint64_t EltMaskSize = RawMask.size() - 1;
-  for (auto M : RawMask) {
+  for (int i = 0, e = RawMask.size(); i != e; ++i) {
+    if (UndefElts[i]) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+      continue;
+    }
+    uint64_t M = RawMask[i];
     M &= EltMaskSize;
     ShuffleMask.push_back((int)M);
   }
 }
 
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask) {
   uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
-  for (auto M : RawMask) {
+  for (int i = 0, e = RawMask.size(); i != e; ++i) {
+    if (UndefElts[i]) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+      continue;
+    }
+    uint64_t M = RawMask[i];
     M &= EltMaskSize;
     ShuffleMask.push_back((int)M);
   }

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=345018&r1=345017&r2=345018&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Tue Oct 23 04:33:38 2018
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
 #define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 
 //===----------------------------------------------------------------------===//
@@ -108,7 +109,7 @@ void DecodeSubVectorBroadcast(unsigned D
 
 /// Decode a PSHUFB mask from a raw array of constants such as from
 /// BUILD_VECTOR.
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a BLEND immediate mask into a shuffle mask.
@@ -131,7 +132,7 @@ void DecodeVPERMMask(unsigned NumElts, u
 /// BUILD_VECTOR.
 /// This can only basic masks (permutes + zeros), not any of the other
 /// operations that VPPERM can perform.
-void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a zero extension instruction as a shuffle mask.
@@ -156,20 +157,20 @@ void DecodeINSERTQIMask(unsigned NumElts
 
 /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
 void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
-                        ArrayRef<uint64_t> RawMask,
+                        ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                         SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
 void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
-                         ArrayRef<uint64_t> RawMask,
+                         ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                          SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
-void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask);
 } // llvm namespace
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=345018&r1=345017&r2=345018&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Oct 23 04:33:38 2018
@@ -5825,14 +5825,12 @@ static bool isConstantSplat(SDValue Op,
 
 static bool getTargetShuffleMaskIndices(SDValue MaskNode,
                                         unsigned MaskEltSizeInBits,
-                                        SmallVectorImpl<uint64_t> &RawMask) {
-  APInt UndefElts;
-  SmallVector<APInt, 64> EltBits;
-
+                                        SmallVectorImpl<uint64_t> &RawMask,
+                                        APInt &UndefElts) {
   // Extract the raw target constant bits.
-  // FIXME: We currently don't support UNDEF bits or mask entries.
+  SmallVector<APInt, 64> EltBits;
   if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
-                                     EltBits, /* AllowWholeUndefs */ false,
+                                     EltBits, /* AllowWholeUndefs */ true,
                                      /* AllowPartialUndefs */ false))
     return false;
 
@@ -5873,6 +5871,8 @@ static bool getTargetShuffleMask(SDNode
                                  SmallVectorImpl<int> &Mask, bool &IsUnary) {
   unsigned NumElems = VT.getVectorNumElements();
   unsigned MaskEltSize = VT.getScalarSizeInBits();
+  SmallVector<uint64_t, 32> RawMask;
+  APInt RawUndefs;
   SDValue ImmN;
 
   assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
@@ -6025,13 +6025,9 @@ static bool getTargetShuffleMask(SDNode
     assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
     IsUnary = true;
     SDValue MaskNode = N->getOperand(1);
-    SmallVector<uint64_t, 32> RawMask;
-    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
-      DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, Mask);
-      break;
-    }
-    if (auto *C = getTargetConstantFromNode(MaskNode)) {
-      DecodeVPERMILPMask(C, MaskEltSize, VT.getSizeInBits(), Mask);
+    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+                                    RawUndefs)) {
+      DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask);
       break;
     }
     return false;
@@ -6042,13 +6038,8 @@ static bool getTargetShuffleMask(SDNode
     assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
     IsUnary = true;
     SDValue MaskNode = N->getOperand(1);
-    SmallVector<uint64_t, 32> RawMask;
-    if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
-      DecodePSHUFBMask(RawMask, Mask);
-      break;
-    }
-    if (auto *C = getTargetConstantFromNode(MaskNode)) {
-      DecodePSHUFBMask(C, VT.getSizeInBits(), Mask);
+    if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+      DecodePSHUFBMask(RawMask, RawUndefs, Mask);
       break;
     }
     return false;
@@ -6104,13 +6095,10 @@ static bool getTargetShuffleMask(SDNode
     SDValue CtrlNode = N->getOperand(3);
     if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
       unsigned CtrlImm = CtrlOp->getZExtValue();
-      SmallVector<uint64_t, 32> RawMask;
-      if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
-        DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, Mask);
-        break;
-      }
-      if (auto *C = getTargetConstantFromNode(MaskNode)) {
-        DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, VT.getSizeInBits(), Mask);
+      if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+                                      RawUndefs)) {
+        DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs,
+                            Mask);
         break;
       }
     }
@@ -6121,13 +6109,8 @@ static bool getTargetShuffleMask(SDNode
     assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
     SDValue MaskNode = N->getOperand(2);
-    SmallVector<uint64_t, 32> RawMask;
-    if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
-      DecodeVPPERMMask(RawMask, Mask);
-      break;
-    }
-    if (auto *C = getTargetConstantFromNode(MaskNode)) {
-      DecodeVPPERMMask(C, VT.getSizeInBits(), Mask);
+    if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) {
+      DecodeVPPERMMask(RawMask, RawUndefs, Mask);
       break;
     }
     return false;
@@ -6138,13 +6121,9 @@ static bool getTargetShuffleMask(SDNode
     // Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
     Ops.push_back(N->getOperand(1));
     SDValue MaskNode = N->getOperand(0);
-    SmallVector<uint64_t, 32> RawMask;
-    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
-      DecodeVPERMVMask(RawMask, Mask);
-      break;
-    }
-    if (auto *C = getTargetConstantFromNode(MaskNode)) {
-      DecodeVPERMVMask(C, MaskEltSize, VT.getSizeInBits(), Mask);
+    if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
+                                    RawUndefs)) {
+      DecodeVPERMVMask(RawMask, RawUndefs, Mask);
       break;
     }
     return false;




More information about the llvm-commits mailing list