[llvm] r295613 - [X86][SSE] Use getTargetConstantBitsFromNode to find zeroable shuffle elements.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 19 11:40:31 PST 2017


Author: rksimon
Date: Sun Feb 19 13:40:31 2017
New Revision: 295613

URL: http://llvm.org/viewvc/llvm-project?rev=295613&view=rev
Log:
[X86][SSE] Use getTargetConstantBitsFromNode to find zeroable shuffle elements.

Replaces the existing approach, which could only search BUILD_VECTOR nodes.

Requires getTargetConstantBitsFromNode to distinguish between elements whose bits are all UNDEF and elements where only some bits are UNDEF - this should also be useful when we get around to supporting getTargetShuffleMaskIndices with UNDEF elements.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=295613&r1=295612&r2=295613&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb 19 13:40:31 2017
@@ -5154,7 +5154,8 @@ static const Constant *getTargetConstant
 static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
                                           SmallBitVector &UndefElts,
                                           SmallVectorImpl<APInt> &EltBits,
-                                          bool AllowUndefs = true) {
+                                          bool AllowWholeUndefs = true,
+                                          bool AllowPartialUndefs = true) {
   assert(UndefElts.empty() && "Expected an empty UndefElts vector");
   assert(EltBits.empty() && "Expected an empty EltBits vector");
 
@@ -5175,6 +5176,7 @@ static bool getTargetConstantBitsFromNod
   // Split the undef/constant single bitset data into the target elements.
   auto SplitBitData = [&]() {
     // Don't split if we don't allow undef bits.
+    bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
     if (UndefBits.getBoolValue() && !AllowUndefs)
       return false;
 
@@ -5185,13 +5187,19 @@ static bool getTargetConstantBitsFromNod
       APInt UndefEltBits = UndefBits.lshr(i * EltSizeInBits);
       UndefEltBits = UndefEltBits.zextOrTrunc(EltSizeInBits);
 
-      // Only treat an element as UNDEF if all bits are UNDEF, otherwise
-      // treat it as zero.
+      // Only treat an element as UNDEF if all bits are UNDEF.
       if (UndefEltBits.isAllOnesValue()) {
+        if (!AllowWholeUndefs)
+          return false;
         UndefElts[i] = true;
         continue;
       }
 
+      // If only some bits are UNDEF then treat them as zero (or bail if not
+      // supported).
+      if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
+        return false;
+
       APInt Bits = MaskBits.lshr(i * EltSizeInBits);
       Bits = Bits.zextOrTrunc(EltSizeInBits);
       EltBits[i] = Bits.getZExtValue();
@@ -5293,7 +5301,8 @@ static bool getTargetShuffleMaskIndices(
   // Extract the raw target constant bits.
   // FIXME: We currently don't support UNDEF bits or mask entries.
   if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
-                                     EltBits, /* AllowUndefs */ false))
+                                     EltBits, /* AllowWholeUndefs */ false,
+                                     /* AllowPartialUndefs */ false))
     return false;
 
   // Insert the extracted elements into the mask.
@@ -5600,6 +5609,19 @@ static bool setTargetShuffleZeroElements
   V1 = peekThroughBitcasts(V1);
   V2 = peekThroughBitcasts(V2);
 
+  assert((VT.getSizeInBits() % Mask.size()) == 0 &&
+         "Illegal split of shuffle value type");
+  unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
+
+  // Extract known constant input data.
+  SmallBitVector UndefSrcElts[2];
+  SmallVector<APInt, 32> SrcEltBits[2];
+  bool IsSrcConstant[2] = {
+      getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
+                                    SrcEltBits[0], true, false),
+      getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
+                                    SrcEltBits[1], true, false)};
+
   for (int i = 0, Size = Mask.size(); i < Size; ++i) {
     int M = Mask[i];
 
@@ -5608,6 +5630,7 @@ static bool setTargetShuffleZeroElements
       continue;
 
     // Determine shuffle input and normalize the mask.
+    unsigned SrcIdx = M / Size;
     SDValue V = M < Size ? V1 : V2;
     M %= Size;
 
@@ -5632,39 +5655,12 @@ static bool setTargetShuffleZeroElements
       continue;
     }
 
-    // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
-    if (V.getOpcode() != ISD::BUILD_VECTOR)
-      continue;
-
-    // If the BUILD_VECTOR has fewer elements then the (larger) source
-    // element must be UNDEF/ZERO.
-    // TODO: Is it worth testing the individual bits of a constant?
-    if ((Size % V.getNumOperands()) == 0) {
-      int Scale = Size / V->getNumOperands();
-      SDValue Op = V.getOperand(M / Scale);
-      if (Op.isUndef())
-        Mask[i] = SM_SentinelUndef;
-      else if (X86::isZeroNode(Op))
-        Mask[i] = SM_SentinelZero;
-      continue;
-    }
-
-    // If the BUILD_VECTOR has more elements then all the (smaller) source
-    // elements must be all UNDEF or all ZERO.
-    if ((V.getNumOperands() % Size) == 0) {
-      int Scale = V->getNumOperands() / Size;
-      bool AllUndef = true;
-      bool AllZero = true;
-      for (int j = 0; j < Scale; ++j) {
-        SDValue Op = V.getOperand((M * Scale) + j);
-        AllUndef &= Op.isUndef();
-        AllZero &= X86::isZeroNode(Op);
-      }
-      if (AllUndef)
+    // Attempt to extract from the source's constant bits.
+    if (IsSrcConstant[SrcIdx]) {
+      if (UndefSrcElts[SrcIdx][M])
         Mask[i] = SM_SentinelUndef;
-      else if (AllZero)
+      else if (SrcEltBits[SrcIdx][M] == 0)
         Mask[i] = SM_SentinelZero;
-      continue;
     }
   }
 

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=295613&r1=295612&r2=295613&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sun Feb 19 13:40:31 2017
@@ -441,16 +441,14 @@ define <4 x float> @PR31296(i8* %in) {
 ; X32:       # BB#0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    vmovaps {{.*#+}} xmm1 = <0,1,u,u>
-; X32-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
+; X32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR31296:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    vmovq %rax, %xmm0
-; X64-NEXT:    vmovaps {{.*#+}} xmm1 = <0,1,u,u>
-; X64-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
+; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
 ; X64-NEXT:    retq
 entry:
   %0 = getelementptr i8, i8* %in, i32 0




More information about the llvm-commits mailing list