[llvm] r297381 - [X86][SSE] Speed up constant pool shuffle mask decoding with direct copy (PR32037).

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 9 06:06:39 PST 2017


Author: rksimon
Date: Thu Mar  9 08:06:39 2017
New Revision: 297381

URL: http://llvm.org/viewvc/llvm-project?rev=297381&view=rev
Log:
[X86][SSE] Speed up constant pool shuffle mask decoding with direct copy (PR32037).

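If the constant elements already have the same bit width as the shuffle mask elements, we can copy them directly into the raw shuffle mask instead of first packing them into intermediate bitsets.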

Modified:
    llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp

Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp?rev=297381&r1=297380&r2=297381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp Thu Mar  9 08:06:39 2017
@@ -49,6 +49,33 @@ static bool extractConstantMask(const Co
   unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
   unsigned NumCstElts = CstTy->getVectorNumElements();
 
+  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
+         "Unaligned shuffle mask size");
+
+  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
+  UndefElts = APInt(NumMaskElts, 0);
+  RawMask.resize(NumMaskElts, 0);
+
+  // Fast path - if the constants match the mask size then copy direct.
+  if (MaskEltSizeInBits == CstEltSizeInBits) {
+    assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
+    for (unsigned i = 0; i != NumMaskElts; ++i) {
+      Constant *COp = C->getAggregateElement(i);
+      if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+        return false;
+
+      if (isa<UndefValue>(COp)) {
+        UndefElts.setBit(i);
+        RawMask[i] = 0;
+        continue;
+      }
+
+      auto *Elt = cast<ConstantInt>(COp);
+      RawMask[i] = Elt->getValue().getZExtValue();
+    }
+    return true;
+  }
+
   // Extract all the undef/constant element data and pack into single bitsets.
   APInt UndefBits(CstSizeInBits, 0);
   APInt MaskBits(CstSizeInBits, 0);
@@ -69,13 +96,6 @@ static bool extractConstantMask(const Co
   }
 
   // Now extract the undef/constant bit data into the raw shuffle masks.
-  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
-         "Unaligned shuffle mask size");
-
-  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
-  UndefElts = APInt(NumMaskElts, 0);
-  RawMask.resize(NumMaskElts, 0);
-
   for (unsigned i = 0; i != NumMaskElts; ++i) {
     unsigned BitOffset = i * MaskEltSizeInBits;
     APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);




More information about the llvm-commits mailing list