[llvm] r297381 - [X86][SSE] Speed up constant pool shuffle mask decoding with direct copy (PR32037).
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 9 06:06:39 PST 2017
Author: rksimon
Date: Thu Mar 9 08:06:39 2017
New Revision: 297381
URL: http://llvm.org/viewvc/llvm-project?rev=297381&view=rev
Log:
[X86][SSE] Speed up constant pool shuffle mask decoding with direct copy (PR32037).
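If the constant elements already have the same bit width as the shuffle mask elements, we can copy them directly into the raw shuffle mask instead of packing them into intermediate bitsets and re-extracting them.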
Modified:
llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp?rev=297381&r1=297380&r2=297381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp Thu Mar 9 08:06:39 2017
@@ -49,6 +49,33 @@ static bool extractConstantMask(const Co
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumCstElts = CstTy->getVectorNumElements();
+ assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
+ "Unaligned shuffle mask size");
+
+ unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
+ UndefElts = APInt(NumMaskElts, 0);
+ RawMask.resize(NumMaskElts, 0);
+
+ // Fast path - if the constants match the mask size then copy direct.
+ if (MaskEltSizeInBits == CstEltSizeInBits) {
+ assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return false;
+
+ if (isa<UndefValue>(COp)) {
+ UndefElts.setBit(i);
+ RawMask[i] = 0;
+ continue;
+ }
+
+ auto *Elt = cast<ConstantInt>(COp);
+ RawMask[i] = Elt->getValue().getZExtValue();
+ }
+ return true;
+ }
+
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(CstSizeInBits, 0);
APInt MaskBits(CstSizeInBits, 0);
@@ -69,13 +96,6 @@ static bool extractConstantMask(const Co
}
// Now extract the undef/constant bit data into the raw shuffle masks.
- assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
- "Unaligned shuffle mask size");
-
- unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
- UndefElts = APInt(NumMaskElts, 0);
- RawMask.resize(NumMaskElts, 0);
-
for (unsigned i = 0; i != NumMaskElts; ++i) {
unsigned BitOffset = i * MaskEltSizeInBits;
APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
More information about the llvm-commits mailing list