[llvm] r334258 - [X86] Improve some shuffle decoding code to remove a conditional from a loop and reduce the number of temporary variables. NFCI
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 7 18:09:32 PDT 2018
Author: ctopper
Date: Thu Jun 7 18:09:31 2018
New Revision: 334258
URL: http://llvm.org/viewvc/llvm-project?rev=334258&view=rev
Log:
[X86] Improve some shuffle decoding code to remove a conditional from a loop and reduce the number of temporary variables. NFCI
The NumControlBits variable was definitely sketchy. I think that only worked because the expected value was 1 or 2 and the number of lanes was 2 or 4. Had there been 8 lanes, the number of bits should have been 3, not 4 as the previous code would have given.
Modified:
llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=334258&r1=334257&r2=334258&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Thu Jun 7 18:09:31 2018
@@ -152,13 +152,12 @@ void DecodePSHUFMask(unsigned NumElts, u
if (NumLanes == 0) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
- unsigned NewImm = Imm;
+ uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
- ShuffleMask.push_back(NewImm % NumLaneElts + l);
- NewImm /= NumLaneElts;
+ ShuffleMask.push_back(SplatImm % NumLaneElts + l);
+ SplatImm /= NumLaneElts;
}
- if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
@@ -281,16 +280,15 @@ void decodeVSHUF64x2FamilyMask(unsigned
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElementsInLane = 128 / ScalarSize;
unsigned NumLanes = NumElts / NumElementsInLane;
- unsigned ControlBitsMask = NumLanes - 1;
- unsigned NumControlBits = NumLanes / 2;
- for (unsigned l = 0; l != NumLanes; ++l) {
- unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+ for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
+ unsigned Index = (Imm % NumLanes) * NumElementsInLane;
+ Imm /= NumLanes; // Discard the bits we just used.
// We actually need the other source.
- if (l >= NumLanes / 2)
- LaneMask += NumLanes;
+ if (l >= (NumElts / 2))
+ Index += NumElts;
for (unsigned i = 0; i != NumElementsInLane; ++i)
- ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+ ShuffleMask.push_back(Index + i);
}
}
More information about the llvm-commits mailing list