[llvm] r334258 - [X86] Improve some shuffle decoding code to remove a conditional from a loop and reduce the number of temporary variables. NFCI

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 7 18:09:32 PDT 2018


Author: ctopper
Date: Thu Jun  7 18:09:31 2018
New Revision: 334258

URL: http://llvm.org/viewvc/llvm-project?rev=334258&view=rev
Log:
[X86] Improve some shuffle decoding code to remove a conditional from a loop and reduce the number of temporary variables. NFCI

The NumControlBits variable was definitely sketchy. I think that only worked because the expected value was 1 or 2 and the number of lanes was 2 or 4. Had there been 8 lanes, the number of bits should have been 3, not 4 as the previous code would have given.

Modified:
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=334258&r1=334257&r2=334258&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Thu Jun  7 18:09:31 2018
@@ -152,13 +152,12 @@ void DecodePSHUFMask(unsigned NumElts, u
   if (NumLanes == 0) NumLanes = 1;  // Handle MMX
   unsigned NumLaneElts = NumElts / NumLanes;
 
-  unsigned NewImm = Imm;
+  uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
     for (unsigned i = 0; i != NumLaneElts; ++i) {
-      ShuffleMask.push_back(NewImm % NumLaneElts + l);
-      NewImm /= NumLaneElts;
+      ShuffleMask.push_back(SplatImm % NumLaneElts + l);
+      SplatImm /= NumLaneElts;
     }
-    if (NumLaneElts == 4) NewImm = Imm; // reload imm
   }
 }
 
@@ -281,16 +280,15 @@ void decodeVSHUF64x2FamilyMask(unsigned
                                SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElementsInLane = 128 / ScalarSize;
   unsigned NumLanes = NumElts / NumElementsInLane;
-  unsigned ControlBitsMask = NumLanes - 1;
-  unsigned NumControlBits  = NumLanes / 2;
 
-  for (unsigned l = 0; l != NumLanes; ++l) {
-    unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+  for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
+    unsigned Index = (Imm % NumLanes) * NumElementsInLane;
+    Imm /= NumLanes; // Discard the bits we just used.
     // We actually need the other source.
-    if (l >= NumLanes / 2)
-      LaneMask += NumLanes;
+    if (l >= (NumElts / 2))
+      Index += NumElts;
     for (unsigned i = 0; i != NumElementsInLane; ++i)
-      ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+      ShuffleMask.push_back(Index + i);
   }
 }
 




More information about the llvm-commits mailing list