[llvm] r288898 - [X86][XOP] Fix VPERMIL2 non-constant pool shuffle decoding (PR31296)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 7 03:19:01 PST 2016


Author: rksimon
Date: Wed Dec  7 05:19:00 2016
New Revision: 288898

URL: http://llvm.org/viewvc/llvm-project?rev=288898&view=rev
Log:
[X86][XOP] Fix VPERMIL2 non-constant pool shuffle decoding (PR31296)

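The non-constant pool version of DecodeVPERMIL2PMask was not offsetting correctly for the second input: it added the source-select bit (0 or 1) directly to the element index instead of offsetting the index by the number of vector elements. I've updated the code to match the implementation in the constant-pool version.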

Annoyingly, this bug stayed hidden for so long because it's tricky to combine shuffles into useful variable shuffle masks that don't become constant-pool entries.

Modified:
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=288898&r1=288897&r2=288898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Wed Dec  7 05:19:00 2016
@@ -548,10 +548,11 @@ void DecodeVPERMIL2PMask(MVT VT, unsigne
   unsigned VecSize = VT.getSizeInBits();
   unsigned EltSize = VT.getScalarSizeInBits();
   unsigned NumLanes = VecSize / 128;
-  unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
-  assert((VecSize == 128 || VecSize == 256) &&
-         "Unexpected vector size");
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumEltsPerLane = NumElts / NumLanes;
+  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
   assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
+  assert((NumElts == RawMask.size()) && "Unexpected mask size");
 
   for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
     // VPERMIL2 Operation.
@@ -572,14 +573,15 @@ void DecodeVPERMIL2PMask(MVT VT, unsigne
       continue;
     }
 
-    unsigned Index = i & ~(NumEltsPerLane - 1);
+    int Index = i & ~(NumEltsPerLane - 1);
     if (EltSize == 64)
       Index += (Selector >> 1) & 0x1;
     else
       Index += Selector & 0x3;
 
-    unsigned SrcOffset = (Selector >> 2) & 1;
-    ShuffleMask.push_back((int)(SrcOffset + Index));
+    int Src = (Selector >> 2) & 0x1;
+    Index += Src * NumElts;
+    ShuffleMask.push_back(Index);
   }
 }
 

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=288898&r1=288897&r2=288898&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Wed Dec  7 05:19:00 2016
@@ -345,12 +345,18 @@ define <16 x i8> @constant_fold_vpperm()
 define <4 x float> @PR31296(i8* %in) {
 ; X32-LABEL: PR31296:
 ; X32:       # BB#0: # %entry
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00]
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    vmovaps {{.*#+}} xmm1 = <0,1,u,u>
+; X32-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR31296:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00]
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    vmovq %rax, %xmm0
+; X64-NEXT:    vmovaps {{.*#+}} xmm1 = <0,1,u,u>
+; X64-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
 ; X64-NEXT:    retq
 entry:
   %0 = getelementptr i8, i8* %in, i32 0




More information about the llvm-commits mailing list