[llvm] r284451 - [AVX-512] Fix DecodeVPERMV3Mask to handle cases where the constant pool entry has a different type than the shuffle itself.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 17 21:00:32 PDT 2016


Author: ctopper
Date: Mon Oct 17 23:00:32 2016
New Revision: 284451

URL: http://llvm.org/viewvc/llvm-project?rev=284451&view=rev
Log:
[AVX-512] Fix DecodeVPERMV3Mask to handle cases where the constant pool entry has a different type than the shuffle itself.

Summary: This is especially important for 32-bit targets with 64-bit shuffle elements. This is similar to how PSHUFB and VPERMIL handle the same problem.

Reviewers: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D25666

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
    llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=284451&r1=284450&r2=284451&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Oct 17 23:00:32 2016
@@ -5102,8 +5102,9 @@ static bool getTargetShuffleMask(SDNode
     Ops.push_back(N->getOperand(0));
     Ops.push_back(N->getOperand(2));
     SDValue MaskNode = N->getOperand(1);
+    unsigned MaskEltSize = VT.getScalarSizeInBits();
     if (auto *C = getTargetConstantFromNode(MaskNode)) {
-      DecodeVPERMV3Mask(C, VT, Mask);
+      DecodeVPERMV3Mask(C, MaskEltSize, Mask);
       break;
     }
     return false;
@@ -5114,8 +5115,9 @@ static bool getTargetShuffleMask(SDNode
     Ops.push_back(N->getOperand(1));
     Ops.push_back(N->getOperand(2));
     SDValue MaskNode = N->getOperand(0);
+    unsigned MaskEltSize = VT.getScalarSizeInBits();
     if (auto *C = getTargetConstantFromNode(MaskNode)) {
-      DecodeVPERMV3Mask(C, VT, Mask);
+      DecodeVPERMV3Mask(C, MaskEltSize, Mask);
       break;
     }
     return false;

Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp?rev=284451&r1=284450&r2=284451&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp Mon Oct 17 23:00:32 2016
@@ -309,26 +309,31 @@ void DecodeVPERMVMask(const Constant *C,
     ShuffleMask.push_back(Element);
 }
 
-void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
                        SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();
-  unsigned NumElements = MaskTy->getVectorNumElements();
-  if (NumElements == VT.getVectorNumElements()) {
-    unsigned EltMaskSize = Log2_64(NumElements * 2);
-    for (unsigned i = 0; i < NumElements; ++i) {
-      Constant *COp = C->getAggregateElement(i);
-      if (!COp) {
-        ShuffleMask.clear();
-        return;
-      }
-      if (isa<UndefValue>(COp))
-        ShuffleMask.push_back(SM_SentinelUndef);
-      else {
-        APInt Element = cast<ConstantInt>(COp)->getValue();
-        Element = Element.getLoBits(EltMaskSize);
-        ShuffleMask.push_back(Element.getZExtValue());
-      }
+  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+  (void)MaskTySize;
+  assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+         "Unexpected vector size.");
+  assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
+         "Unexpected vector element size.");
+
+  // The shuffle mask requires elements the same size as the target.
+  SmallBitVector UndefElts;
+  SmallVector<uint64_t, 8> RawMask;
+  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
+    return;
+
+  unsigned NumElts = RawMask.size();
+
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (UndefElts[i]) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+      continue;
     }
+    int Index = RawMask[i] & (NumElts*2 - 1);
+    ShuffleMask.push_back(Index);
   }
 }
 } // llvm namespace

Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h?rev=284451&r1=284450&r2=284451&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h Mon Oct 17 23:00:32 2016
@@ -44,7 +44,7 @@ void DecodeVPERMVMask(const Constant *C,
                       SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
                        SmallVectorImpl<int> &ShuffleMask);
 
 } // llvm namespace

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=284451&r1=284450&r2=284451&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Mon Oct 17 23:00:32 2016
@@ -112,10 +112,6 @@ define <8 x i64> @combine_permvar_8i64_i
 define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
 ; X32-LABEL: combine_vpermt2var_8f64_identity:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
-; X32-NEXT:    vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermt2var_8f64_identity:
@@ -152,8 +148,7 @@ define <8 x double> @combine_vpermt2var_
 define <8 x double> @combine_vpermt2var_8f64_movddup(<8 x double> %x0, <8 x double> %x1) {
 ; X32-LABEL: combine_vpermt2var_8f64_movddup:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovapd {{.*#+}} zmm2 = <0,0,0,0,2,0,2,0,4,0,4,0,u,u,u,u>
-; X32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
+; X32-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermt2var_8f64_movddup:
@@ -167,10 +162,7 @@ define <8 x double> @combine_vpermt2var_
 ; X32-LABEL: combine_vpermt2var_8f64_movddup_load:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovapd (%eax), %zmm1
-; X32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
-; X32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
-; X32-NEXT:    vmovapd %zmm1, %zmm0
+; X32-NEXT:    vmovddup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermt2var_8f64_movddup_load:
@@ -186,8 +178,7 @@ define <8 x double> @combine_vpermt2var_
 ; X32:       # BB#0:
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kmovd %eax, %k1
-; X32-NEXT:    vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
-; X32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0 {%k1} {z}
+; X32-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermt2var_8f64_movddup_mask:
@@ -868,10 +859,6 @@ define <32 x i16> @combine_pshufb_as_psh
 define <8 x double> @combine_vpermi2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
 ; X32-LABEL: combine_vpermi2var_8f64_identity:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
-; X32-NEXT:    vpermi2pd %zmm1, %zmm0, %zmm2
-; X32-NEXT:    vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X32-NEXT:    vpermi2pd %zmm2, %zmm2, %zmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermi2var_8f64_identity:




More information about the llvm-commits mailing list