[llvm] ad16c6e - [X86][AVX] Ensure we retain zero elements in select(pshufb,pshufb) -> or(pshufb,pshufb) fold (PR52122)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 11 06:50:42 PDT 2021


Author: Simon Pilgrim
Date: 2021-10-11T14:50:24+01:00
New Revision: ad16c6e52fb74c2e784530f14c19e72d93c9ad62

URL: https://github.com/llvm/llvm-project/commit/ad16c6e52fb74c2e784530f14c19e72d93c9ad62
DIFF: https://github.com/llvm/llvm-project/commit/ad16c6e52fb74c2e784530f14c19e72d93c9ad62.diff

LOG: [X86][AVX] Ensure we retain zero elements in select(pshufb,pshufb) -> or(pshufb,pshufb) fold (PR52122)

The select(pshufb,pshufb) -> or(pshufb,pshufb) fold uses getConstVector to create the refreshed pshufb masks, which treats all negative indices as undef.

PR52122 shows that if we were selecting an element that the PSHUFB has set to zero we must set it back to 0x80 when we recreate the PSHUFB mask and not just leave it as SM_SentinelZero

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index deb8e2fb49cf3..2b34f9a07f6bf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42852,10 +42852,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
         getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask)) {
       int NumElts = VT.getVectorNumElements();
       for (int i = 0; i != NumElts; ++i) {
-        if (CondMask[i] < NumElts)
+        // getConstVector sets negative shuffle mask values as undef, so ensure
+        // we hardcode SM_SentinelZero values to zero (0x80).
+        if (CondMask[i] < NumElts) {
+          LHSMask[i] = (LHSMask[i] == SM_SentinelZero) ? 0x80 : LHSMask[i];
           RHSMask[i] = 0x80;
-        else
+        } else {
           LHSMask[i] = 0x80;
+          RHSMask[i] = (RHSMask[i] == SM_SentinelZero) ? 0x80 : RHSMask[i];
+        }
       }
       LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),
                         getConstVector(LHSMask, SimpleVT, DAG, DL, true));

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index c01b83cc3064c..1cccf4e827ffe 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -847,7 +847,7 @@ define <32 x i8> @PR52122(<32 x i8> %0, <32 x i8> %1) {
 ; CHECK-LABEL: PR52122:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[4],zero,zero,zero,ymm1[5],zero,zero,zero,ymm1[6],zero,zero,zero,ymm1[7],zero,zero,zero,ymm1[20],zero,zero,zero,ymm1[21],zero,zero,zero,ymm1[22],zero,zero,zero,ymm1[23],zero
-; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,4],zero,ymm0[u,u,5],zero,ymm0[u,3,6],zero,ymm0[u,u,7],zero,ymm0[u,18,20],zero,ymm0[u,u,21],zero,ymm0[u,19,22],zero,ymm0[u,u,23],zero,ymm0[u]
+; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,4],zero,zero,zero,ymm0[5],zero,zero,ymm0[3,6],zero,zero,zero,ymm0[7],zero,zero,ymm0[18,20],zero,zero,zero,ymm0[21],zero,zero,ymm0[19,22],zero,zero,zero,ymm0[23],zero,zero
 ; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %3 = shufflevector <32 x i8> %0, <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>


        


More information about the llvm-commits mailing list