[llvm] 6fd0ae3 - [X86] combineScalarAndWithMaskSetcc - handle (concat_vectors (and (vYi1 setcc, vYi1 x), undef)) patterns

Fri Nov 25 03:29:08 PST 2022

Author: Simon Pilgrim
Date: 2022-11-25T11:16:24Z
New Revision: 6fd0ae39be79bdb686e4cc966e4cdee9950e2645

URL: https://github.com/llvm/llvm-project/commit/6fd0ae39be79bdb686e4cc966e4cdee9950e2645
DIFF: https://github.com/llvm/llvm-project/commit/6fd0ae39be79bdb686e4cc966e4cdee9950e2645.diff

LOG: [X86] combineScalarAndWithMaskSetcc - handle (concat_vectors (and (vYi1 setcc, vYi1 x), undef)) patterns

If one of the AND operands is a setcc then we're implicitly zeroing the upper mask bits

Similar pattern to regressions identified in D127115 (masked comparisons)

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 53abc3d97ef1f..f293058abec0f 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48647,18 +48647,27 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
   SDValue SubVec = Src.getOperand(0);
   EVT SubVecVT = SubVec.getValueType();
 
-  // First subvector should be a setcc with a legal result type. The RHS of the
-  // AND should be a mask with this many bits.
-  if (SubVec.getOpcode() != ISD::SETCC || !TLI.isTypeLegal(SubVecVT) ||
+  // The RHS of the AND should be a mask with as many bits as SubVec.
+  if (!TLI.isTypeLegal(SubVecVT) ||
       !C1->getAPIntValue().isMask(SubVecVT.getVectorNumElements()))
     return SDValue();
 
-  EVT SetccVT = SubVec.getOperand(0).getValueType();
-  if (!TLI.isTypeLegal(SetccVT) ||
-      !(Subtarget.hasVLX() || SetccVT.is512BitVector()))
-    return SDValue();
-
-  if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32))
+  // First subvector should be a setcc with a legal result type or a
+  // AND containing at least one setcc with a legal result type.
+  auto IsLegalSetCC = [&](SDValue V) {
+    if (V.getOpcode() != ISD::SETCC)
+      return false;
+    EVT SetccVT = V.getOperand(0).getValueType();
+    if (!TLI.isTypeLegal(SetccVT) ||
+        !(Subtarget.hasVLX() || SetccVT.is512BitVector()))
+      return false;
+    if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32))
+      return false;
+    return true;
+  };
+  if (!(IsLegalSetCC(SubVec) || (SubVec.getOpcode() == ISD::AND &&
+                                 (IsLegalSetCC(SubVec.getOperand(0)) ||
+                                  IsLegalSetCC(SubVec.getOperand(1))))))
     return SDValue();
 
   // We passed all the checks. Rebuild the concat_vectors with zeroes

diff  --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
index 5fa144c18022a..42ea007e07b97 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
@@ -1615,8 +1615,7 @@ define i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_i32(i32 %__u, <2 x i64> %__a, <
 ; VLX:       # %bb.0: # %entry
 ; VLX-NEXT:    kmovd %edi, %k1
 ; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
-; VLX-NEXT:    kmovd %k0, %eax
-; VLX-NEXT:    andl $15, %eax
+; VLX-NEXT:    kmovb %k0, %eax
 ; VLX-NEXT:    retq
 ;
 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_i32: