[llvm] b0f20f2 - [X86] combineVectorCompare - add constant folding support for PCMPEQ/PCMPGT instructions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 28 06:55:18 PDT 2024


Author: Simon Pilgrim
Date: 2024-06-28T14:54:47+01:00
New Revision: b0f20f214ab43c800130e0d249e8ee2459b906ea

URL: https://github.com/llvm/llvm-project/commit/b0f20f214ab43c800130e0d249e8ee2459b906ea
DIFF: https://github.com/llvm/llvm-project/commit/b0f20f214ab43c800130e0d249e8ee2459b906ea.diff

LOG: [X86] combineVectorCompare - add constant folding support for PCMPEQ/PCMPGT instructions

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/pr81136.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 348c2b56e6e3c..0870118b23008 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55692,14 +55692,43 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
 
 static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
+  unsigned Opcode = N->getOpcode();
+  assert((Opcode == X86ISD::PCMPEQ || Opcode == X86ISD::PCMPGT) &&
+         "Unknown PCMP opcode");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
   MVT VT = N->getSimpleValueType(0);
+  unsigned EltBits = VT.getScalarSizeInBits();
+  unsigned NumElts = VT.getVectorNumElements();
   SDLoc DL(N);
 
-  if (N->getOperand(0) == N->getOperand(1)) {
-    if (N->getOpcode() == X86ISD::PCMPEQ)
-      return DAG.getConstant(-1, DL, VT);
-    if (N->getOpcode() == X86ISD::PCMPGT)
-      return DAG.getConstant(0, DL, VT);
+  if (LHS == RHS)
+    return (Opcode == X86ISD::PCMPEQ) ? DAG.getAllOnesConstant(DL, VT)
+                                      : DAG.getConstant(0, DL, VT);
+
+  // Constant Folding.
+  // PCMPEQ(X,UNDEF) -> UNDEF
+  // PCMPGT(X,UNDEF) -> 0
+  // PCMPGT(UNDEF,X) -> 0
+  APInt LHSUndefs, RHSUndefs;
+  SmallVector<APInt> LHSBits, RHSBits;
+  if (getTargetConstantBitsFromNode(LHS, EltBits, LHSUndefs, LHSBits) &&
+      getTargetConstantBitsFromNode(RHS, EltBits, RHSUndefs, RHSBits)) {
+    APInt Ones = APInt::getAllOnes(EltBits);
+    APInt Zero = APInt::getZero(EltBits);
+    SmallVector<APInt> Results(NumElts);
+    for (unsigned I = 0; I != NumElts; ++I) {
+      if (Opcode == X86ISD::PCMPEQ) {
+        Results[I] = (LHSBits[I] == RHSBits[I]) ? Ones : Zero;
+      } else {
+        bool AnyUndef = LHSUndefs[I] || RHSUndefs[I];
+        Results[I] = (!AnyUndef && LHSBits[I].sgt(RHSBits[I])) ? Ones : Zero;
+      }
+    }
+    if (Opcode == X86ISD::PCMPEQ)
+      return getConstVector(Results, LHSUndefs | RHSUndefs, VT, DAG, DL);
+    return getConstVector(Results, VT, DAG, DL);
   }
 
   return SDValue();

diff  --git a/llvm/test/CodeGen/X86/pr81136.ll b/llvm/test/CodeGen/X86/pr81136.ll
index b4ac3fc783e0a..a627050a54d13 100644
--- a/llvm/test/CodeGen/X86/pr81136.ll
+++ b/llvm/test/CodeGen/X86/pr81136.ll
@@ -8,8 +8,6 @@ define i64 @PR81136(i32 %a0, i32 %a1, ptr %a2) {
 ; CHECK-NEXT:    vmovd %esi, %xmm1
 ; CHECK-NEXT:    vmovdqa (%rdx), %ymm2
 ; CHECK-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm4 = [128,1]
-; CHECK-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm4
 ; CHECK-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
@@ -17,10 +15,10 @@ define i64 @PR81136(i32 %a0, i32 %a1, ptr %a2) {
 ; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
 ; CHECK-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
 ; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; CHECK-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
 ; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm2
 ; CHECK-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
 ; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; CHECK-NEXT:    vandnpd %ymm0, %ymm1, %ymm0
 ; CHECK-NEXT:    vmovmskpd %ymm0, %eax


        


More information about the llvm-commits mailing list