[llvm] 5c8188c - [DAGCombine] Use `IsKnownNeverZero` to see if we need zero-check in is_pow2 setcc pattern

Noah Goldstein via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 12 11:52:55 PDT 2023


Author: Noah Goldstein
Date: 2023-06-12T13:52:43-05:00
New Revision: 5c8188c7bc524c3e4ea22762645fcbe43042aee9

URL: https://github.com/llvm/llvm-project/commit/5c8188c7bc524c3e4ea22762645fcbe43042aee9
DIFF: https://github.com/llvm/llvm-project/commit/5c8188c7bc524c3e4ea22762645fcbe43042aee9.diff

LOG: [DAGCombine] Use `IsKnownNeverZero` to see if we need zero-check in is_pow2 setcc pattern

`ctpop(X) eq/ne 1` checks whether X is a non-zero power of 2. The
power-of-2 check that also accepts zero is `(X & (X-1)) eq/ne 0`, and
unfortunately there is no good pattern for checking a power of 2 while
excluding zero. So, when lowering `ctpop(X) eq/ne 1`, explicitly check
`IsKnownNeverZero(X)` so that the extra zero check can be dropped when
X is known to be non-zero.

We need this explicitly as DAGCombiner does not re-analyze provable
setcc nodes, and the middle-end never finds it beneficial to broaden
`ctpop(X) eq/ne 1` -> `ctpop(X) ule/ugt 1` (power of 2 including
zero).

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D152675

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/X86/ispow2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 32be369d08cf7..6da772d43f893 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4085,8 +4085,12 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
     ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
     SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
     SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
-    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
     SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
+    // check before the emit a potentially unnecessary op.
+    if (DAG.isKnownNeverZero(CTOp))
+      return RHS;
+    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
     unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
     return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
   }

diff  --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 9fb2f9b4e1b67..45d04c27ab5f1 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -12,19 +12,14 @@ define i1 @is_pow2_non_zero(i32 %xin) {
 ; CHECK-NOBMI-NEXT:    orl $256, %edi # imm = 0x100
 ; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
 ; CHECK-NOBMI-NEXT:    testl %eax, %edi
-; CHECK-NOBMI-NEXT:    sete %cl
-; CHECK-NOBMI-NEXT:    testl %edi, %edi
-; CHECK-NOBMI-NEXT:    setne %al
-; CHECK-NOBMI-NEXT:    andb %cl, %al
+; CHECK-NOBMI-NEXT:    sete %al
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI2-LABEL: is_pow2_non_zero:
 ; CHECK-BMI2:       # %bb.0:
 ; CHECK-BMI2-NEXT:    orl $256, %edi # imm = 0x100
-; CHECK-BMI2-NEXT:    setne %cl
 ; CHECK-BMI2-NEXT:    blsrl %edi, %eax
 ; CHECK-BMI2-NEXT:    sete %al
-; CHECK-BMI2-NEXT:    andb %cl, %al
 ; CHECK-BMI2-NEXT:    retq
   %x = or i32 %xin, 256
   %cnt = call i32 @llvm.ctpop.i32(i32 %x)
@@ -64,19 +59,14 @@ define i1 @neither_pow2_non_zero(i32 %xin) {
 ; CHECK-NOBMI-NEXT:    orl $256, %edi # imm = 0x100
 ; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
 ; CHECK-NOBMI-NEXT:    testl %eax, %edi
-; CHECK-NOBMI-NEXT:    setne %cl
-; CHECK-NOBMI-NEXT:    testl %edi, %edi
-; CHECK-NOBMI-NEXT:    sete %al
-; CHECK-NOBMI-NEXT:    orb %cl, %al
+; CHECK-NOBMI-NEXT:    setne %al
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI2-LABEL: neither_pow2_non_zero:
 ; CHECK-BMI2:       # %bb.0:
 ; CHECK-BMI2-NEXT:    orl $256, %edi # imm = 0x100
-; CHECK-BMI2-NEXT:    sete %cl
 ; CHECK-BMI2-NEXT:    blsrl %edi, %eax
 ; CHECK-BMI2-NEXT:    setne %al
-; CHECK-BMI2-NEXT:    orb %cl, %al
 ; CHECK-BMI2-NEXT:    retq
   %x = or i32 %xin, 256
   %cnt = call i32 @llvm.ctpop.i32(i32 %x)
@@ -94,24 +84,16 @@ define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) {
 ; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
 ; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
 ; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
-; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm4
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm3
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
-; CHECK-NOBMI-NEXT:    pand %xmm3, %xmm5
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm1
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
-; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
-; CHECK-NOBMI-NEXT:    pandn %xmm5, %xmm3
+; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm3
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2]
+; CHECK-NOBMI-NEXT:    pand %xmm3, %xmm4
 ; CHECK-NOBMI-NEXT:    paddq %xmm0, %xmm2
-; CHECK-NOBMI-NEXT:    pand %xmm0, %xmm2
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm2
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
-; CHECK-NOBMI-NEXT:    pand %xmm2, %xmm1
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm0
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
 ; CHECK-NOBMI-NEXT:    pand %xmm2, %xmm0
-; CHECK-NOBMI-NEXT:    pandn %xmm1, %xmm0
-; CHECK-NOBMI-NEXT:    packssdw %xmm3, %xmm0
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    packssdw %xmm4, %xmm0
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: is_pow2_non_zero_4xv64:
@@ -153,27 +135,19 @@ define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
 ; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
 ; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
 ; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
-; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm4
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm3
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
-; CHECK-NOBMI-NEXT:    pand %xmm3, %xmm5
-; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm5
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm1
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
-; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
-; CHECK-NOBMI-NEXT:    por %xmm5, %xmm3
-; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm1
-; CHECK-NOBMI-NEXT:    pand %xmm0, %xmm1
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm1
-; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[1,0,3,2]
-; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm5
-; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm5
-; CHECK-NOBMI-NEXT:    pcmpeqd %xmm4, %xmm0
+; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm3
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2]
+; CHECK-NOBMI-NEXT:    pand %xmm3, %xmm4
+; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm4
+; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm3
+; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
+; CHECK-NOBMI-NEXT:    pand %xmm3, %xmm0
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm0
 ; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
 ; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm0
-; CHECK-NOBMI-NEXT:    por %xmm5, %xmm0
-; CHECK-NOBMI-NEXT:    packssdw %xmm3, %xmm0
+; CHECK-NOBMI-NEXT:    pxor %xmm2, %xmm0
+; CHECK-NOBMI-NEXT:    packssdw %xmm4, %xmm0
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64:


        


More information about the llvm-commits mailing list