[llvm] [CodeGenPrepare] Convert `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` (PR #111284)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 7 02:53:36 PDT 2024


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/111284

>From 5f341b0f189438ba883e645514c367f495ec598c Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 6 Oct 2024 16:41:24 +0800
Subject: [PATCH 1/4] [CodeGenPrepare] Add pre-commit tests. NFC.

---
 llvm/test/CodeGen/AArch64/arm64-popcnt.ll | 72 +++++++++++++++++--
 llvm/test/CodeGen/RISCV/rv32zbb.ll        | 41 +++++++++++
 llvm/test/CodeGen/RISCV/rv64zbb.ll        | 84 +++++++++++++++++++++++
 llvm/test/CodeGen/X86/ispow2.ll           | 31 +++++++++
 4 files changed, 221 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index f5ce73a366125b..e1d9f40a4ae349 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-NONEON-LABEL: cnt32_advsimd:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-NONEON-LABEL: cnt32_advsimd_2:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-NONEON-LABEL: cnt64_advsimd:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr x9, x0, #1
-; CHECK-NONEON-NEXT:    mov x8, #72340172838076673
+; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NONEON-NEXT:    sub x9, x0, x9
 ; CHECK-NONEON-NEXT:    lsr x10, x9, #2
@@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-LABEL: cnt32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr w9, w0, #1
-; CHECK-NEXT:    mov w8, #16843009
+; CHECK-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NEXT:    sub w9, w0, w9
 ; CHECK-NEXT:    lsr w10, w9, #2
@@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-NONEON-LABEL: cnt32:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr w9, w0, #1
-; CHECK-NONEON-NEXT:    mov w8, #16843009
+; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
 ; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
 ; CHECK-NONEON-NEXT:    sub w9, w0, w9
 ; CHECK-NONEON-NEXT:    lsr w10, w9, #2
@@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-LABEL: cnt64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr x9, x0, #1
-; CHECK-NEXT:    mov x8, #72340172838076673
+; CHECK-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NEXT:    sub x9, x0, x9
 ; CHECK-NEXT:    lsr x10, x9, #2
@@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-NONEON-LABEL: cnt64:
 ; CHECK-NONEON:       // %bb.0:
 ; CHECK-NONEON-NEXT:    lsr x9, x0, #1
-; CHECK-NONEON-NEXT:    mov x8, #72340172838076673
+; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
 ; CHECK-NONEON-NEXT:    sub x9, x0, x9
 ; CHECK-NONEON-NEXT:    lsr x10, x9, #2
@@ -278,5 +278,63 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
   ret i1 %cmp
 }
 
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_eq_one_nonzero:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #1
+; CHECK-NEXT:    eor w9, w0, w8
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
+;
+; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NONEON:       // %bb.0: // %entry
+; CHECK-NONEON-NEXT:    sub w8, w0, #1
+; CHECK-NONEON-NEXT:    eor w9, w0, w8
+; CHECK-NONEON-NEXT:    cmp w9, w8
+; CHECK-NONEON-NEXT:    cset w0, hi
+; CHECK-NONEON-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-CSSC:       // %bb.0: // %entry
+; CHECK-CSSC-NEXT:    cnt w8, w0
+; CHECK-CSSC-NEXT:    cmp w8, #1
+; CHECK-CSSC-NEXT:    cset w0, eq
+; CHECK-CSSC-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_ne_one_nonzero:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w0, #1
+; CHECK-NEXT:    eor w9, w0, w8
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    cset w0, ls
+; CHECK-NEXT:    ret
+;
+; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NONEON:       // %bb.0: // %entry
+; CHECK-NONEON-NEXT:    sub w8, w0, #1
+; CHECK-NONEON-NEXT:    eor w9, w0, w8
+; CHECK-NONEON-NEXT:    cmp w9, w8
+; CHECK-NONEON-NEXT:    cset w0, ls
+; CHECK-NONEON-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-CSSC:       // %bb.0: // %entry
+; CHECK-CSSC-NEXT:    cnt w8, w0
+; CHECK-CSSC-NEXT:    cmp w8, #1
+; CHECK-CSSC-NEXT:    cset w0, ne
+; CHECK-CSSC-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
+
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index e24b1b41645cdf..edbe7d6518a337 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1441,3 +1441,44 @@ define i32 @srai_slli2(i16 signext %0) {
   %3 = sext i16 %sext to i32
   ret i32 %3
 }
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_eq_one_nonzero:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a1, a0, -1
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    addi a0, a0, -1
+; RV32ZBB-NEXT:    seqz a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV32I-LABEL: ctpop32_ne_one_nonzero:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a1, a0, -1
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    addi a0, a0, -1
+; RV32ZBB-NEXT:    snez a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 43a499806ab5ae..4364db625745cb 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1618,3 +1618,87 @@ entry:
   %5 = add nsw i32 %4, %0
   ret i32 %5
 }
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addiw a1, a0, -1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; RV64I-LABEL: ctpop32_ne_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addiw a1, a0, -1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    snez a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop64_eq_one_nonzero(i64 %x) {
+; RV64I-LABEL: ctpop64_eq_one_nonzero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpop a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
+  %cmp = icmp eq i64 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_eq_one_maybezero(i32 %x) {
+; RV64I-LABEL: ctpop32_eq_one_maybezero:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addiw a1, a0, -1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: ctpop32_eq_one_maybezero:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    addi a0, a0, -1
+; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    ret
+entry:
+  %popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 8723432de8b6b0..de7828640da7a4 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -220,3 +220,34 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
   %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
   ret <4 x i1> %r
 }
+
+
+define i1 @ctpop32_eq_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_eq_one_nonzero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -1(%rdi), %eax
+; CHECK-NEXT:    xorl %eax, %edi
+; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    retq
+entry:
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 1
+  ret i1 %cmp
+}
+
+define i1 @ctpop32_ne_one_nonzero(i32 %x) {
+; CHECK-LABEL: ctpop32_ne_one_nonzero:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -1(%rdi), %eax
+; CHECK-NEXT:    xorl %eax, %edi
+; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    retq
+entry:
+  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp ne i32 %popcnt, 1
+  ret i1 %cmp
+}

>From a1529c696fc22b9343039fdd4cb28f7a421197e7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 6 Oct 2024 16:49:25 +0800
Subject: [PATCH 2/4] [CodeGenPrepare] Convert `ctpop(X) ==/!= 1 -> ctpop(X)
 u</u> 2/1`

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp       | 29 ++++++++++++++++
 llvm/test/CodeGen/AArch64/arm64-popcnt.ll | 28 +++++++--------
 llvm/test/CodeGen/RISCV/rv32zbb.ll        | 16 ++++-----
 llvm/test/CodeGen/RISCV/rv64zbb.ll        | 27 +++++++--------
 llvm/test/CodeGen/X86/ispow2.ll           | 42 ++++++++++++++---------
 5 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 631cc26d6022fe..7953c0d09f2a86 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2111,6 +2111,32 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
   return false;
 }
 
+/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
+/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
+/// result cannot be zero.
+static bool adjustIsPower2Test(CmpInst *Cmp) {
+  ICmpInst::Predicate Pred;
+  if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
+    return false;
+  if (!ICmpInst::isEquality(Pred))
+    return false;
+  auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
+  if (auto Range = II->getRange()) {
+    Type *Ty = II->getType();
+    unsigned BitWidth = Ty->getScalarSizeInBits();
+    if (Range->contains(APInt::getZero(BitWidth)))
+      return false;
+
+    if (Pred == ICmpInst::ICMP_EQ) {
+      Cmp->setPredicate(ICmpInst::ICMP_ULT);
+      Cmp->setOperand(1, ConstantInt::get(Ty, 2));
+    } else
+      Cmp->setPredicate(ICmpInst::ICMP_UGT);
+    return true;
+  }
+  return false;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -2130,6 +2156,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
     return true;
 
+  if (adjustIsPower2Test(Cmp))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index e1d9f40a4ae349..0030e9ce80abb4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -282,23 +282,21 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
 ; CHECK-LABEL: ctpop32_eq_one_nonzero:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub w8, w0, #1
-; CHECK-NEXT:    eor w9, w0, w8
-; CHECK-NEXT:    cmp w9, w8
-; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    tst w0, w8
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
 ; CHECK-NONEON:       // %bb.0: // %entry
 ; CHECK-NONEON-NEXT:    sub w8, w0, #1
-; CHECK-NONEON-NEXT:    eor w9, w0, w8
-; CHECK-NONEON-NEXT:    cmp w9, w8
-; CHECK-NONEON-NEXT:    cset w0, hi
+; CHECK-NONEON-NEXT:    tst w0, w8
+; CHECK-NONEON-NEXT:    cset w0, eq
 ; CHECK-NONEON-NEXT:    ret
 ;
 ; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
 ; CHECK-CSSC:       // %bb.0: // %entry
-; CHECK-CSSC-NEXT:    cnt w8, w0
-; CHECK-CSSC-NEXT:    cmp w8, #1
+; CHECK-CSSC-NEXT:    sub w8, w0, #1
+; CHECK-CSSC-NEXT:    tst w0, w8
 ; CHECK-CSSC-NEXT:    cset w0, eq
 ; CHECK-CSSC-NEXT:    ret
 entry:
@@ -311,23 +309,21 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
 ; CHECK-LABEL: ctpop32_ne_one_nonzero:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub w8, w0, #1
-; CHECK-NEXT:    eor w9, w0, w8
-; CHECK-NEXT:    cmp w9, w8
-; CHECK-NEXT:    cset w0, ls
+; CHECK-NEXT:    tst w0, w8
+; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
 ; CHECK-NONEON:       // %bb.0: // %entry
 ; CHECK-NONEON-NEXT:    sub w8, w0, #1
-; CHECK-NONEON-NEXT:    eor w9, w0, w8
-; CHECK-NONEON-NEXT:    cmp w9, w8
-; CHECK-NONEON-NEXT:    cset w0, ls
+; CHECK-NONEON-NEXT:    tst w0, w8
+; CHECK-NONEON-NEXT:    cset w0, ne
 ; CHECK-NONEON-NEXT:    ret
 ;
 ; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
 ; CHECK-CSSC:       // %bb.0: // %entry
-; CHECK-CSSC-NEXT:    cnt w8, w0
-; CHECK-CSSC-NEXT:    cmp w8, #1
+; CHECK-CSSC-NEXT:    sub w8, w0, #1
+; CHECK-CSSC-NEXT:    tst w0, w8
 ; CHECK-CSSC-NEXT:    cset w0, ne
 ; CHECK-CSSC-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index edbe7d6518a337..4c52047b928f4d 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1446,15 +1446,14 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
 ; RV32I-LABEL: ctpop32_eq_one_nonzero:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    addi a1, a0, -1
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    seqz a0, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
 ; RV32ZBB:       # %bb.0: # %entry
 ; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    addi a0, a0, -1
-; RV32ZBB-NEXT:    seqz a0, a0
+; RV32ZBB-NEXT:    sltiu a0, a0, 2
 ; RV32ZBB-NEXT:    ret
 entry:
   %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
@@ -1466,16 +1465,15 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
 ; RV32I-LABEL: ctpop32_ne_one_nonzero:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    addi a1, a0, -1
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    sltu a0, a1, a0
-; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
 ; RV32ZBB:       # %bb.0: # %entry
 ; RV32ZBB-NEXT:    cpop a0, a0
-; RV32ZBB-NEXT:    addi a0, a0, -1
-; RV32ZBB-NEXT:    snez a0, a0
+; RV32ZBB-NEXT:    sltiu a0, a0, 2
+; RV32ZBB-NEXT:    xori a0, a0, 1
 ; RV32ZBB-NEXT:    ret
 entry:
   %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 4364db625745cb..1e7814d588e4c0 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1622,17 +1622,16 @@ entry:
 define i1 @ctpop32_eq_one_nonzero(i32 %x) {
 ; RV64I-LABEL: ctpop32_eq_one_nonzero:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    addiw a1, a0, -1
-; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
 ; RV64ZBB:       # %bb.0: # %entry
 ; RV64ZBB-NEXT:    cpopw a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -1
-; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
 ; RV64ZBB-NEXT:    ret
 entry:
   %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
@@ -1643,18 +1642,17 @@ entry:
 define i1 @ctpop32_ne_one_nonzero(i32 %x) {
 ; RV64I-LABEL: ctpop32_ne_one_nonzero:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    addiw a1, a0, -1
-; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    addi a1, a0, -1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    sltu a0, a1, a0
-; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    snez a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
 ; RV64ZBB:       # %bb.0: # %entry
 ; RV64ZBB-NEXT:    cpopw a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -1
-; RV64ZBB-NEXT:    snez a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
+; RV64ZBB-NEXT:    xori a0, a0, 1
 ; RV64ZBB-NEXT:    ret
 entry:
   %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
@@ -1666,15 +1664,14 @@ define i1 @ctpop64_eq_one_nonzero(i64 %x) {
 ; RV64I-LABEL: ctpop64_eq_one_nonzero:
 ; RV64I:       # %bb.0: # %entry
 ; RV64I-NEXT:    addi a1, a0, -1
-; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    sltu a0, a1, a0
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
 ; RV64ZBB:       # %bb.0: # %entry
 ; RV64ZBB-NEXT:    cpop a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -1
-; RV64ZBB-NEXT:    seqz a0, a0
+; RV64ZBB-NEXT:    sltiu a0, a0, 2
 ; RV64ZBB-NEXT:    ret
 entry:
   %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index de7828640da7a4..96e33e1dafdc4a 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -223,14 +223,19 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
 
 
 define i1 @ctpop32_eq_one_nonzero(i32 %x) {
-; CHECK-LABEL: ctpop32_eq_one_nonzero:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal -1(%rdi), %eax
-; CHECK-NEXT:    xorl %eax, %edi
-; CHECK-NEXT:    cmpl %eax, %edi
-; CHECK-NEXT:    seta %al
-; CHECK-NEXT:    retq
+; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-NOBMI:       # %bb.0: # %entry
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT:    testl %eax, %edi
+; CHECK-NOBMI-NEXT:    sete %al
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-BMI2:       # %bb.0: # %entry
+; CHECK-BMI2-NEXT:    blsrl %edi, %eax
+; CHECK-BMI2-NEXT:    sete %al
+; CHECK-BMI2-NEXT:    retq
 entry:
   %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp eq i32 %popcnt, 1
@@ -238,14 +243,19 @@ entry:
 }
 
 define i1 @ctpop32_ne_one_nonzero(i32 %x) {
-; CHECK-LABEL: ctpop32_ne_one_nonzero:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal -1(%rdi), %eax
-; CHECK-NEXT:    xorl %eax, %edi
-; CHECK-NEXT:    cmpl %eax, %edi
-; CHECK-NEXT:    setbe %al
-; CHECK-NEXT:    retq
+; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-NOBMI:       # %bb.0: # %entry
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
+; CHECK-NOBMI-NEXT:    testl %eax, %edi
+; CHECK-NOBMI-NEXT:    setne %al
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-BMI2:       # %bb.0: # %entry
+; CHECK-BMI2-NEXT:    blsrl %edi, %eax
+; CHECK-BMI2-NEXT:    setne %al
+; CHECK-BMI2-NEXT:    retq
 entry:
   %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ne i32 %popcnt, 1

>From 6695777475a7daa87c4ac663484522736143956a Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 7 Oct 2024 10:31:11 +0800
Subject: [PATCH 3/4] [CodeGenPrepare] Use `isKnownNonZero`

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp       | 13 ++++---------
 llvm/test/CodeGen/X86/ispow2.ll           |  4 ++--
 llvm/test/CodeGen/X86/known-never-zero.ll | 12 ++++++------
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 7953c0d09f2a86..0bfc10ca8c63d9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2114,22 +2114,17 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
 /// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
 /// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
 /// result cannot be zero.
-static bool adjustIsPower2Test(CmpInst *Cmp) {
+static bool adjustIsPower2Test(CmpInst *Cmp, const DataLayout &DL) {
   ICmpInst::Predicate Pred;
   if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
     return false;
   if (!ICmpInst::isEquality(Pred))
     return false;
   auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
-  if (auto Range = II->getRange()) {
-    Type *Ty = II->getType();
-    unsigned BitWidth = Ty->getScalarSizeInBits();
-    if (Range->contains(APInt::getZero(BitWidth)))
-      return false;
-
+  if (isKnownNonZero(II, DL)) {
     if (Pred == ICmpInst::ICMP_EQ) {
       Cmp->setPredicate(ICmpInst::ICMP_ULT);
-      Cmp->setOperand(1, ConstantInt::get(Ty, 2));
+      Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
     } else
       Cmp->setPredicate(ICmpInst::ICMP_UGT);
     return true;
@@ -2156,7 +2151,7 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
     return true;
 
-  if (adjustIsPower2Test(Cmp))
+  if (adjustIsPower2Test(Cmp, *DL))
     return true;
 
   return false;
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index 96e33e1dafdc4a..649d257b28d762 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -102,7 +102,7 @@ define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) {
 ; CHECK-AVX512:       # %bb.0:
 ; CHECK-AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
 ; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
-; CHECK-AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; CHECK-AVX512-NEXT:    vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
 ; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; CHECK-AVX512-NEXT:    vzeroupper
@@ -155,7 +155,7 @@ define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
 ; CHECK-AVX512:       # %bb.0:
 ; CHECK-AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
 ; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
-; CHECK-AVX512-NEXT:    vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; CHECK-AVX512-NEXT:    vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
 ; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; CHECK-AVX512-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index ac41a3fe6bb7e4..6c0aaeb451e14a 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -555,9 +555,9 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
 ; X86-NEXT:    por %xmm2, %xmm0
 ; X86-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X86-NEXT:    paddd %xmm0, %xmm1
-; X86-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-NEXT:    pxor %xmm1, %xmm0
-; X86-NEXT:    pcmpgtd %xmm1, %xmm0
+; X86-NEXT:    pand %xmm1, %xmm0
+; X86-NEXT:    pxor %xmm1, %xmm1
+; X86-NEXT:    pcmpeqd %xmm1, %xmm0
 ; X86-NEXT:    psrld $31, %xmm0
 ; X86-NEXT:    retl
 ;
@@ -566,10 +566,10 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
 ; X64-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
-; X64-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; X64-NEXT:    vpminud %xmm1, %xmm0, %xmm1
+; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; X64-NEXT:    vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vpsrld $31, %xmm0, %xmm0
 ; X64-NEXT:    retq
   %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
   %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)

>From 15333292abbe68576c8be915cfea147094b7d7f4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 7 Oct 2024 17:51:52 +0800
Subject: [PATCH 4/4] [CodeGenPrepare] Check if it is profitable for the target

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0bfc10ca8c63d9..d070a466d16b20 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2114,19 +2114,30 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
 /// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
 /// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
 /// result cannot be zero.
-static bool adjustIsPower2Test(CmpInst *Cmp, const DataLayout &DL) {
+static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI,
+                               const TargetTransformInfo &TTI,
+                               const DataLayout &DL) {
   ICmpInst::Predicate Pred;
   if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
     return false;
   if (!ICmpInst::isEquality(Pred))
     return false;
   auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
+
+  // Check if it is profitable for the target
+  ICmpInst::Predicate NewPred =
+      Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGT;
+  if (TLI.isCtpopFast(TLI.getValueType(DL, II->getType())) &&
+      TTI.getCmpSelInstrCost(Instruction::ICmp, II->getType(), Cmp->getType(),
+                             Pred) <
+          TTI.getCmpSelInstrCost(Instruction::ICmp, II->getType(),
+                                 Cmp->getType(), NewPred))
+    return false;
+
   if (isKnownNonZero(II, DL)) {
-    if (Pred == ICmpInst::ICMP_EQ) {
-      Cmp->setPredicate(ICmpInst::ICMP_ULT);
+    if (Pred == ICmpInst::ICMP_EQ)
       Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
-    } else
-      Cmp->setPredicate(ICmpInst::ICMP_UGT);
+    Cmp->setPredicate(NewPred);
     return true;
   }
   return false;
@@ -2151,7 +2162,7 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
     return true;
 
-  if (adjustIsPower2Test(Cmp, *DL))
+  if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
     return true;
 
   return false;



More information about the llvm-commits mailing list