[llvm] [SelectionDAG] Widen cttz to cttz_zero_undef (PR #92514)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 17 02:43:10 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
When widening e.g. i8 cttz(x) to i16, emit cttz_zero_undef(x | 0x100)
instead of cttz(x | 0x100). The OR sets the bit just above the original
type's top bit, so the widened operand is definitely not zero and the more
optimizable zero-undef form can be used.
---
Full diff: https://github.com/llvm/llvm-project/pull/92514.diff
5 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+7-5)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+9-6)
- (modified) llvm/test/CodeGen/AMDGPU/cttz.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll (+8-22)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fd97a1283b65a..0543c211c4971 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5032,7 +5032,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- case ISD::CTPOP:
+ case ISD::CTPOP: {
// Zero extend the argument unless its cttz, then use any_extend.
if (Node->getOpcode() == ISD::CTTZ ||
Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
@@ -5040,7 +5040,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
else
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- if (Node->getOpcode() == ISD::CTTZ) {
+ unsigned NewOpc = Node->getOpcode();
+ if (NewOpc == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
@@ -5048,12 +5049,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
OVT.getSizeInBits());
Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
DAG.getConstant(TopBit, dl, NVT));
+ NewOpc = ISD::CTTZ_ZERO_UNDEF;
}
// Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
// already the correct result.
- Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
- if (Node->getOpcode() == ISD::CTLZ ||
- Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ Tmp1 = DAG.getNode(NewOpc, dl, NVT, Tmp1);
+ if (NewOpc == ISD::CTLZ || NewOpc == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
@@ -5061,6 +5062,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
+ }
case ISD::BITREVERSE:
case ISD::BSWAP: {
unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0aa36deda79dc..98f64947bcabc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -709,23 +709,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
}
}
- if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) {
+ unsigned NewOpc = N->getOpcode();
+ if (NewOpc == ISD::CTTZ || NewOpc == ISD::VP_CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
- if (N->getOpcode() == ISD::CTTZ)
+ if (NewOpc == ISD::CTTZ) {
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
- else
+ NewOpc = ISD::CTTZ_ZERO_UNDEF;
+ } else {
Op =
DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT),
N->getOperand(1), N->getOperand(2));
+ NewOpc = ISD::VP_CTTZ_ZERO_UNDEF;
+ }
}
if (!N->isVPOpcode())
- return DAG.getNode(N->getOpcode(), dl, NVT, Op);
- return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1),
- N->getOperand(2));
+ return DAG.getNode(NewOpc, dl, NVT, Op);
+ return DAG.getNode(NewOpc, dl, NVT, Op, N->getOperand(1), N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VP_CttzElements(SDNode *N) {
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index 118d6c123046b..ee2894a66fbfc 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -1408,7 +1408,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0
; VI-NEXT: v_ffbl_b32_e32 v2, v2
-; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
@@ -1451,7 +1450,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-NEXT: v_or_b32_e32 v2, 0x10000, v1
; GFX10-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v1
; GFX10-NEXT: v_ffbl_b32_e32 v2, v2
-; GFX10-NEXT: v_min_u32_e32 v2, 32, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
; GFX10-NEXT: global_store_short v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 71f1cd54d705c..392a44318b0a5 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -1561,7 +1561,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; VI-NEXT: v_or_b32_e32 v0, v2, v0
; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0
; VI-NEXT: v_ffbl_b32_e32 v2, v2
-; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index ef8a6c704a44b..4a001662ce2ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -4145,29 +4145,15 @@ define <vscale x 1 x i9> @vp_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1
; CHECK-NEXT: li a1, 512
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vor.vx v8, v8, a1, v0.t
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: vsub.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vnot.v v8, v8, v0.t
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
-; CHECK-NEXT: lui a0, 5
-; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
-; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
-; CHECK-NEXT: lui a0, 3
-; CHECK-NEXT: addi a0, a0, 819
-; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
-; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
-; CHECK-NEXT: lui a0, 1
-; CHECK-NEXT: addi a0, a0, -241
-; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
-; CHECK-NEXT: li a0, 257
-; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9:
``````````
</details>
https://github.com/llvm/llvm-project/pull/92514
More information about the llvm-commits mailing list