[llvm] [AMDGPU] Omit umin on ctlz/cttz if operand is non-zero. (PR #79127)
Leon Clark via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 23 04:15:11 PST 2024
https://github.com/PeddleSpam created https://github.com/llvm/llvm-project/pull/79127
None
>From 9b69b953bcfe31008a52dad4d148ab733150bfe0 Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Tue, 23 Jan 2024 12:13:43 +0000
Subject: [PATCH] [AMDGPU] Omit umin on ctlz/cttz if operand is non-zero.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7 ++++++-
llvm/test/CodeGen/AMDGPU/cttz.ll | 2 --
llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll | 1 -
3 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 55d95154c75878b..a6ec514b09c66d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3114,8 +3114,13 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
// (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64)
// (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src)
// (cttz_zero_undef src) -> (S_FF1_I32_B64 src)
+
+ // umin can be omitted if the operand is known to be non-zero.
+ auto KB = DAG.computeKnownBits(Src);
+ auto const IsNonZero = KB.countMinPopulation() > 0u;
+
SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
- if (!ZeroUndef) {
+ if (!ZeroUndef && !IsNonZero) {
const SDValue ConstVal = DAG.getConstant(
Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index 118d6c123046b79..ee2894a66fbfcc0 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -1408,7 +1408,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0
; VI-NEXT: v_ffbl_b32_e32 v2, v2
-; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
@@ -1451,7 +1450,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-NEXT: v_or_b32_e32 v2, 0x10000, v1
; GFX10-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v1
; GFX10-NEXT: v_ffbl_b32_e32 v2, v2
-; GFX10-NEXT: v_min_u32_e32 v2, 32, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
; GFX10-NEXT: global_store_short v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 71f1cd54d705c83..392a44318b0a5bb 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -1561,7 +1561,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; VI-NEXT: v_or_b32_e32 v0, v2, v0
; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0
; VI-NEXT: v_ffbl_b32_e32 v2, v2
-; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
More information about the llvm-commits mailing list