[llvm] 4df4922 - AMDGPU/SDAG: Custom SETCC (i.e. ballot) is always uniform
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 11 12:04:50 PDT 2022
Author: Nicolai Hähnle
Date: 2022-04-11T14:04:21-05:00
New Revision: 4df4922da6cd9d4f58aa17bd3a54435d947570fd
URL: https://github.com/llvm/llvm-project/commit/4df4922da6cd9d4f58aa17bd3a54435d947570fd
DIFF: https://github.com/llvm/llvm-project/commit/4df4922da6cd9d4f58aa17bd3a54435d947570fd.diff
LOG: AMDGPU/SDAG: Custom SETCC (i.e. ballot) is always uniform
The AMDGPUISD::SETCC node is like ISD::SETCC, but returns a lane mask
instead of a per-lane boolean. The lane mask is uniform.
This improves instruction selection for code patterns like
ctpop(ballot(x)), which can now use an S_BCNT1_* instruction instead
of V_BCNT_*.
GlobalISel already selects scalar instructions (an earlier commit
added a test case).
Differential Revision: https://reviews.llvm.org/D123432
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9f21fd942e288..96240661cbed1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -846,6 +846,8 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
return false;
+ case AMDGPUISD::SETCC: // ballot-style instruction
+ return true;
}
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index 6f0454ab2184f..e63b18216dab2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -71,21 +71,14 @@ define amdgpu_cs i32 @compare_floats(float %x, float %y) {
ret i32 %ballot
}
-define amdgpu_cs float @ctpop_of_ballot(i32 %x, i32 %y) {
+define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) {
; CHECK-LABEL: ctpop_of_ballot:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1
-
-; TODO: This should use a scalar s_bcnt1 instruction.
-; NOTE: The final mul is cruft to prevent a "bad VGPR to SGPR copy" error
-
-; CHECK-NEXT: v_bcnt_u32_b32 v1, vcc_lo, 0
-; CHECK-NEXT: v_mul_lo_u32 v0, v0, v1
+; CHECK-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; CHECK-NEXT: s_bcnt1_i32_b32 s0, vcc_lo
; CHECK-NEXT: ; return to shader part epilog
- %cmp = icmp ugt i32 %x, %y
+ %cmp = fcmp ogt float %x, %y
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
%bcnt = call i32 @llvm.ctpop.i32(i32 %ballot)
- %r.i = mul i32 %x, %bcnt
- %r = bitcast i32 %r.i to float
- ret float %r
+ ret i32 %bcnt
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
index f57a601cf12c1..473159ec20e55 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
@@ -74,23 +74,15 @@ define amdgpu_cs i64 @compare_floats(float %x, float %y) {
ret i64 %ballot
}
-define amdgpu_cs float @ctpop_of_ballot(i32 %x, i32 %y) {
+define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) {
; CHECK-LABEL: ctpop_of_ballot:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
-
-; TODO: This should use a scalar s_bcnt1 instruction.
-; NOTE: The final mul is cruft to prevent a "bad VGPR to SGPR copy" error
-
-; CHECK-NEXT: v_bcnt_u32_b32 v1, vcc_lo, 0
-; CHECK-NEXT: v_bcnt_u32_b32 v1, vcc_hi, v1
-; CHECK-NEXT: v_mul_lo_u32 v0, v0, v1
+; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1
+; CHECK-NEXT: s_bcnt1_i32_b64 s0, vcc
+; CHECK-NEXT: s_mov_b32 s1, 0
; CHECK-NEXT: ; return to shader part epilog
- %cmp = icmp ugt i32 %x, %y
+ %cmp = fcmp ogt float %x, %y
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
%bcnt = call i64 @llvm.ctpop.i64(i64 %ballot)
- %bcnt.32 = trunc i64 %bcnt to i32
- %r.i = mul i32 %x, %bcnt.32
- %r = bitcast i32 %r.i to float
- ret float %r
+ ret i64 %bcnt
}
More information about the llvm-commits
mailing list