[llvm] [AMDGPU] Improve selection of ballot.i64 intrinsic in wave32 mode in SelectionDAG. (PR #71556)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 7 08:24:42 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Valery Pykhtin (vpykhtin)
<details>
<summary>Changes</summary>
Currently ballot intrinsic is lowered to AMDGPUISD::SETCC using bit width of the lowered intrinsic mnemonic. Instead use the bit width of the current wave mode converted to the required bit width. This is similar to ICMP/FCMP lowering.
I haven't added tests for ballot.i32 in wave64 mode because I'm not sure this should be a compilation error?
---
Full diff: https://github.com/llvm/llvm-project/pull/71556.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+9-4)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll (+4-10)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cff7e4bc66218ca..0b1a8bf0861720b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5586,10 +5586,14 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
SDValue Src = N->getOperand(1);
SDLoc SL(N);
+ unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+ EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+
if (Src.getOpcode() == ISD::SETCC) {
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
- Src.getOperand(1), Src.getOperand(2));
+ SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src.getOperand(0),
+ Src.getOperand(1), Src.getOperand(2));
+ return VT.bitsEq(CCVT) ? SetCC : DAG.getZExtOrTrunc(SetCC, SL, VT);
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
@@ -5612,9 +5616,10 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
// (ballot (i1 $src)) -> (AMDGPUISD::SETCC (i32 (zext $src)) (i32 0)
// ISD::SETNE)
- return DAG.getNode(
- AMDGPUISD::SETCC, SL, VT, DAG.getZExtOrTrunc(Src, SL, MVT::i32),
+ SDValue SetCC = DAG.getNode(
+ AMDGPUISD::SETCC, SL, CCVT, DAG.getZExtOrTrunc(Src, SL, MVT::i32),
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
+ return VT.bitsEq(CCVT) ? SetCC : DAG.getZExtOrTrunc(SetCC, SL, VT);
}
void SITargetLowering::ReplaceNodeResults(SDNode *N,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index 3337d053eb930b9..563899b6bcce117 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -202,10 +202,8 @@ false:
define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_compare(i32 %v) {
; CHECK-LABEL: branch_divergent_ballot64_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 12, v0
-; CHECK-NEXT: s_mov_b32 s1, 0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT: s_cbranch_vccz .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB12_3
@@ -320,12 +318,8 @@ define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_and(i32 %v1, i32 %v2) {
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT: s_mov_b32 s1, 0
-; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB17_2
+; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-NEXT: s_cbranch_vccz .LBB17_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB17_3
``````````
</details>
https://github.com/llvm/llvm-project/pull/71556
More information about the llvm-commits
mailing list