[llvm] [AMDGPU] Improve selection of ballot.i64 intrinsic in wave32 mode in SelectionDAG. (PR #71556)
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 8 01:40:34 PST 2023
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/71556
From 7cbe1aca75413e8f5e008677f3ef4807712ff126 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 31 Oct 2023 13:36:46 +0100
Subject: [PATCH 1/2] [AMDGPU] Improve selection of ballot.i64 intrinsic in
wave32 mode in SelectionDAG.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 13 +++++++++----
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll | 14 ++++----------
2 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cff7e4bc66218ca..0b1a8bf0861720b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5586,10 +5586,14 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
SDValue Src = N->getOperand(1);
SDLoc SL(N);
+ unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+ EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+
if (Src.getOpcode() == ISD::SETCC) {
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
- Src.getOperand(1), Src.getOperand(2));
+ SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src.getOperand(0),
+ Src.getOperand(1), Src.getOperand(2));
+ return VT.bitsEq(CCVT) ? SetCC : DAG.getZExtOrTrunc(SetCC, SL, VT);
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
@@ -5612,9 +5616,10 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
// (ballot (i1 $src)) -> (AMDGPUISD::SETCC (i32 (zext $src)) (i32 0)
// ISD::SETNE)
- return DAG.getNode(
- AMDGPUISD::SETCC, SL, VT, DAG.getZExtOrTrunc(Src, SL, MVT::i32),
+ SDValue SetCC = DAG.getNode(
+ AMDGPUISD::SETCC, SL, CCVT, DAG.getZExtOrTrunc(Src, SL, MVT::i32),
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
+ return VT.bitsEq(CCVT) ? SetCC : DAG.getZExtOrTrunc(SetCC, SL, VT);
}
void SITargetLowering::ReplaceNodeResults(SDNode *N,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index 3337d053eb930b9..563899b6bcce117 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -202,10 +202,8 @@ false:
define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_compare(i32 %v) {
; CHECK-LABEL: branch_divergent_ballot64_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 12, v0
-; CHECK-NEXT: s_mov_b32 s1, 0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT: s_cbranch_vccz .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB12_3
@@ -320,12 +318,8 @@ define amdgpu_cs i32 @branch_divergent_ballot64_ne_zero_and(i32 %v1, i32 %v2) {
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
-; CHECK-NEXT: s_mov_b32 s1, 0
-; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB17_2
+; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-NEXT: s_cbranch_vccz .LBB17_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB17_3
From 51578ea440a20a0785e0a081f30ce333a5db7fbf Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Wed, 8 Nov 2023 10:38:50 +0100
Subject: [PATCH 2/2] implement TODO
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index cd810f0b43e50db..6956c5050e4b686 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2313,9 +2313,9 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
SDValue VCMP = Cond->getOperand(0);
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
auto *CRHS = dyn_cast<ConstantSDNode>(Cond->getOperand(1));
- if ((CC == ISD::SETEQ || CC == ISD::SETNE) && CRHS && CRHS->isZero() &&
- // TODO: make condition below an assert after fixing ballot bitwidth.
- VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) && CRHS && CRHS->isZero()) {
+
+ assert(VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize());
// %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
// %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
// BRCOND i1 %C, %BB
More information about the llvm-commits mailing list