[llvm] [AMDGPU] Simplify selection of llvm.amdgcn.inverse.ballot. NFCI. (PR #99345)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 08:58:12 PDT 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/99345
(No description was provided for this pull request.)
>From 8824c14813c8bb19176f478aa7c249255975b211 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 17 Jul 2024 16:52:06 +0100
Subject: [PATCH] [AMDGPU] Simplify selection of llvm.amdgcn.inverse.ballot.
NFCI.
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 12 -----------
.../AMDGPU/AMDGPUInstructionSelector.cpp | 13 ------------
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 -
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 21 ++++---------------
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +++-
llvm/lib/Target/AMDGPU/SIInstructions.td | 10 +++++++--
6 files changed, 15 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 6d5ffc66d98b2..b7471bab12850 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2775,18 +2775,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_interp_p1_f16:
SelectInterpP1F16(N);
return;
- case Intrinsic::amdgcn_inverse_ballot:
- switch (N->getOperand(1).getValueSizeInBits()) {
- case 32:
- Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
- break;
- case 64:
- Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
- break;
- default:
- llvm_unreachable("Unsupported size for inverse ballot mask.");
- }
- break;
default:
SelectCode(N);
break;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index dcb0f47973c4a..da3e8c0a62b08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1055,8 +1055,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return selectIntrinsicCmp(I);
case Intrinsic::amdgcn_ballot:
return selectBallot(I);
- case Intrinsic::amdgcn_inverse_ballot:
- return selectInverseBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
case Intrinsic::amdgcn_groupstaticsize:
@@ -1449,17 +1447,6 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
return true;
}
-bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- const DebugLoc &DL = I.getDebugLoc();
- const Register DstReg = I.getOperand(0).getReg();
- const Register MaskReg = I.getOperand(2).getReg();
-
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(MaskReg);
- I.eraseFromParent();
- return true;
-}
-
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 2d3317e04ce12..43ed210508d33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -112,7 +112,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDivScale(MachineInstr &MI) const;
bool selectIntrinsicCmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
- bool selectInverseBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const;
bool selectGroupStaticSize(MachineInstr &I) const;
bool selectReturnAddress(MachineInstr &I) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index df5a334f83082..4d81048ce54db 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5479,24 +5479,11 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
case AMDGPU::S_INVERSE_BALLOT_U32:
- case AMDGPU::S_INVERSE_BALLOT_U64: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const DebugLoc &DL = MI.getDebugLoc();
- const Register DstReg = MI.getOperand(0).getReg();
- Register MaskReg = MI.getOperand(1).getReg();
-
- const bool IsVALU = TRI->isVectorRegister(MRI, MaskReg);
-
- if (IsVALU) {
- MaskReg = TII->readlaneVGPRToSGPR(MaskReg, MI, MRI);
- }
-
- BuildMI(*BB, &MI, DL, TII->get(AMDGPU::COPY), DstReg).addReg(MaskReg);
- MI.eraseFromParent();
+ case AMDGPU::S_INVERSE_BALLOT_U64:
+ // These opcodes only exist to let SIFixSGPRCopies insert a readfirstlane if
+ // necessary. After that they are equivalent to a COPY.
+ MI.setDesc(TII->get(AMDGPU::COPY));
return BB;
- }
case AMDGPU::ENDPGM_TRAP: {
const DebugLoc &DL = MI.getDebugLoc();
if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6d12e8c6f2de2..57c6597f124f4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6687,7 +6687,9 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
MI.getOpcode() == AMDGPU::S_WQM_B32 ||
- MI.getOpcode() == AMDGPU::S_WQM_B64) {
+ MI.getOpcode() == AMDGPU::S_WQM_B64 ||
+ MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
+ MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
MachineOperand &Src = MI.getOperand(1);
if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 77b17a0f2789b..f2721fbd164bf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -212,9 +212,15 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
}
let usesCustomInserter = 1 in {
-def S_INVERSE_BALLOT_U32 : SPseudoInstSI <(outs SReg_32:$sdst), (ins SSrc_b32:$mask)>;
+def S_INVERSE_BALLOT_U32 : SPseudoInstSI<
+ (outs SReg_32:$sdst), (ins SSrc_b32:$mask),
+ [(set i1:$sdst, (int_amdgcn_inverse_ballot i32:$mask))]
+>;
-def S_INVERSE_BALLOT_U64 : SPseudoInstSI <(outs SReg_64:$sdst), (ins SSrc_b64:$mask)>;
+def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
+ (outs SReg_64:$sdst), (ins SSrc_b64:$mask),
+ [(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))]
+>;
} // End usesCustomInserter = 1
// Pseudo instructions used for @llvm.fptrunc.round upward
More information about the llvm-commits mailing list