[llvm] f6e93b8 - AMDGPU: Minor improvement and cleanup for waterfall loop generation (#111886)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 12:13:40 PDT 2024
Author: Changpeng Fang
Date: 2024-10-10T12:13:36-07:00
New Revision: f6e93b8147a94a595293b47c39d20d2038c812d1
URL: https://github.com/llvm/llvm-project/commit/f6e93b8147a94a595293b47c39d20d2038c812d1
DIFF: https://github.com/llvm/llvm-project/commit/f6e93b8147a94a595293b47c39d20d2038c812d1.diff
LOG: AMDGPU: Minor improvement and cleanup for waterfall loop generation (#111886)
First, ReadlanePieces should be in the scope of each MachineOperand. It
is not correct to declare it in an outer scope without clearing it after
its use for a MachineOperand.
Additionally, we do not need the OrigBB argument for
emitLoadScalarOpsFromVGPRLoop, since the MachineFunction (its only use)
can be obtained from LoopBB (or BodyBB).
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0c2ae382f53a19..d676d561d08180 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6302,11 +6302,14 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
// Emit the actual waterfall loop, executing the wrapped instruction for each
// unique value of \p ScalarOps across all lanes. In the best case we execute 1
// iteration, in the worst case we execute 64 (once per lane).
-static void emitLoadScalarOpsFromVGPRLoop(
- const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB,
- MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL,
- ArrayRef<MachineOperand *> ScalarOps) {
- MachineFunction &MF = *OrigBB.getParent();
+static void
+emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ MachineBasicBlock &LoopBB,
+ MachineBasicBlock &BodyBB,
+ const DebugLoc &DL,
+ ArrayRef<MachineOperand *> ScalarOps) {
+ MachineFunction &MF = *LoopBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@@ -6319,8 +6322,6 @@ static void emitLoadScalarOpsFromVGPRLoop(
const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
MachineBasicBlock::iterator I = LoopBB.begin();
-
- SmallVector<Register, 8> ReadlanePieces;
Register CondReg;
for (MachineOperand *ScalarOp : ScalarOps) {
@@ -6355,6 +6356,7 @@ static void emitLoadScalarOpsFromVGPRLoop(
ScalarOp->setReg(CurReg);
ScalarOp->setIsKill();
} else {
+ SmallVector<Register, 8> ReadlanePieces;
unsigned VScalarOpUndef = getUndefRegState(ScalarOp->isUndef());
assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
"Unhandled register size");
@@ -6535,7 +6537,7 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
}
}
- emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps);
+ emitLoadScalarOpsFromVGPRLoop(TII, MRI, *LoopBB, *BodyBB, DL, ScalarOps);
MachineBasicBlock::iterator First = RemainderBB->begin();
// Restore SCC
More information about the llvm-commits
mailing list