[llvm] AMDGPU: Handle folding frame indexes into s_add_i32 (PR #101694)
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 05:40:50 PDT 2024
================
@@ -2432,7 +2432,98 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MI->eraseFromParent();
return true;
}
+ case AMDGPU::S_ADD_I32: {
+ // TODO: Handle s_or_b32, s_and_b32.
+ unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
+ MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
+ assert(FrameReg || MFI->isBottomOfStack());
+
+ MachineOperand &DstOp = MI->getOperand(0);
+ const DebugLoc &DL = MI->getDebugLoc();
+ Register MaterializedReg = FrameReg;
+
+ // Defend against live scc, which should never happen in practice.
+ bool DeadSCC = MI->getOperand(3).isDead();
+
+ Register TmpReg;
+
+ // Do an in-place scale of the wave offset to the lane offset.
+ if (FrameReg && !ST.enableFlatScratch()) {
+ // FIXME: In the common case where the add does not also read its result
+ // (i.e. this isn't a reg += fi), it's not finding the dest reg as
+ // available.
+ TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
+ false, 0);
+ BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
+ .addDef(TmpReg, RegState::Renamable)
+ .addReg(FrameReg)
+ .addImm(ST.getWavefrontSizeLog2())
+ .setOperandDead(3); // Set SCC dead
+ MaterializedReg = TmpReg;
+ }
+
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+
+ // For the non-immediate case, we could fall through to the default
+ // handling, but we do an in-place update of the result register here to
+ // avoid scavenging another register.
+ if (OtherOp.isImm()) {
+ OtherOp.setImm(OtherOp.getImm() + Offset);
+ Offset = 0;
+ }
+
+ // If we can't fold the other operand, do another increment.
+ if (!OtherOp.isImm() && MaterializedReg) {
+ Register DstReg = DstOp.getReg();
+
+ if (!TmpReg && MaterializedReg == FrameReg) {
+ TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+ MI, false, 0);
+ DstReg = TmpReg;
+ }
+
+ auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
+ .addDef(DstReg, RegState::Renamable)
+ .addReg(MaterializedReg, RegState::Kill)
+ .add(OtherOp);
+ if (DeadSCC)
+ AddI32.setOperandDead(3);
+
+ MaterializedReg = DstReg;
+
+ OtherOp.ChangeToRegister(MaterializedReg, false);
+ OtherOp.setIsKill(true);
+ OtherOp.setIsRenamable(true);
+ FIOp.ChangeToImmediate(Offset);
+ } else if (!OtherOp.isImm() && !MaterializedReg) {
+ FIOp.ChangeToImmediate(Offset);
+ } else {
+ assert(Offset == 0);
----------------
rovka wrote:
Nit: I think this would be easier to follow if you moved all of this into the `if (OtherOp.isImm())` at line 2471, and then had an `else` instead of `if (!OtherOp.isImm())`.
https://github.com/llvm/llvm-project/pull/101694
More information about the llvm-commits
mailing list