[llvm] AMDGPU: Handle folding frame indexes into s_add_i32 (PR #101694)

Diana Picus via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 05:40:50 PDT 2024


================
@@ -2432,7 +2432,98 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       MI->eraseFromParent();
       return true;
     }
+    case AMDGPU::S_ADD_I32: {
+      // TODO: Handle s_or_b32, s_and_b32.
+      unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
+      MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
 
+      assert(FrameReg || MFI->isBottomOfStack());
+
+      MachineOperand &DstOp = MI->getOperand(0);
+      const DebugLoc &DL = MI->getDebugLoc();
+      Register MaterializedReg = FrameReg;
+
+      // Defend against live scc, which should never happen in practice.
+      bool DeadSCC = MI->getOperand(3).isDead();
+
+      Register TmpReg;
+
+      // Do an in-place scale of the wave offset to the lane offset.
+      if (FrameReg && !ST.enableFlatScratch()) {
+        // FIXME: In the common case where the add does not also read its result
+        // (i.e. this isn't a reg += fi), it's not finding the dest reg as
+        // available.
+        TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
+                                               false, 0);
+        BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
+            .addDef(TmpReg, RegState::Renamable)
+            .addReg(FrameReg)
+            .addImm(ST.getWavefrontSizeLog2())
+            .setOperandDead(3); // Set SCC dead
+        MaterializedReg = TmpReg;
+      }
+
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+
+      // For the non-immediate case, we could fall through to the default
+      // handling, but we do an in-place update of the result register here to
+      // avoid scavenging another register.
+      if (OtherOp.isImm()) {
+        OtherOp.setImm(OtherOp.getImm() + Offset);
+        Offset = 0;
+      }
+
+      // If we can't fold the other operand, do another increment.
+      if (!OtherOp.isImm() && MaterializedReg) {
+        Register DstReg = DstOp.getReg();
+
+        if (!TmpReg && MaterializedReg == FrameReg) {
+          TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+                                                 MI, false, 0);
+          DstReg = TmpReg;
+        }
+
+        auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
+                          .addDef(DstReg, RegState::Renamable)
+                          .addReg(MaterializedReg, RegState::Kill)
+                          .add(OtherOp);
+        if (DeadSCC)
+          AddI32.setOperandDead(3);
+
+        MaterializedReg = DstReg;
+
+        OtherOp.ChangeToRegister(MaterializedReg, false);
+        OtherOp.setIsKill(true);
+        OtherOp.setIsRenamable(true);
+        FIOp.ChangeToImmediate(Offset);
+      } else if (!OtherOp.isImm() && !MaterializedReg) {
+        FIOp.ChangeToImmediate(Offset);
+      } else {
+        assert(Offset == 0);
----------------
rovka wrote:

Nit: I think this would be easier to follow if you moved all of this into the `if` at line 2471, and then had an else instead of `if (!OtherOp.isImm)`.

https://github.com/llvm/llvm-project/pull/101694


More information about the llvm-commits mailing list