[llvm] [AMDGPU] Inplace FI elimination during PEI for scalar copy instruction (PR #99556)

Pankaj Dwivedi via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 19 04:55:08 PDT 2024


================
@@ -2523,22 +2532,46 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
             // We may have 1 free scratch SGPR even though a carry out is
             // unavailable. Only one additional mov is needed.
-            Register TmpScaledReg = RS->scavengeRegisterBackwards(
-                AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
-            Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
-
-            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
-              .addReg(FrameReg)
-              .addImm(ST.getWavefrontSizeLog2());
-            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
-                .addReg(ScaledReg, RegState::Kill)
-                .addImm(Offset);
+            Register TmpScaledReg =
+                IsCopy && IsSALU
+                    ? ResultReg
+                    : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+                                                    MI, false, 0, false);
+            Register ScaledReg =
+                TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
+            Register TmpResultReg = ScaledReg;
+
+            if (!LiveSCC) {
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
+                  .addReg(FrameReg)
+                  .addImm(ST.getWavefrontSizeLog2());
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
+                  .addReg(TmpResultReg, RegState::Kill)
+                  .addImm(Offset);
+            } else {
+              TmpResultReg = RS->scavengeRegisterBackwards(
+                  AMDGPU::VGPR_32RegClass, MI, false, 0);
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHR_B32_e64),
+                      TmpResultReg)
+                  .addImm(ST.getWavefrontSizeLog2())
+                  .addReg(FrameReg);
+              auto Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32),
+                                 TmpResultReg);
+              Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+              Register NewDest =
+                  IsCopy ? ResultReg
+                         : RS->scavengeRegisterBackwards(
+                               AMDGPU::SReg_32RegClass, Add, false, 0);
----------------
PankajDwivedi-25 wrote:

yes, I guess the last two test cases present in frame-index.mir are spill case, which are working fine.

https://github.com/llvm/llvm-project/pull/99556


More information about the llvm-commits mailing list