[llvm] [AMDGPU] Generate waterfall for calls with SGPR(inreg) argument (PR #146997)

Juan Manuel Martinez Caamaño via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 31 01:48:46 PDT 2025


================
@@ -7846,26 +7889,57 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
     Register DstReg = Inst.getOperand(0).getReg();
     const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
 
-    // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and
-    // hope for the best.
     if (Inst.isCopy() && DstReg.isPhysical() &&
         RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
-      // TODO: Only works for 32 bit registers.
-      if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
-        BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
-                get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
-            .add(Inst.getOperand(1));
-      } else {
-        Register NewDst =
-            MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-        BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
-                get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
-            .add(Inst.getOperand(1));
-        BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
-                DstReg)
-            .addReg(NewDst);
+      const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+      Register SrcReg = Inst.getOperand(1).getReg();
+      MachineBasicBlock::iterator I = Inst.getIterator();
+      MachineBasicBlock::iterator E = Inst.getParent()->end();
+      // Only search current block since phyreg's def & use cannot cross
+      // blocks when MF.NoPhi = false.
+      while (++I != E) {
+        // Currently, we only support waterfall on SI_CALL_ISEL.
+        if (I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
+          MachineInstr *UseMI = &*I;
+          for (unsigned i = 0; i < UseMI->getNumOperands(); ++i) {
+            if (UseMI->getOperand(i).isReg() &&
+                UseMI->getOperand(i).getReg() == DstReg) {
+              MachineOperand *MO = &UseMI->getOperand(i);
+              MO->setReg(SrcReg);
+              V2PhysSCopyInfo &V2SCopyInfo = Worklist.WaterFalls[UseMI];
+              V2SCopyInfo.MOs.push_back(MO);
+              V2SCopyInfo.SGPRs.push_back(DstReg);
+              Worklist.V2PhySCopiesToErase[&Inst] = true;
+            }
+          }
+        } else if ((I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
+                    I->getOperand(0).isReg() &&
+                    I->getOperand(0).getReg() == DstReg) ||
+                   DstReg == AMDGPU::M0) {
+          // If it's a copy of a VGPR to a physical SGPR, insert a
+          // V_READFIRSTLANE and hope for the best.
+          // TODO: Only works for 32 bit registers.
+          if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
+            BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+                    get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
+                .add(Inst.getOperand(1));
+          } else {
+            Register NewDst =
+                MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+            BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+                    get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+                .add(Inst.getOperand(1));
+            BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+                    get(AMDGPU::COPY), DstReg)
+                .addReg(NewDst);
+          }
+          Worklist.V2PhySCopiesToErase[&Inst] = true;
+        } else if (I->readsRegister(DstReg, TRI))
+          // COPY can be erased if other type of inst uses it.
----------------
jmmartinez wrote:

typo ?
```suggestion
          // COPY cannot be erased if other type of inst uses it.
```

https://github.com/llvm/llvm-project/pull/146997


More information about the llvm-commits mailing list