[llvm] [WIP][AMDGPU] Improve the handling of `inreg` arguments (PR #133614)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 30 06:26:48 PDT 2025


================
@@ -2841,6 +2841,86 @@ void SITargetLowering::insertCopiesSplitCSR(
   }
 }
 
+class InregVPGRSpiller { // Tracks a current register and a lane counter. NOTE(review): "VPGR" looks like a typo for "VGPR" -- confirm.
+  CCState &State; // Used by setReg() to deallocate a register that is being replaced.
+  const unsigned WaveFrontSize; // Wavefront size from the GCN subtarget; forward() wraps on it.
+
+  Register CurReg; // Register currently in use; cleared to 0 by forward() on wrap.
+  unsigned CurLane = 0; // Running counter, incremented once per forward() call.
+
+protected:
+  SelectionDAG &DAG; // Not used in this class itself; exposed to derived classes.
+  MachineFunction &MF; // Queried for the subtarget in the constructor; exposed to derived classes.
+
+  Register getCurReg() const { return CurReg; } // Returns the register currently in use.
+  unsigned getCurLane() const { return CurLane; } // Returns the raw counter (not reduced modulo the wavefront size).
+
+  InregVPGRSpiller(SelectionDAG &DAG, MachineFunction &MF, CCState &State) // Protected: constructible only from derived classes.
+      : State(State),
+        WaveFrontSize(MF.getSubtarget<GCNSubtarget>().getWavefrontSize()),
+        DAG(DAG), MF(MF) {}
+
+  void setReg(Register &Reg) { // Reg is in/out: it may be overwritten with CurReg.
+    if (CurReg.isValid()) { // A register is already in use: reuse it.
+      State.DeallocateReg(Reg); // Deallocate the caller's register in State since it is not used.
+      Reg = CurReg;
+    } else { // No register in use yet: adopt the caller's register.
+      CurReg = Reg;
+    }
+  }
+
+  void forward() { // Advances the lane counter by one.
+    // Once every lane of the current VGPR has been used, reset it so that the
+    // next setReg() call picks up a new register.
+    if (++CurLane % WaveFrontSize == 0)
+      CurReg = 0;
+  }
+};
+
+class InregVPGRSpillerCallee final : private InregVPGRSpiller {
+public:
+  InregVPGRSpillerCallee(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+      : InregVPGRSpiller(DAG, MF, State) {}
+
+  SDValue read(SDValue Chain, const SDLoc &SL, Register &Reg, EVT VT) {
+    setReg(Reg);
+
+    MF.addLiveIn(getCurReg(), &AMDGPU::VGPR_32RegClass);
+
+    // TODO: Do we need the chain here?
----------------
shiltian wrote:

But IIUC we can't simply call `getCopyFromReg`, right? We want to copy from one exact lane of the register, and that's why we need to use the readlane intrinsic here.

https://github.com/llvm/llvm-project/pull/133614


More information about the llvm-commits mailing list