[llvm] [WIP][AMDGPU] Improve the handling of `inreg` arguments (PR #133614)

Sun Apr 13 07:24:29 PDT 2025

================
@@ -2841,6 +2841,96 @@ void SITargetLowering::insertCopiesSplitCSR(
   }
 }
 
+/// Classes for spilling inreg VGPR arguments.
+///
+/// When an argument marked inreg is pushed to a VGPR, it indicates that the
+/// available SGPRs for argument passing have been exhausted. In such cases, it
+/// is preferable to pack multiple inreg arguments into individual lanes of
+/// VGPRs instead of assigning each directly to separate VGPRs.
+///
+/// Spilling involves two parts: the caller-side (call site) and the
+/// callee-side. Both must follow the same method for selecting registers and
+/// lanes, ensuring that an argument written at the call site matches exactly
+/// with the one read at the callee.
+
+/// The spilling class for the callee-side: each readLane() call lowers one
+/// packed inreg argument to an amdgcn_readlane of the next lane of one VGPR.
+class InregVPGRSpillerCallee {
+  CCState &State;
+  SelectionDAG &DAG;
+  MachineFunction &MF;
+
+  Register SrcReg; // Result of MF.addLiveIn from the first readLane call.
+  SDValue SrcVal; // CopyFromReg of SrcReg; set once, marks "first call done".
+  unsigned CurLane = 0; // Next lane index to read; bumped by every readLane.
+
+public:
+  InregVPGRSpillerCallee(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+      : State(State), DAG(DAG), MF(MF) {}
+
+  SDValue readLane(SDValue Chain, const SDLoc &SL, Register &Reg, EVT VT) {
+    if (SrcVal) {
+      State.DeallocateReg(Reg); // Not the first call: SrcReg is reused below.
+    } else {
+      Reg = MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+      SrcReg = Reg;
+      SrcVal = DAG.getCopyFromReg(Chain, SL, Reg, VT);
+    }
+    // According to the calling convention, VGPR0-31 are used for passing
+    // function arguments, whether they are regular arguments or 'inreg'
+    // function arguments that get spilled into VGPRs. Therefore, at most 32
+    // 'inreg' arguments can be spilled to VGPRs.
+    assert(CurLane < 32 && "more than expected VGPR inreg arguments");
+    SmallVector<SDValue, 4> Operands{
+        DAG.getTargetConstant(Intrinsic::amdgcn_readlane, SL, MVT::i32),
+        DAG.getRegister(SrcReg, VT), // NOTE(review): SrcVal is unused here - confirm.
+        DAG.getTargetConstant(CurLane++, SL, MVT::i32)};
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, VT, Operands);
+  }
+};
+
+/// The spilling class for the caller-side that lowers packing of call site
+/// arguments.
+class InregVPGRSpillerCallSite {
+  Register DstReg;
+  SDValue LastWrite;
+  unsigned CurLane = 0;
+
+  SelectionDAG &DAG;
+  MachineFunction &MF;
+
+public:
+  InregVPGRSpillerCallSite(SelectionDAG &DAG, MachineFunction &MF)
+      : DAG(DAG), MF(MF) {}
+
+  void writeLane(const SDLoc &SL, Register &Reg, SDValue Val, EVT VT) {
+    if (DstReg.isValid())
+      Reg = DstReg;
+    else
+      DstReg = Reg;
+    // According to the calling convention, VGPR0-31 are used for passing
+    // function arguments, whether they are regular arguments or 'inreg'
+    // function arguments that get spilled into VGPRs. Therefore, at most 32
+    // 'inreg' arguments can be spilled to VGPRs.
+    assert(CurLane < 32 && "more than expected VGPR inreg arguments");
+    SmallVector<SDValue, 4> Operands{
+        DAG.getTargetConstant(Intrinsic::amdgcn_writelane, SL, MVT::i32), Val,
+        DAG.getTargetConstant(CurLane++, SL, MVT::i32)};
+    if (!LastWrite) {
+      Register VReg = MF.getRegInfo().getLiveInVirtReg(DstReg);
----------------
arsenm wrote:

This example MIR is missing the output part. This is all fine as long as it copies to the correct register in the end. I don't see what the issue is? 

https://github.com/llvm/llvm-project/pull/133614