[llvm] [WIP][AMDGPU] Improve the handling of `inreg` arguments (PR #133614)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 31 01:04:06 PDT 2025


================
@@ -2841,6 +2841,86 @@ void SITargetLowering::insertCopiesSplitCSR(
   }
 }
 
+class InregVPGRSpiller { // Shared base: tracks the VGPR and lane in use.
+  CCState &State;
+  const unsigned WaveFrontSize; // Wavefront size reported by the subtarget.
+
+  Register CurReg; // VGPR currently being shared, if any (see setReg()).
+  unsigned CurLane = 0; // NOTE(review): only ever incremented; can exceed WaveFrontSize -- confirm.
+
+protected:
+  SelectionDAG &DAG;
+  MachineFunction &MF;
+
+  Register getCurReg() const { return CurReg; }
+  unsigned getCurLane() const { return CurLane; }
+
+  InregVPGRSpiller(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+      : State(State),
+        WaveFrontSize(MF.getSubtarget<GCNSubtarget>().getWavefrontSize()),
+        DAG(DAG), MF(MF) {}
+
+  void setReg(Register &Reg) { // Reg is in/out: may be rewritten to CurReg.
+    if (CurReg.isValid()) {
+      State.DeallocateReg(Reg); // Release Reg in State; CurReg is reused.
+      Reg = CurReg;
+    } else {
+      CurReg = Reg; // No register in use yet: adopt the caller's.
+    }
+  }
+
+  void forward() { // Advance the lane counter after a value is handled.
+    // Once the lane counter reaches a multiple of the wavefront size, clear
+    // CurReg so that the next setReg() call adopts a new register.
+    if (++CurLane % WaveFrontSize == 0)
+      CurReg = 0;
+  }
+};
+
+class InregVPGRSpillerCallee final : private InregVPGRSpiller { // Callee side.
+public:
+  InregVPGRSpillerCallee(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+      : InregVPGRSpiller(DAG, MF, State) {}
+
+  SDValue read(SDValue Chain, const SDLoc &SL, Register &Reg, EVT VT) { // Chain is currently unused.
+    setReg(Reg); // May rewrite Reg to the VGPR already in use.
+
+    MF.addLiveIn(getCurReg(), &AMDGPU::VGPR_32RegClass); // Mark the VGPR live-in.
+
+    // TODO: Do we need the chain here?
+    SmallVector<SDValue, 4> Operands{ // Intrinsic ID, source VGPR, lane index.
+        DAG.getTargetConstant(Intrinsic::amdgcn_readlane, SL, MVT::i32),
+        DAG.getRegister(getCurReg(), VT),
+        DAG.getTargetConstant(getCurLane(), SL, MVT::i32)};
+    SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, VT, Operands);
+
+    forward(); // The next read() uses the following lane.
+
+    return Res;
+  }
+};
+
+class InregVPGRSpillerCallSite final : private InregVPGRSpiller {
+public:
+  InregVPGRSpillerCallSite(SelectionDAG &DAG, MachineFunction &MF,
+                           CCState &State)
+      : InregVPGRSpiller(DAG, MF, State) {}
+
+  SDValue write(const SDLoc &SL, Register &Reg, SDValue V, EVT VT) {
+    setReg(Reg);
+
+    SmallVector<SDValue, 4> Operands{
+        DAG.getTargetConstant(Intrinsic::amdgcn_writelane, SL, MVT::i32),
+        DAG.getRegister(getCurReg(), VT), V,
----------------
arsenm wrote:

Yes, it's the same as the input case, just in the opposite direction: build the vreg value, then emit one final CopyToReg for the physical register.

https://github.com/llvm/llvm-project/pull/133614


More information about the llvm-commits mailing list