[llvm] [WIP][AMDGPU] Improve the handling of `inreg` arguments (PR #133614)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 01:04:06 PDT 2025
================
@@ -2841,6 +2841,86 @@ void SITargetLowering::insertCopiesSplitCSR(
}
}
+class InregVPGRSpiller {
+ CCState &State;
+ const unsigned WaveFrontSize;
+
+ Register CurReg;
+ unsigned CurLane = 0;
+
+protected:
+ SelectionDAG &DAG;
+ MachineFunction &MF;
+
+ Register getCurReg() const { return CurReg; }
+ unsigned getCurLane() const { return CurLane; }
+
+ InregVPGRSpiller(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+ : State(State),
+ WaveFrontSize(MF.getSubtarget<GCNSubtarget>().getWavefrontSize()),
+ DAG(DAG), MF(MF) {}
+
+ void setReg(Register &Reg) {
+ if (CurReg.isValid()) {
+ State.DeallocateReg(Reg);
+ Reg = CurReg;
+ } else {
+ CurReg = Reg;
+ }
+ }
+
+ void forward() {
+ // We have used the same VGPRs of all the lanes, so we need to reset it and
+ // pick up a new one in the next move.
+ if (++CurLane % WaveFrontSize == 0)
+ CurReg = 0;
+ }
+};
+
+class InregVPGRSpillerCallee final : private InregVPGRSpiller {
+public:
+ InregVPGRSpillerCallee(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+ : InregVPGRSpiller(DAG, MF, State) {}
+
+ SDValue read(SDValue Chain, const SDLoc &SL, Register &Reg, EVT VT) {
+ setReg(Reg);
+
+ MF.addLiveIn(getCurReg(), &AMDGPU::VGPR_32RegClass);
+
+ // TODO: Do we need the chain here?
+ SmallVector<SDValue, 4> Operands{
+ DAG.getTargetConstant(Intrinsic::amdgcn_readlane, SL, MVT::i32),
+ DAG.getRegister(getCurReg(), VT),
+ DAG.getTargetConstant(getCurLane(), SL, MVT::i32)};
+ SDValue Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, VT, Operands);
+
+ forward();
+
+ return Res;
+ }
+};
+
+class InregVPGRSpillerCallSite final : private InregVPGRSpiller {
+public:
+ InregVPGRSpillerCallSite(SelectionDAG &DAG, MachineFunction &MF,
+ CCState &State)
+ : InregVPGRSpiller(DAG, MF, State) {}
+
+ SDValue write(const SDLoc &SL, Register &Reg, SDValue V, EVT VT) {
+ setReg(Reg);
+
+ SmallVector<SDValue, 4> Operands{
+ DAG.getTargetConstant(Intrinsic::amdgcn_writelane, SL, MVT::i32),
+ DAG.getRegister(getCurReg(), VT), V,
----------------
arsenm wrote:
Yes, it's the same as the input case, just in the opposite direction. Build the vreg value, then emit one final CopyToReg for the physical register.
https://github.com/llvm/llvm-project/pull/133614
More information about the llvm-commits mailing list