[llvm] [WIP][AMDGPU] Improve the handling of `inreg` arguments (PR #133614)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 30 06:26:48 PDT 2025
================
@@ -2841,6 +2841,86 @@ void SITargetLowering::insertCopiesSplitCSR(
}
}
+class InregVPGRSpiller { // Tracks one current register plus a running lane counter; base of InregVPGRSpillerCallee.
+ CCState &State;
+ const unsigned WaveFrontSize; // Initialized from the subtarget's getWavefrontSize().
+
+ Register CurReg; // Register handed out by setReg(); cleared again by forward().
+ unsigned CurLane = 0; // Incremented once per forward() call.
+
+protected:
+ SelectionDAG &DAG;
+ MachineFunction &MF;
+
+ Register getCurReg() const { return CurReg; }
+ unsigned getCurLane() const { return CurLane; }
+
+ InregVPGRSpiller(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+ : State(State),
+ WaveFrontSize(MF.getSubtarget<GCNSubtarget>().getWavefrontSize()),
+ DAG(DAG), MF(MF) {}
+
+ void setReg(Register &Reg) { // Reg is in/out: on return Reg and CurReg hold the same register.
+ if (CurReg.isValid()) {
+ State.DeallocateReg(Reg); // Deallocate the caller's register in State; CurReg is used instead.
+ Reg = CurReg;
+ } else {
+ CurReg = Reg; // No current register yet: adopt the caller's.
+ }
+ }
+
+ void forward() { // Advance to the next lane.
+ // Once CurLane reaches a multiple of the wavefront size, every lane of the
+ // current VGPR has been used: reset CurReg so the next setReg() adopts a new one.
+ if (++CurLane % WaveFrontSize == 0)
+ CurReg = 0;
+ }
+};
+
+class InregVPGRSpillerCallee final : private InregVPGRSpiller {
+public:
+ InregVPGRSpillerCallee(SelectionDAG &DAG, MachineFunction &MF, CCState &State)
+ : InregVPGRSpiller(DAG, MF, State) {}
+
+ SDValue read(SDValue Chain, const SDLoc &SL, Register &Reg, EVT VT) {
+ setReg(Reg);
+
+ MF.addLiveIn(getCurReg(), &AMDGPU::VGPR_32RegClass);
+
+ // TODO: Do we need the chain here?
----------------
shiltian wrote:
But IIUC we can't simply call `getCopyFromReg`, right? We want to copy from one specific lane of the register, and that's why we need to use the readlane intrinsic here.
https://github.com/llvm/llvm-project/pull/133614
More information about the llvm-commits
mailing list