[llvm] [AMDGPU][GlobalISel] Enable kernel argument preloading (PR #134655)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 25 04:28:20 PDT 2025
================
@@ -497,6 +499,66 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
// these from the dispatch pointer.
}
+void AMDGPUCallLowering::lowerPreloadedParameter(
+ MachineIRBuilder &B, ArrayRef<Register> VRegs, Type *ArgTy,
+ uint64_t ArgOffset, Align Alignment,
+ ArrayRef<MCRegister> PreloadRegs) const {
+ MachineFunction &MF = B.getMF();
+ const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const DataLayout &DL = B.getDataLayout();
+
+ LLT ResTy = getLLTForType(*ArgTy, DL);
+ LLT ScalarTy = LLT::scalar(DL.getTypeSizeInBits(ArgTy));
+ unsigned TotalSize = 0;
+ SmallVector<Register> SrcRegs(PreloadRegs.size());
+
+ for (auto [Idx, PhysReg] : enumerate(PreloadRegs)) {
+ Register VReg = MRI.getLiveInVirtReg(PhysReg);
+ TypeSize RegSize = TRI->getRegSizeInBits(VReg, MRI);
+
+ if (!MRI.getVRegDef(VReg)) {
+ MRI.setType(VReg, LLT::scalar(RegSize));
+ B.getMBB().addLiveIn(PhysReg);
+ B.buildInstr(TargetOpcode::COPY).addDef(VReg).addReg(PhysReg);
+ }
+
+ constexpr const unsigned SGPRSize = 4;
+ // Arg is preloaded into the previous SGPR.
+ if (DL.getTypeStoreSize(ArgTy) < SGPRSize && Alignment < SGPRSize) {
+ int64_t AlignDownOffset = alignDown(ArgOffset, SGPRSize);
+ int64_t OffsetDiff = ArgOffset - AlignDownOffset;
+ auto ShiftAmt = B.buildConstant(LLT::scalar(32), OffsetDiff * 8);
+ auto Shift = B.buildRotateLeft(LLT::scalar(RegSize), VReg, ShiftAmt);
+
+ if (ResTy.isVector())
+ B.buildBitcast(VRegs[0], B.buildTrunc(ScalarTy, Shift));
+ else
+ B.buildTrunc(VRegs[0], Shift);
+
+ return;
+ }
+
+ TotalSize += RegSize;
+ SrcRegs[Idx] = VReg;
+ }
+
+ LLT MergeTy = LLT::scalar(TotalSize);
+ Register Res = SrcRegs.back();
+
+ if (SrcRegs.size() > 1)
+ Res = B.buildMergeLikeInstr(MergeTy, SrcRegs).getReg(0);
+
+ if (DL.getTypeStoreSizeInBits(ArgTy) < MergeTy.getSizeInBits())
+ Res = B.buildTrunc(ScalarTy, Res).getReg(0);
+
+ if (ResTy.isVector())
+ Res = B.buildBitcast(ResTy, Res).getReg(0);
----------------
arsenm wrote:
I thought MIRBuilder had nicer coercion helpers these days?
https://github.com/llvm/llvm-project/pull/134655
More information about the llvm-commits mailing list