[llvm] [AMDGPU][GlobalISel] Enable kernel argument preloading (PR #134655)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 25 04:28:19 PDT 2025


================
@@ -497,6 +499,66 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
   // these from the dispatch pointer.
 }
 
+void AMDGPUCallLowering::lowerPreloadedParameter(
+    MachineIRBuilder &B, ArrayRef<Register> VRegs, Type *ArgTy,
+    uint64_t ArgOffset, Align Alignment,
+    ArrayRef<MCRegister> PreloadRegs) const {
+  MachineFunction &MF = B.getMF();
+  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const DataLayout &DL = B.getDataLayout();
+
+  LLT ResTy = getLLTForType(*ArgTy, DL);
+  LLT ScalarTy = LLT::scalar(DL.getTypeSizeInBits(ArgTy));
+  unsigned TotalSize = 0;
+  SmallVector<Register> SrcRegs(PreloadRegs.size());
+
+  for (auto [Idx, PhysReg] : enumerate(PreloadRegs)) {
+    Register VReg = MRI.getLiveInVirtReg(PhysReg);
+    TypeSize RegSize = TRI->getRegSizeInBits(VReg, MRI);
+
+    if (!MRI.getVRegDef(VReg)) {
+      MRI.setType(VReg, LLT::scalar(RegSize));
+      B.getMBB().addLiveIn(PhysReg);
+      B.buildInstr(TargetOpcode::COPY).addDef(VReg).addReg(PhysReg);
+    }
+
+    constexpr const unsigned SGPRSize = 4;
+    // Arg is preloaded into the previous SGPR.
+    if (DL.getTypeStoreSize(ArgTy) < SGPRSize && Alignment < SGPRSize) {
----------------
arsenm wrote:

This repeats the size check from above, but in a slightly different form. This really should be done in terms of getNumRegsForCallingConv rather than raw type sizes.

https://github.com/llvm/llvm-project/pull/134655


More information about the llvm-commits mailing list