[llvm] [AMDGPU][NFCI] Remove preload kernarg alloc dep on DAG isel path (PR #96030)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 21 13:21:01 PDT 2024
================
@@ -2465,71 +2465,94 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
// these from the dispatch pointer.
}
+static bool allocPreloadKernArg(uint64_t &LastExplicitArgOffset,
+ uint64_t ExplicitArgOffset, uint64_t ArgOffset,
+ unsigned ArgSize, unsigned Idx,
+ MachineFunction &MF, const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info, CCState &CCInfo) {
+ if (ArgOffset >= ExplicitArgOffset)
+ return false;
+
+ const Align KernelArgBaseAlign = Align(16);
+ Align Alignment = commonAlignment(KernelArgBaseAlign, ArgOffset);
+ unsigned NumAllocSGPRs = alignTo(ArgSize, 4) / 4;
+
+ // Arg is preloaded into the previous SGPR.
+ if (ArgSize < 4 && Alignment < 4) {
+ Info.getArgInfo().PreloadKernArgs[Idx].Regs.push_back(
+ Info.getArgInfo().PreloadKernArgs[Idx - 1].Regs[0]);
+ return true;
+ }
+
+ unsigned Padding = ArgOffset - LastExplicitArgOffset;
+ unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;
+ // Check for free user SGPRs for preloading.
+ if (PaddingSGPRs + NumAllocSGPRs + 1 /*Synthetic SGPRs*/ >
+ Info.getUserSGPRInfo().getNumFreeUserSGPRs()) {
+ return false;
+ }
+
+ // Preload this argument.
+ const TargetRegisterClass *RC =
+ TRI.getSGPRClassForBitWidth(NumAllocSGPRs * 32);
+ SmallVectorImpl<MCRegister> *PreloadRegs =
+ Info.addPreloadedKernArg(TRI, RC, NumAllocSGPRs, Idx, PaddingSGPRs);
+
+ if (PreloadRegs->size() > 1)
+ RC = &AMDGPU::SGPR_32RegClass;
+
+ for (auto &Reg : *PreloadRegs) {
+ assert(Reg);
+ MF.addLiveIn(Reg, RC);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ LastExplicitArgOffset = NumAllocSGPRs * 4 + ArgOffset;
+ return true;
+}
+
// Allocate pre-loaded kernel arguments. Arguments to be preloaded must be
// sequential starting from the first argument.
void SITargetLowering::allocatePreloadKernArgSGPRs(
- CCState &CCInfo, SmallVectorImpl<CCValAssign> &ArgLocs,
- const SmallVectorImpl<ISD::InputArg> &Ins, MachineFunction &MF,
+ CCState &CCInfo, SmallVectorImpl<CCValAssign> &ArgLocs, MachineFunction &MF,
const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
Function &F = MF.getFunction();
- unsigned LastExplicitArgOffset =
- MF.getSubtarget<GCNSubtarget>().getExplicitKernelArgOffset();
- GCNUserSGPRUsageInfo &SGPRInfo = Info.getUserSGPRInfo();
- bool InPreloadSequence = true;
- unsigned InIdx = 0;
+ const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset();
+ uint64_t ExplicitArgOffset = BaseOffset;
+ uint64_t LastExplicitArgOffset = ExplicitArgOffset;
+ unsigned LocIdx = 0;
for (auto &Arg : F.args()) {
- if (!InPreloadSequence || !Arg.hasInRegAttr())
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const bool IsByRef = Arg.hasByRefAttr();
+ Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
+ unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
----------------
arsenm wrote:
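Use TypeSize? (`DL.getTypeAllocSize` returns a `TypeSize`; storing it in an `unsigned` relies on an implicit conversion.)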
https://github.com/llvm/llvm-project/pull/96030
More information about the llvm-commits
mailing list