[llvm] [AMDGPU] Move kernarg preload logic to AMDGPU Attributor (PR #123547)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 21 04:57:03 PST 2025
================
@@ -1314,19 +1523,64 @@ struct AAAMDGPUNoAGPR
const char AAAMDGPUNoAGPR::ID = 0;
-static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- for (unsigned I = 0;
- I < F.arg_size() &&
- I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
- ++I) {
- Argument &Arg = *F.getArg(I);
- // Check for incompatible attributes.
- if (Arg.hasByRefAttr() || Arg.hasNestAttr())
- break;
+static void markKernelArgsAsInreg(SetVector<Function *> &Functions,
+ TargetMachine &TM) {
+ SmallVector<Function *, 4> FunctionsToErase;
+ for (auto *F : Functions) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
+ if (!ST.hasKernargPreload() ||
+ F->getCallingConv() != CallingConv::AMDGPU_KERNEL || F->arg_empty())
+ continue;
+
+ PreloadKernelArgInfo PreloadInfo(*F, ST);
+ uint64_t ExplicitArgOffset = 0;
+ const DataLayout &DL = F->getDataLayout();
+ const uint64_t BaseOffset = ST.getExplicitKernelArgOffset();
+ unsigned NumPreloadsRequested = KernargPreloadCount;
+ unsigned NumPreloadedExplicitArgs = 0;
+ for (Argument &Arg : F->args()) {
+ // Avoid incompatible attributes and guard against running this pass
+ // twice.
+ if (Arg.hasByRefAttr() || Arg.hasNestAttr() ||
+ Arg.hasAttribute("amdgpu-hidden-argument"))
+ break;
+
+ // Inreg may be pre-existing on some arguments, try to preload these.
+ if (NumPreloadsRequested == 0 && !Arg.hasInRegAttr())
+ break;
+
+ // FIXME: Preload aggregates.
+ if (Arg.getType()->isAggregateType())
+ break;
+
+ Type *ArgTy = Arg.getType();
+ Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
+ uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
+ ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
----------------
arsenm wrote:
You should probably use the same TargetLowering functions that the calling-convention lowering uses, so you can see how these types will really be processed.
https://github.com/llvm/llvm-project/pull/123547
More information about the llvm-commits mailing list