[llvm] [AMDGPU] Support preloading hidden kernel arguments (PR #98861)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 2 11:15:15 PDT 2024
================
@@ -64,6 +150,94 @@ class PreloadKernelArgInfo {
NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);
return true;
}
+
+ // Try to allocate SGPRs to preload implicit kernel arguments.
+ void tryAllocImplicitArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,
+ IRBuilder<> &Builder) {
+ StringRef Name = Intrinsic::getName(Intrinsic::amdgcn_implicitarg_ptr);
+ Function *ImplicitArgPtr = F.getParent()->getFunction(Name);
+ if (!ImplicitArgPtr)
+ return;
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ // Pair is the load and the load offset.
+ SmallVector<std::pair<LoadInst *, unsigned>, 4> ImplicitArgLoads;
+ for (auto *U : ImplicitArgPtr->users()) {
+ Instruction *CI = dyn_cast<Instruction>(U);
+ if (!CI || CI->getParent()->getParent() != &F)
+ continue;
+
+ for (auto *U : CI->users()) {
+ int64_t Offset = 0;
+ auto *Load = dyn_cast<LoadInst>(U); // Load from ImplicitArgPtr?
+ if (!Load) {
+ if (GetPointerBaseWithConstantOffset(U, Offset, DL) != CI)
+ continue;
+
+ Load = dyn_cast<LoadInst>(*U->user_begin()); // Load from GEP?
+ }
+
+ if (!Load || !Load->isSimple())
+ continue;
+
+ // FIXME: Expand to handle 64-bit implicit args and large merged loads.
+ unsigned LoadSize = Load->getType()->getScalarSizeInBits();
+ if (LoadSize != 32 && LoadSize != 16)
+ continue;
+
+ ImplicitArgLoads.push_back(std::make_pair(Load, Offset));
+ }
+ }
+
+ if (ImplicitArgLoads.empty())
+ return;
+
+ // Allocate loads in order of offset. We need to be sure that the implicit
+ // argument can actually be preloaded.
+ std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(),
+ [](const std::pair<LoadInst *, unsigned> &A,
+ const std::pair<LoadInst *, unsigned> &B) {
+ return A.second < B.second;
+ });
+
+ uint64_t LastExplicitArgOffset = ImplicitArgsBaseOffset;
+ bool AddedHiddenArgsToSignature = false;
+ Function *NF = nullptr;
+ unsigned LastPreloadIndex = 0;
+ for (const auto &Load : ImplicitArgLoads) {
+ LoadInst *LoadInst = Load.first;
+ Type *LoadType = LoadInst->getType();
+ auto LoadOffset = Load.second;
+ unsigned LoadSize = DL.getTypeStoreSize(LoadType);
+ // If we fail to preload any implicit argument we know we don't have SGPRs
+ // to preload any subsequent ones with larger offsets.
+ if (!tryAllocPreloadSGPRs(LoadSize, LoadOffset + ImplicitArgsBaseOffset,
+ LastExplicitArgOffset))
+ break;
+
+ if (!AddedHiddenArgsToSignature) {
+ NF = cloneFunctionWithPreloadImplicitArgs();
+ AddedHiddenArgsToSignature = true;
+ }
+
+ LastExplicitArgOffset = LoadOffset + LoadSize;
+ unsigned HiddenArgIndex = getHiddenArgIndexFromOffset(LoadOffset);
+ assert(NF);
+ unsigned Index = NF->arg_size() - END_HIDDEN_ARGS + HiddenArgIndex;
+ Argument *Arg = NF->getArg(Index);
+ LoadInst->replaceAllUsesWith(Arg);
+ if (Index > HiddenArgIndex)
+ LastPreloadIndex = HiddenArgIndex;
+ }
+
+ // Ensure all hidden arguments up to the final preload are also
+ // preloaded, even if some are unused.
+ for (unsigned I = 0; I <= LastPreloadIndex; ++I)
+ NF->getArg(NF->arg_size() - END_HIDDEN_ARGS + I)
+ ->addAttr(Attribute::InReg);
----------------
arsenm wrote:
This should probably use an AttrBuilder and add all of the attributes at once at the end.
https://github.com/llvm/llvm-project/pull/98861
More information about the llvm-commits mailing list