[llvm] [AMDGPU] Support preloading hidden kernel arguments (PR #98861)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 2 11:15:16 PDT 2024
================
@@ -31,9 +33,93 @@ class PreloadKernelArgInfo {
const GCNSubtarget &ST;
unsigned NumFreeUserSGPRs;
-public:
- SmallVector<llvm::Metadata *, 8> KernelArgMetadata;
+ enum HiddenArg : unsigned {
+ HIDDEN_BLOCK_COUNT_X,
+ HIDDEN_BLOCK_COUNT_Y,
+ HIDDEN_BLOCK_COUNT_Z,
+ HIDDEN_GROUP_SIZE_X,
+ HIDDEN_GROUP_SIZE_Y,
+ HIDDEN_GROUP_SIZE_Z,
+ HIDDEN_REMAINDER_X,
+ HIDDEN_REMAINDER_Y,
+ HIDDEN_REMAINDER_Z,
+ END_HIDDEN_ARGS
+ };
+
+ struct HiddenArgInfo {
+ unsigned Offset;
+ unsigned Size;
+ const char *Name;
+ };
+
+ static constexpr HiddenArgInfo HiddenArgs[END_HIDDEN_ARGS] = {
+ {0, 4, "_hidden_block_count_x"}, {4, 4, "_hidden_block_count_y"},
+ {8, 4, "_hidden_block_count_z"}, {12, 2, "_hidden_group_size_x"},
+ {14, 2, "_hidden_group_size_y"}, {16, 2, "_hidden_group_size_z"},
+ {18, 2, "_hidden_remainder_x"}, {20, 2, "_hidden_remainder_y"},
+ {22, 2, "_hidden_remainder_z"}};
+
+ static HiddenArg getHiddenArgIndexFromOffset(unsigned Offset) {
+ for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)
+ if (HiddenArgs[I].Offset == Offset)
+ return static_cast<HiddenArg>(I);
+
+ llvm_unreachable("Unexpected hidden argument offset.");
+ }
+
+ static Type *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {
+ if (HA < END_HIDDEN_ARGS)
+ return Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8);
+
+ llvm_unreachable("Unexpected hidden argument.");
+ }
+ static const char *getHiddenArgName(HiddenArg HA) {
+ if (HA < END_HIDDEN_ARGS) {
+ return HiddenArgs[HA].Name;
+ }
+ llvm_unreachable("Unexpected hidden argument.");
+ }
+
+ Function *cloneFunctionWithPreloadImplicitArgs() {
+ FunctionType *FT = F.getFunctionType();
+ SmallVector<Type *, 16> FTypes(FT->param_begin(), FT->param_end());
+ for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)
+ FTypes.push_back(getHiddenArgType(F.getContext(), HiddenArg(I)));
+
+ FunctionType *NFT =
+ FunctionType::get(FT->getReturnType(), FTypes, FT->isVarArg());
+ Function *NF =
+ Function::Create(NFT, F.getLinkage(), F.getAddressSpace(), F.getName());
+
+ NF->copyAttributesFrom(&F);
+ NF->copyMetadata(&F, 0);
+ NF->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);
+
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+ NF->takeName(&F);
+ NF->splice(NF->begin(), &F);
+
+ Function::arg_iterator NFArg = NF->arg_begin();
+ for (Argument &Arg : F.args()) {
+ Arg.replaceAllUsesWith(&*NFArg);
+ NFArg->takeName(&Arg);
+ ++NFArg;
+ }
+
+ // Add an attribute that tracks the index offset to the first hidden
+ // argument.
+ NF->addFnAttr("amdgpu-hidden-arg-offset", utostr(NFArg->getArgNo()));
----------------
arsenm wrote:
Instead of relying on an exact argument sequence, you can attach a string attribute directly to the parameter
https://github.com/llvm/llvm-project/pull/98861
More information about the llvm-commits
mailing list