[llvm] [AMDGPU] Support preloading hidden kernel arguments (PR #98861)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 06:45:14 PDT 2024
================
@@ -31,9 +33,109 @@ class PreloadKernelArgInfo {
const GCNSubtarget &ST;
unsigned NumFreeUserSGPRs;
-public:
- SmallVector<llvm::Metadata *, 8> KernelArgMetadata;
+ enum HiddenArg : unsigned {
+ HIDDEN_BLOCK_COUNT_X,
+ HIDDEN_BLOCK_COUNT_Y,
+ HIDDEN_BLOCK_COUNT_Z,
+ HIDDEN_GROUP_SIZE_X,
+ HIDDEN_GROUP_SIZE_Y,
+ HIDDEN_GROUP_SIZE_Z,
+ HIDDEN_REMAINDER_X,
+ HIDDEN_REMAINDER_Y,
+ HIDDEN_REMAINDER_Z,
+ END_HIDDEN_ARGS
+ };
+
+ // Stores information about a specific hidden argument.
+ struct HiddenArgInfo {
+ // Offset in bytes from the location in the kernearg segment pointed to by
+ // the implicitarg pointer.
+ uint8_t Offset;
+ // The size of the hidden argument in bytes.
+ uint8_t Size;
+ // The name of the hidden argument in the kernel signature.
+ const char *Name;
+ };
+
+ static constexpr HiddenArgInfo HiddenArgs[END_HIDDEN_ARGS] = {
+ {0, 4, "_hidden_block_count_x"}, {4, 4, "_hidden_block_count_y"},
+ {8, 4, "_hidden_block_count_z"}, {12, 2, "_hidden_group_size_x"},
+ {14, 2, "_hidden_group_size_y"}, {16, 2, "_hidden_group_size_z"},
+ {18, 2, "_hidden_remainder_x"}, {20, 2, "_hidden_remainder_y"},
+ {22, 2, "_hidden_remainder_z"}};
+
+ static HiddenArg getHiddenArgFromOffset(unsigned Offset) {
+ for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)
+ if (HiddenArgs[I].Offset == Offset)
+ return static_cast<HiddenArg>(I);
+
+ return END_HIDDEN_ARGS;
+ }
+
+ static Type *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {
+ if (HA < END_HIDDEN_ARGS)
+ return Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8);
+
+ llvm_unreachable("Unexpected hidden argument.");
+ }
+
+ static const char *getHiddenArgName(HiddenArg HA) {
+ if (HA < END_HIDDEN_ARGS) {
+ return HiddenArgs[HA].Name;
+ }
+ llvm_unreachable("Unexpected hidden argument.");
+ }
+ // Clones the function after adding implicit arguments to the argument list
+ // and returns the new updated function. Preloaded implicit arguments are
+ // added up to and including the last one that will be preloaded, indicated by
+ // LastPreloadIndex. Currently preloading is only performed on the totality of
+ // sequential data from the kernarg segment including implicit (hidden)
+ // arguments. This means that all arguments up to the last preloaded argument
+ // will also be preloaded even if that data is unused.
+ Function *cloneFunctionWithPreloadImplicitArgs(unsigned LastPreloadIndex) {
+ FunctionType *FT = F.getFunctionType();
+ LLVMContext &Ctx = F.getParent()->getContext();
+ SmallVector<Type *, 16> FTypes(FT->param_begin(), FT->param_end());
+ for (unsigned I = 0; I <= LastPreloadIndex; ++I)
+ FTypes.push_back(getHiddenArgType(Ctx, HiddenArg(I)));
+
+ FunctionType *NFT =
+ FunctionType::get(FT->getReturnType(), FTypes, FT->isVarArg());
+ Function *NF =
+ Function::Create(NFT, F.getLinkage(), F.getAddressSpace(), F.getName());
+
+ NF->copyAttributesFrom(&F);
+ NF->copyMetadata(&F, 0);
+ NF->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);
----------------
arsenm wrote:
This should probably be in copyAttributesFrom
https://github.com/llvm/llvm-project/pull/98861
More information about the llvm-commits
mailing list