[llvm] [AMDGPU] Update code object metadata for kernarg preload (PR #134666)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 01:18:06 PDT 2025
================
@@ -745,5 +762,241 @@ void MetadataStreamerMsgPackV6::emitVersion() {
getRootMetadata("amdhsa.version") = Version;
}
+// Emits one hidden kernel argument through emitKernelArgImpl, forwarding a
+// space-separated list of the register names found in the preload descriptor
+// that ArgInfo holds for HiddenArg. The list is empty when ArgInfo has no
+// descriptor for that hidden argument.
+void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
+    const DataLayout &DL, Type *ArgTy, Align Alignment,
+    KernArgPreload::HiddenArg HiddenArg, StringRef ArgName, unsigned &Offset,
+    msgpack::ArrayDocNode Args, const AMDGPUFunctionArgInfo &ArgInfo) {
+
+  SmallString<16> RegisterList;
+  if (auto Descriptor = ArgInfo.getHiddenArgPreloadDescriptor(HiddenArg)) {
+    // A single space goes between consecutive names, never before the first.
+    bool NeedSeparator = false;
+    for (const auto &Reg : (*Descriptor)->Regs) {
+      if (NeedSeparator)
+        RegisterList += " ";
+      RegisterList += AMDGPUInstPrinter::getRegisterName(Reg);
+      NeedSeparator = true;
+    }
+  }
+
+  emitKernelArgImpl(DL, ArgTy, Alignment, ArgName, Offset, Args, RegisterList);
+}
+
+// Appends the hidden (implicit) kernel arguments of MF's function to Args and
+// advances Offset past each one. Slots that are reserved or not needed by the
+// function are skipped by bumping Offset without emitting an entry. Nothing is
+// emitted when the subtarget reports zero implicit-argument bytes.
+void MetadataStreamerMsgPackV6::emitHiddenKernelArgs(
+    const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
+  auto &Func = MF.getFunction();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  // The function uses no implicit kernel arguments at all.
+  if (ST.getImplicitArgNumBytes(Func) == 0)
+    return;
+
+  const Module *M = Func.getParent();
+  auto &DL = M->getDataLayout();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo();
+
+  auto &Ctx = Func.getContext();
+  auto *I16Ty = Type::getInt16Ty(Ctx);
+  auto *I32Ty = Type::getInt32Ty(Ctx);
+  auto *I64Ty = Type::getInt64Ty(Ctx);
+  auto *GlobalPtrTy = PointerType::get(Ctx, AMDGPUAS::GLOBAL_ADDRESS);
+
+  // Hidden argument that carries its preload registers (if any) from ArgInfo.
+  auto EmitPreloadable = [&](Type *Ty, Align A,
+                             KernArgPreload::HiddenArg Kind, StringRef Name) {
+    emitHiddenKernelArgWithPreload(DL, Ty, A, Kind, Name, Offset, Args,
+                                   ArgInfo);
+  };
+
+  // Hidden argument without preload information.
+  auto EmitPlain = [&](Type *Ty, Align A, StringRef Name) {
+    emitKernelArgImpl(DL, Ty, A, Name, Offset, Args);
+  };
+
+  // 8-byte global pointer argument: emitted when Wanted, otherwise its slot
+  // is skipped.
+  auto EmitPointerOrSkip = [&](bool Wanted, StringRef Name) {
+    if (Wanted)
+      EmitPlain(GlobalPtrTy, Align(8), Name);
+    else
+      Offset += 8; // Skipped.
+  };
+
+  // The hidden arguments start at the implicit-arg-pointer alignment.
+  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
+
+  EmitPreloadable(I32Ty, Align(4), KernArgPreload::HIDDEN_BLOCK_COUNT_X,
+                  "hidden_block_count_x");
+  EmitPreloadable(I32Ty, Align(4), KernArgPreload::HIDDEN_BLOCK_COUNT_Y,
+                  "hidden_block_count_y");
+  EmitPreloadable(I32Ty, Align(4), KernArgPreload::HIDDEN_BLOCK_COUNT_Z,
+                  "hidden_block_count_z");
+
+  EmitPreloadable(I16Ty, Align(2), KernArgPreload::HIDDEN_GROUP_SIZE_X,
+                  "hidden_group_size_x");
+  EmitPreloadable(I16Ty, Align(2), KernArgPreload::HIDDEN_GROUP_SIZE_Y,
+                  "hidden_group_size_y");
+  EmitPreloadable(I16Ty, Align(2), KernArgPreload::HIDDEN_GROUP_SIZE_Z,
+                  "hidden_group_size_z");
+
+  EmitPreloadable(I16Ty, Align(2), KernArgPreload::HIDDEN_REMAINDER_X,
+                  "hidden_remainder_x");
+  EmitPreloadable(I16Ty, Align(2), KernArgPreload::HIDDEN_REMAINDER_Y,
+                  "hidden_remainder_y");
+  EmitPreloadable(I16Ty, Align(2), KernArgPreload::HIDDEN_REMAINDER_Z,
+                  "hidden_remainder_z");
+
+  Offset += 8; // Reserved for hidden_tool_correlation_id.
+  Offset += 8; // Reserved.
+
+  EmitPlain(I64Ty, Align(8), "hidden_global_offset_x");
+  EmitPlain(I64Ty, Align(8), "hidden_global_offset_y");
+  EmitPlain(I64Ty, Align(8), "hidden_global_offset_z");
+
+  EmitPlain(I16Ty, Align(2), "hidden_grid_dims");
+
+  Offset += 6; // Reserved.
+
+  // Each pointer below is emitted unless the module or function shows it is
+  // not needed; in that case its 8-byte slot is skipped.
+  EmitPointerOrSkip(M->getNamedMetadata("llvm.printf.fmts") != nullptr,
+                    "hidden_printf_buffer");
+  EmitPointerOrSkip(!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"),
+                    "hidden_hostcall_buffer");
+  EmitPointerOrSkip(!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"),
+                    "hidden_multigrid_sync_arg");
+  EmitPointerOrSkip(!Func.hasFnAttribute("amdgpu-no-heap-ptr"),
+                    "hidden_heap_v1");
+  EmitPointerOrSkip(!Func.hasFnAttribute("amdgpu-no-default-queue"),
+                    "hidden_default_queue");
+  EmitPointerOrSkip(!Func.hasFnAttribute("amdgpu-no-completion-action"),
+                    "hidden_completion_action");
+
+  // The dynamic LDS size is emitted only when dynamic LDS is used.
+  if (MFI.isDynamicLDSUsed())
+    EmitPlain(I32Ty, Align(4), "hidden_dynamic_lds_size");
+  else
+    Offset += 4; // Skipped.
+
+  Offset += 68; // Reserved.
+
+  // hidden_private_base and hidden_shared_base are emitted only when the
+  // subtarget does NOT have aperture registers; otherwise their 8 bytes are
+  // skipped.
+  if (ST.hasApertureRegs()) {
+    Offset += 8; // Skipped.
+  } else {
+    EmitPlain(I32Ty, Align(4), "hidden_private_base");
+    EmitPlain(I32Ty, Align(4), "hidden_shared_base");
+  }
+
+  // Last argument: nothing follows it, so no skip is needed when it is absent.
+  if (MFI.getUserSGPRInfo().hasQueuePtr())
+    EmitPlain(GlobalPtrTy, Align(8), "hidden_queue_ptr");
+}
+
+void MetadataStreamerMsgPackV6::emitKernelArg(const Argument &Arg,
+ unsigned &Offset,
+ msgpack::ArrayDocNode Args,
+ const MachineFunction &MF) {
+ const auto *Func = Arg.getParent();
+ auto ArgNo = Arg.getArgNo();
+ const MDNode *Node;
+
+ StringRef Name;
+ Node = Func->getMetadata("kernel_arg_name");
+ if (Node && ArgNo < Node->getNumOperands())
+ Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ else if (Arg.hasName())
+ Name = Arg.getName();
+
+ StringRef TypeName;
+ Node = Func->getMetadata("kernel_arg_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef BaseTypeName;
+ Node = Func->getMetadata("kernel_arg_base_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef ActAccQual;
+ // Do we really need NoAlias check here?
+ if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
+ if (Arg.onlyReadsMemory())
+ ActAccQual = "read_only";
+ else if (Arg.hasAttribute(Attribute::WriteOnly))
+ ActAccQual = "write_only";
+ }
+
+ StringRef AccQual;
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef TypeQual;
+ Node = Func->getMetadata("kernel_arg_type_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ const DataLayout &DL = Func->getDataLayout();
+
+ MaybeAlign PointeeAlign;
+ Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
+
+ // FIXME: Need to distinguish in memory alignment from pointer alignment.
+ if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
+ PointeeAlign = Arg.getParamAlign().valueOrOne();
+ }
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ SmallString<8> PreloadRegisters;
+ if (MFI->getNumKernargPreloadedSGPRs()) {
+ assert(MF.getSubtarget<GCNSubtarget>().hasKernargPreload());
+ const auto &PreloadDescs =
+ MFI->getArgInfo().getPreloadDescriptorsForArgIdx(ArgNo);
+ for (auto &Desc : PreloadDescs) {
+ if (!PreloadRegisters.empty())
+ PreloadRegisters += " ";
+
+ for (unsigned I = 0; I < Desc->Regs.size(); ++I) {
+ if (I > 0)
+ PreloadRegisters += " ";
+ PreloadRegisters += AMDGPUInstPrinter::getRegisterName(Desc->Regs[I]);
+ }
+ }
+ }
+
+ // There's no distinction between byval aggregates and raw aggregates.
+ Type *ArgTy;
+ Align ArgAlign;
+ std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
----------------
Pierre-vh wrote:
```suggestion
auto [ArgTy, ArgAlign] = getArgumentTypeAlign(Arg, DL);
```
https://github.com/llvm/llvm-project/pull/134666
More information about the llvm-commits
mailing list