[llvm] [AMDGPU] Update code object metadata for kernarg preload (PR #134666)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 01:18:05 PDT 2025
================
@@ -745,5 +762,241 @@ void MetadataStreamerMsgPackV6::emitVersion() {
getRootMetadata("amdhsa.version") = Version;
}
+/// Emits one hidden kernel argument through emitKernelArgImpl, passing along
+/// a space-separated list of the register names found in the preload
+/// descriptor that \p ArgInfo returns for \p HiddenArg. When no descriptor is
+/// returned, an empty string is passed instead.
+void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
+    const DataLayout &DL, Type *ArgTy, Align Alignment,
+    KernArgPreload::HiddenArg HiddenArg, StringRef ArgName, unsigned &Offset,
+    msgpack::ArrayDocNode Args, const AMDGPUFunctionArgInfo &ArgInfo) {
+  SmallString<16> PreloadRegisters;
+
+  if (auto Descriptor = ArgInfo.getHiddenArgPreloadDescriptor(HiddenArg)) {
+    // Join the register names with single spaces, without a trailing space.
+    bool IsFirst = true;
+    for (const auto &Reg : (*Descriptor)->Regs) {
+      if (!IsFirst)
+        PreloadRegisters += " ";
+      IsFirst = false;
+      PreloadRegisters += AMDGPUInstPrinter::getRegisterName(Reg);
+    }
+  }
+
+  emitKernelArgImpl(DL, ArgTy, Alignment, ArgName, Offset, Args,
+                    PreloadRegisters);
+}
+
+void MetadataStreamerMsgPackV6::emitHiddenKernelArgs(
+ const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
+ auto &Func = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ // No implicit kernel argument is used.
+ if (ST.getImplicitArgNumBytes(Func) == 0)
+ return;
+
+ const Module *M = Func.getParent();
+ auto &DL = M->getDataLayout();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ auto *Int64Ty = Type::getInt64Ty(Func.getContext());
+ auto *Int32Ty = Type::getInt32Ty(Func.getContext());
+ auto *Int16Ty = Type::getInt16Ty(Func.getContext());
+
+ Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
+
+ const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo();
+ emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
+ KernArgPreload::HIDDEN_BLOCK_COUNT_X,
+ "hidden_block_count_x", Offset, Args, ArgInfo);
+ emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
+ KernArgPreload::HIDDEN_BLOCK_COUNT_Y,
+ "hidden_block_count_y", Offset, Args, ArgInfo);
+ emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
+ KernArgPreload::HIDDEN_BLOCK_COUNT_Z,
+ "hidden_block_count_z", Offset, Args, ArgInfo);
+
+ emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+ KernArgPreload::HIDDEN_GROUP_SIZE_X,
+ "hidden_group_size_x", Offset, Args, ArgInfo);
+ emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+ KernArgPreload::HIDDEN_GROUP_SIZE_Y,
+ "hidden_group_size_y", Offset, Args, ArgInfo);
+ emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+ KernArgPreload::HIDDEN_GROUP_SIZE_Z,
+ "hidden_group_size_z", Offset, Args, ArgInfo);
+
+ emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+ KernArgPreload::HIDDEN_REMAINDER_X,
+ "hidden_remainder_x", Offset, Args, ArgInfo);
+ emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+ KernArgPreload::HIDDEN_REMAINDER_Y,
+ "hidden_remainder_y", Offset, Args, ArgInfo);
+ emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+ KernArgPreload::HIDDEN_REMAINDER_Z,
+ "hidden_remainder_z", Offset, Args, ArgInfo);
+
+ // Reserved for hidden_tool_correlation_id.
+ Offset += 8;
+
+ Offset += 8; // Reserved.
+
+ emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
+ Args);
+ emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
+ Args);
+ emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
+ Args);
+
+ emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
+
+ Offset += 6; // Reserved.
+ auto *Int8PtrTy =
+ PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ if (M->getNamedMetadata("llvm.printf.fmts")) {
+ emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
+ Args);
+ } else {
----------------
Pierre-vh wrote:
Small nit: don't use `{}` for the `else` branches in this function; they're all one line, aren't they?
https://github.com/llvm/llvm-project/pull/134666
More information about the llvm-commits mailing list