[llvm] [AMDGPU] Update code object metadata for kernarg preload (PR #134666)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 10 01:18:05 PDT 2025


================
@@ -745,5 +762,241 @@ void MetadataStreamerMsgPackV6::emitVersion() {
   getRootMetadata("amdhsa.version") = Version;
 }
 
+/// Emit one hidden kernel argument through emitKernelArgImpl, forwarding a
+/// space-separated list of the register names found in the preload descriptor
+/// that \p ArgInfo reports for \p HiddenArg. The list is left empty when no
+/// descriptor is reported.
+void MetadataStreamerMsgPackV6::emitHiddenKernelArgWithPreload(
+    const DataLayout &DL, Type *ArgTy, Align Alignment,
+    KernArgPreload::HiddenArg HiddenArg, StringRef ArgName, unsigned &Offset,
+    msgpack::ArrayDocNode Args, const AMDGPUFunctionArgInfo &ArgInfo) {
+  SmallString<16> PreloadRegNames;
+  if (auto Desc = ArgInfo.getHiddenArgPreloadDescriptor(HiddenArg)) {
+    // Nothing precedes the first name; every later name follows one space.
+    StringRef Separator = "";
+    for (const auto &Reg : (*Desc)->Regs) {
+      PreloadRegNames += Separator;
+      PreloadRegNames += AMDGPUInstPrinter::getRegisterName(Reg);
+      Separator = " ";
+    }
+  }
+  emitKernelArgImpl(DL, ArgTy, Alignment, ArgName, Offset, Args,
+                    PreloadRegNames);
+}
+
+void MetadataStreamerMsgPackV6::emitHiddenKernelArgs(
+    const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
+  auto &Func = MF.getFunction();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  // No implicit kernel argument is used.
+  if (ST.getImplicitArgNumBytes(Func) == 0)
+    return;
+
+  const Module *M = Func.getParent();
+  auto &DL = M->getDataLayout();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+  auto *Int64Ty = Type::getInt64Ty(Func.getContext());
+  auto *Int32Ty = Type::getInt32Ty(Func.getContext());
+  auto *Int16Ty = Type::getInt16Ty(Func.getContext());
+
+  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
+
+  const AMDGPUFunctionArgInfo &ArgInfo = MFI.getArgInfo();
+  emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
+                                 KernArgPreload::HIDDEN_BLOCK_COUNT_X,
+                                 "hidden_block_count_x", Offset, Args, ArgInfo);
+  emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
+                                 KernArgPreload::HIDDEN_BLOCK_COUNT_Y,
+                                 "hidden_block_count_y", Offset, Args, ArgInfo);
+  emitHiddenKernelArgWithPreload(DL, Int32Ty, Align(4),
+                                 KernArgPreload::HIDDEN_BLOCK_COUNT_Z,
+                                 "hidden_block_count_z", Offset, Args, ArgInfo);
+
+  emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+                                 KernArgPreload::HIDDEN_GROUP_SIZE_X,
+                                 "hidden_group_size_x", Offset, Args, ArgInfo);
+  emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+                                 KernArgPreload::HIDDEN_GROUP_SIZE_Y,
+                                 "hidden_group_size_y", Offset, Args, ArgInfo);
+  emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+                                 KernArgPreload::HIDDEN_GROUP_SIZE_Z,
+                                 "hidden_group_size_z", Offset, Args, ArgInfo);
+
+  emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+                                 KernArgPreload::HIDDEN_REMAINDER_X,
+                                 "hidden_remainder_x", Offset, Args, ArgInfo);
+  emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+                                 KernArgPreload::HIDDEN_REMAINDER_Y,
+                                 "hidden_remainder_y", Offset, Args, ArgInfo);
+  emitHiddenKernelArgWithPreload(DL, Int16Ty, Align(2),
+                                 KernArgPreload::HIDDEN_REMAINDER_Z,
+                                 "hidden_remainder_z", Offset, Args, ArgInfo);
+
+  // Reserved for hidden_tool_correlation_id.
+  Offset += 8;
+
+  Offset += 8; // Reserved.
+
+  emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
+                    Args);
+  emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
+                    Args);
+  emitKernelArgImpl(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
+                    Args);
+
+  emitKernelArgImpl(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
+
+  Offset += 6; // Reserved.
+  auto *Int8PtrTy =
+      PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+  if (M->getNamedMetadata("llvm.printf.fmts")) {
+    emitKernelArgImpl(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
+                      Args);
+  } else {
----------------
Pierre-vh wrote:

Small nit: don't use `{}` for the `else` branches in this function; they're all one line, aren't they?

https://github.com/llvm/llvm-project/pull/134666


More information about the llvm-commits mailing list