[PATCH] D123346: AMDGPU: Align the implicit kernel argument segment to 8 bytes for v5

Changpeng Fang via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 8 09:24:42 PDT 2022


cfang updated this revision to Diff 421549.
cfang added a comment.

Use alignTo to force the alignment of the implicit kernarg segment, i.e. round Offset up before the hidden arguments are emitted:
Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123346/new/

https://reviews.llvm.org/D123346

Files:
  llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
  llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
  llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll


Index: llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
+++ llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
@@ -21,7 +21,7 @@
 ; CHECK-LABEL: amdhsa.kernels:
 ; CHECK:  - .args:
 ; CHECK-NEXT:      - .name:           four
-; CHECK-NEXT:        .offset:         0
+; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
 ; CHECK-NEXT:         .value_kind:     by_value
 ; CHECK-NEXT:       - .offset:         8
@@ -56,4 +56,3 @@
 ; CHECK-LABEL:        .name:           test_aligned_to_eight
 
 declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
-
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
@@ -24,8 +24,8 @@
 }
 
 ; CHECK: - .args:
-; CHECK:             .value_kind:     hidden_multigrid_sync_arg
-; PRE-GFX9:          .offset:         200
+; PRE-GFX9:          .value_kind:     hidden_multigrid_sync_arg
+; PRE-GFX9-NEXT:          .offset:         200
 ; PRE-GFX9-NEXT:     .size:           4
 ; PRE-GFX9-NEXT:     .value_kind:     hidden_private_base
 ; PRE-GFX9-NEXT:     .offset:         204
@@ -44,8 +44,8 @@
 }
 
 ; CHECK: - .args:
-; CHECK:             .value_kind:     hidden_multigrid_sync_arg
-; PRE-GFX9:          .offset:         200
+; PRE-GFX9:             .value_kind:     hidden_multigrid_sync_arg
+; PRE-GFX9-NEXT:          .offset:         200
 ; PRE-GFX9-NEXT:     .size:           4
 ; PRE-GFX9-NEXT:     .value_kind:     hidden_private_base
 ; PRE-GFX9-NEXT:     .offset:         204
@@ -64,8 +64,8 @@
 }
 
 ; CHECK: - .args:
-; CHECK:             .value_kind:     hidden_multigrid_sync_arg
-; PRE-GFX9:          .offset:         192
+; PRE-GFX9:             .value_kind:     hidden_multigrid_sync_arg
+; PRE-GFX9-NEXT:          .offset:         192
 ; PRE-GFX9-NEXT:     .size:           4
 ; PRE-GFX9-NEXT:     .value_kind:     hidden_private_base
 ; PRE-GFX9-NEXT:     .offset:         196
Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -801,6 +801,8 @@
   auto &DL = M->getDataLayout();
   auto Int64Ty = Type::getInt64Ty(Func.getContext());
 
+  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
+
   if (HiddenArgNumBytes >= 8)
     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
                   Args);
@@ -973,6 +975,11 @@
                                               msgpack::ArrayDocNode Args) {
   auto &Func = MF.getFunction();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  // No implicit kernel argument is used.
+  if (ST.getImplicitArgNumBytes(Func) == 0)
+    return;
+
   const Module *M = Func.getParent();
   auto &DL = M->getDataLayout();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -981,6 +988,7 @@
   auto Int32Ty = Type::getInt32Ty(Func.getContext());
   auto Int16Ty = Type::getInt16Ty(Func.getContext());
 
+  Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D123346.421549.patch
Type: text/x-patch
Size: 3646 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220408/7cc46bc4/attachment.bin>


More information about the llvm-commits mailing list