[PATCH] D123346: AMDGPU: Align the implicit kernel argument segment to 8 bytes for v5
Changpeng Fang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 8 09:24:42 PDT 2022
cfang updated this revision to Diff 421549.
cfang added a comment.
Use alignTo to force the alignment of the implicit kernarg segment:
    Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D123346/new/
https://reviews.llvm.org/D123346
Files:
llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
Index: llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
+++ llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
@@ -21,7 +21,7 @@
; CHECK-LABEL: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NEXT: - .name: four
-; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .offset: 0
; CHECK-NEXT: .size: 4
; CHECK-NEXT: .value_kind: by_value
; CHECK-NEXT: - .offset: 8
@@ -56,4 +56,3 @@
; CHECK-LABEL: .name: test_aligned_to_eight
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
-
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
@@ -24,8 +24,8 @@
}
; CHECK: - .args:
-; CHECK: .value_kind: hidden_multigrid_sync_arg
-; PRE-GFX9: .offset: 200
+; PRE-GFX9: .value_kind: hidden_multigrid_sync_arg
+; PRE-GFX9-NEXT: .offset: 200
; PRE-GFX9-NEXT: .size: 4
; PRE-GFX9-NEXT: .value_kind: hidden_private_base
; PRE-GFX9-NEXT: .offset: 204
@@ -44,8 +44,8 @@
}
; CHECK: - .args:
-; CHECK: .value_kind: hidden_multigrid_sync_arg
-; PRE-GFX9: .offset: 200
+; PRE-GFX9: .value_kind: hidden_multigrid_sync_arg
+; PRE-GFX9-NEXT: .offset: 200
; PRE-GFX9-NEXT: .size: 4
; PRE-GFX9-NEXT: .value_kind: hidden_private_base
; PRE-GFX9-NEXT: .offset: 204
@@ -64,8 +64,8 @@
}
; CHECK: - .args:
-; CHECK: .value_kind: hidden_multigrid_sync_arg
-; PRE-GFX9: .offset: 192
+; PRE-GFX9: .value_kind: hidden_multigrid_sync_arg
+; PRE-GFX9-NEXT: .offset: 192
; PRE-GFX9-NEXT: .size: 4
; PRE-GFX9-NEXT: .value_kind: hidden_private_base
; PRE-GFX9-NEXT: .offset: 196
Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -801,6 +801,8 @@
auto &DL = M->getDataLayout();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
+ Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
+
if (HiddenArgNumBytes >= 8)
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
Args);
@@ -973,6 +975,11 @@
msgpack::ArrayDocNode Args) {
auto &Func = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ // No implicit kernel argument is used.
+ if (ST.getImplicitArgNumBytes(Func) == 0)
+ return;
+
const Module *M = Func.getParent();
auto &DL = M->getDataLayout();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
@@ -981,6 +988,7 @@
auto Int32Ty = Type::getInt32Ty(Func.getContext());
auto Int16Ty = Type::getInt16Ty(Func.getContext());
+ Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D123346.421549.patch
Type: text/x-patch
Size: 3646 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220408/7cc46bc4/attachment.bin>
More information about the llvm-commits mailing list