[PATCH] D55453: AMDGPU: Fix offsets for < 4-byte aggregate kernel arguments
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 7 12:55:22 PST 2018
arsenm created this revision.
arsenm added a reviewer: rampitec.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
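For aggregate kernel arguments smaller than 4 bytes, we were still using
the rounded-down offset and alignment, even though the load-and-shift
optimization is not applied to aggregates because the loaded value can't
be trivially bitcast to an aggregate type. Use the argument's real offset
(and the alignment derived from it) in that case.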
https://reviews.llvm.org/D55453
Files:
lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
test/CodeGen/AMDGPU/kernel-args.ll
Index: test/CodeGen/AMDGPU/kernel-args.ll
===================================================================
--- test/CodeGen/AMDGPU/kernel-args.ll
+++ test/CodeGen/AMDGPU/kernel-args.ll
@@ -739,10 +739,10 @@
; multiple.
; FUNC-LABEL: {{^}}packed_struct_argument_alignment:
; HSA-GFX9: kernarg_segment_byte_size = 28
+; HSA-GFX9: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:13
+; HSA-GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:17
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4
-; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0xc
-; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10
define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
%val0 = extractvalue <{i32, i64}> %arg0, 0
%val1 = extractvalue <{i32, i64}> %arg0, 1
@@ -789,10 +789,18 @@
; FIXME: Why not all scalar loads?
; GCN-LABEL: {{^}}array_3xi16:
; HSA-GFX9: global_load_ushort v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:2
-; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
-; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x4
+; HSA-GFX9: global_load_ushort v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4
+; HSA-GFX9: global_load_ushort v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:6
define amdgpu_kernel void @array_3xi16(i8 %arg0, [3 x i16] %arg1) {
store volatile i8 %arg0, i8 addrspace(1)* undef
store volatile [3 x i16] %arg1, [3 x i16] addrspace(1)* undef
ret void
}
+
+; GCN-LABEL: {{^}}small_array_round_down_offset:
+; HSA-GFX9: global_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:1
+define amdgpu_kernel void @small_array_round_down_offset(i8, [1 x i8] %arg) {
+ %val = extractvalue [1 x i8] %arg, 0
+ store volatile i8 %val, i8 addrspace(1)* undef
+ ret void
+}
Index: lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -122,14 +122,17 @@
VectorType *VT = dyn_cast<VectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
+ bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();
+
VectorType *V4Ty = nullptr;
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
- unsigned AdjustedAlign = MinAlign(KernArgBaseAlign, AlignDownOffset);
+ unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
+ KernArgBaseAlign);
Value *ArgPtr;
- if (Size < 32 && !ArgTy->isAggregateType()) { // FIXME: Handle aggregate types
+ if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
// loading earlier than the argument address, and extracting the relevant
// bits.
@@ -147,7 +150,7 @@
} else {
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
KernArgSegment,
- AlignDownOffset,
+ EltOffset,
Arg.getName() + ".kernarg.offset");
ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
@@ -198,7 +201,7 @@
// TODO: Convert noalias arg to !noalias
- if (Size < 32 && !ArgTy->isAggregateType()) {
+ if (DoShiftOpt) {
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D55453.177284.patch
Type: text/x-patch
Size: 3593 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181207/22049c35/attachment.bin>
More information about the llvm-commits
mailing list