[llvm] r342133 - AMDGPU: Fix not preserving alignment in call setups
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 13 05:14:31 PDT 2018
Author: arsenm
Date: Thu Sep 13 05:14:31 2018
New Revision: 342133
URL: http://llvm.org/viewvc/llvm-project?rev=342133&view=rev
Log:
AMDGPU: Fix not preserving alignment in call setups
If an argument was passed on the stack, the store created
for it used the default alignment rather than the alignment
actually guaranteed by the stack slot.
I'm not sure there's still an observable change from this. It
was observable due to bugs in the expansion of unaligned
loads and stores, but since those are fixed, I don't think
this matters much now.
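For illustration, here is a minimal standalone sketch of the alignment
computation the patch switches to. MinAlign below mirrors the helper in
llvm/include/llvm/Support/MathExtras.h (the largest power of two dividing
both operands); the 4-byte stack alignment is an assumed stand-in for what
Subtarget->getStackAlignment() returns on AMDGPU, not taken from the patch:

    #include <cstdint>
    #include <cstdio>

    // Largest power of two that divides both A and B, as in MathExtras.h.
    constexpr uint64_t MinAlign(uint64_t A, uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }

    int main() {
      const uint64_t StackAlign = 4; // assumed AMDGPU stack alignment
      // An f64 passed at offset 4 or 8 can only be assumed 4-byte
      // aligned, so its store may be split into two dword stores,
      // matching the buffer_store_dword pairs checked in the test below.
      std::printf("%llu\n", (unsigned long long)MinAlign(StackAlign, 4)); // 4
      std::printf("%llu\n", (unsigned long long)MinAlign(StackAlign, 8)); // 4
      return 0;
    }

Before the patch, the store node fell back to a default alignment derived
from the value type, which for an f64 over-states what a 4-byte-aligned
stack slot can guarantee.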
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=342133&r1=342132&r2=342133&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Sep 13 05:14:31 2018
@@ -2518,12 +2518,17 @@ SDValue SITargetLowering::LowerCall(Call
       int32_t Offset = LocMemOffset;
       SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
+      unsigned Align = 0;
 
       if (IsTailCall) {
         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
         unsigned OpSize = Flags.isByVal() ?
           Flags.getByValSize() : VA.getValVT().getStoreSize();
 
+        // FIXME: We can have better than the minimum byval required alignment.
+        Align = Flags.isByVal() ? Flags.getByValAlign() :
+          MinAlign(Subtarget->getStackAlignment(), Offset);
+
         Offset = Offset + FPDiff;
         int FI = MFI.CreateFixedObject(OpSize, Offset, true);
@@ -2541,6 +2546,7 @@ SDValue SITargetLowering::LowerCall(Call
       } else {
         DstAddr = PtrOff;
         DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
+        Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset);
       }
 
       if (Outs[i].Flags.isByVal()) {
@@ -2555,7 +2561,7 @@ SDValue SITargetLowering::LowerCall(Call
         MemOpChains.push_back(Cpy);
       } else {
-        SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
+        SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align);
         MemOpChains.push_back(Store);
       }
     }
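For context, the functional change in this hunk is just the extra alignment
operand on the store. A commented sketch of the before/after call shapes;
the explanatory comments are mine, not from the patch, and the fallback
behavior described is my recollection of how SelectionDAG treats an omitted
alignment:

    // Before: no alignment passed, so the node's alignment is inferred
    // from the stored value type (8 bytes for an f64), which can claim
    // more than the 4-byte-aligned stack slot actually provides.
    SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);

    // After: capped at MinAlign(stack alignment, offset), letting
    // legalization split the access when the slot is under-aligned.
    SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align);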
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=342133&r1=342132&r2=342133&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Thu Sep 13 05:14:31 2018
@@ -721,6 +721,56 @@ define amdgpu_kernel void @test_call_ext
ret void
}
+; GCN-LABEL: {{^}}stack_passed_arg_alignment_v32i32_f64:
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:8
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4
+; GCN: s_swappc_b64
+define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+entry:
+  call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
+  ret void
+}
+
+; GCN-LABEL: {{^}}tail_call_byval_align16:
+; GCN: s_mov_b32 s5, s32
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:28 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:24 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:32
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:36
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:20
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16
+; GCN: s_getpc_b64
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:24 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:28 ; 4-byte Folded Reload
+; GCN: s_setpc_b64
+define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
+entry:
+  %alloca = alloca double, align 8, addrspace(5)
+  tail call void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval align 16 %alloca)
+  ret void
+}
+
+; GCN-LABEL: {{^}}tail_call_stack_passed_arg_alignment_v32i32_f64:
+; GCN: s_mov_b32 s5, s32
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:8
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_getpc_b64
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
+; GCN: s_setpc_b64
+define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+entry:
+  tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
+  ret void
+}
+
+declare void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval align 16) #0
+declare void @stack_passed_f64_arg(<32 x i32>, double) #0
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }