[llvm] r342133 - AMDGPU: Fix not preserving alignment in call setups

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 13 05:14:31 PDT 2018


Author: arsenm
Date: Thu Sep 13 05:14:31 2018
New Revision: 342133

URL: http://llvm.org/viewvc/llvm-project?rev=342133&view=rev
Log:
AMDGPU: Fix not preserving alignment in call setups

If an argument was passed on the stack, the store
for it was given the default alignment rather than the
alignment actually guaranteed by its stack offset.

I'm not sure there's an observable change from this now.
It was previously observable due to bugs in the expansion
of unaligned loads and stores, but since those are fixed,
I don't think this matters much.
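
For context, the fix below derives the guaranteed alignment of a
stack-passed argument with llvm::MinAlign (llvm/Support/MathExtras.h),
which returns the largest power of two dividing both of its operands,
i.e. the alignment that can still be promised for a pointer of
alignment A offset by B bytes. A minimal standalone sketch of that
arithmetic (the 16-byte stack alignment in the example is illustrative
only; the patch itself queries Subtarget->getStackAlignment()):

#include <cassert>
#include <cstdint>

// Standalone sketch of llvm::MinAlign: the largest power of two
// dividing both A and B. (A | B) & -(A | B) isolates the lowest set
// bit, and 1 + ~x is -x in two's complement arithmetic.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  // With a 16-byte aligned stack, a slot at offset 4 can only be
  // assumed 4-byte aligned; an f64's natural alignment of 8 would
  // overstate it.
  assert(MinAlign(16, 4) == 4);
  assert(MinAlign(16, 8) == 8);
  // Offsets that are multiples of the stack alignment preserve it.
  assert(MinAlign(16, 32) == 16);
  return 0;
}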

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=342133&r1=342132&r2=342133&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Sep 13 05:14:31 2018
@@ -2518,12 +2518,17 @@ SDValue SITargetLowering::LowerCall(Call
       int32_t Offset = LocMemOffset;
 
       SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
+      unsigned Align = 0;
 
       if (IsTailCall) {
         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
         unsigned OpSize = Flags.isByVal() ?
           Flags.getByValSize() : VA.getValVT().getStoreSize();
 
+        // FIXME: We can have better than the minimum byval required alignment.
+        Align = Flags.isByVal() ? Flags.getByValAlign() :
+          MinAlign(Subtarget->getStackAlignment(), Offset);
+
         Offset = Offset + FPDiff;
         int FI = MFI.CreateFixedObject(OpSize, Offset, true);
 
@@ -2541,6 +2546,7 @@ SDValue SITargetLowering::LowerCall(Call
       } else {
         DstAddr = PtrOff;
         DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
+        Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset);
       }
 
       if (Outs[i].Flags.isByVal()) {
@@ -2555,7 +2561,7 @@ SDValue SITargetLowering::LowerCall(Call
 
         MemOpChains.push_back(Cpy);
       } else {
-        SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
+        SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align);
         MemOpChains.push_back(Store);
       }
     }

Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=342133&r1=342132&r2=342133&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Thu Sep 13 05:14:31 2018
@@ -721,6 +721,56 @@ define amdgpu_kernel void @test_call_ext
   ret void
 }
 
+; GCN-LABEL: {{^}}stack_passed_arg_alignment_v32i32_f64:
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:8
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4
+; GCN: s_swappc_b64
+define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+entry:
+  call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
+  ret void
+}
+
+; GCN-LABEL: {{^}}tail_call_byval_align16:
+; GCN: s_mov_b32 s5, s32
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:28 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:24 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:32
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:36
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:20
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16
+; GCN: s_getpc_b64
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:24 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:28 ; 4-byte Folded Reload
+; GCN: s_setpc_b64
+define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
+entry:
+  %alloca = alloca double, align 8, addrspace(5)
+  tail call void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval align 16 %alloca)
+  ret void
+}
+
+; GCN-LABEL: {{^}}tail_call_stack_passed_arg_alignment_v32i32_f64:
+; GCN: s_mov_b32 s5, s32
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:8
+; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_getpc_b64
+; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
+; GCN: s_setpc_b64
+define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+entry:
+  tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
+  ret void
+}
+
+declare void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval align 16) #0
+declare void @stack_passed_f64_arg(<32 x i32>, double) #0
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind noinline }



