[llvm] r273080 - AMDGPU: Fix kernel argument alignment impacting stack size
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 17 22:15:54 PDT 2016
Author: arsenm
Date: Sat Jun 18 00:15:53 2016
New Revision: 273080
URL: http://llvm.org/viewvc/llvm-project?rev=273080&view=rev
Log:
AMDGPU: Fix kernel argument alignment impacting stack size
Don't use CCState::AllocateStack for kernel arguments, because they
have nothing to do with the stack. The ensureMaxAlignment call inside
AllocateStack was still changing the stack alignment.
Added:
llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td?rev=273080&r1=273079&r2=273080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td Sat Jun 18 00:15:53 2016
@@ -110,7 +110,7 @@ def CC_R600 : CallingConv<[
// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
- CCCustom<"allocateStack">
+ CCCustom<"allocateKernArg">
]>;
def CC_AMDGPU : CallingConv<[
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=273080&r1=273079&r2=273080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Sat Jun 18 00:15:53 2016
@@ -31,13 +31,15 @@
#include "SIInstrInfo.h"
using namespace llvm;
-static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
- ArgFlags.getOrigAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ MachineFunction &MF = State.getMachineFunction();
+ AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+ uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
+ ArgFlags.getOrigAlign());
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp?rev=273080&r1=273079&r2=273080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp Sat Jun 18 00:15:53 2016
@@ -1,8 +1,5 @@
#include "AMDGPUMachineFunction.h"
-#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
+
using namespace llvm;
// Pin the vtable to this file.
@@ -10,8 +7,9 @@ void AMDGPUMachineFunction::anchor() {}
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
+ KernArgSize(0),
+ MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
ScratchSize(0),
- IsKernel(true) {
-}
+ IsKernel(true) {}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h?rev=273080&r1=273079&r2=273080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h Sat Jun 18 00:15:53 2016
@@ -16,10 +16,25 @@
namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
+ uint64_t KernArgSize;
+ unsigned MaxKernArgAlign;
+
virtual void anchor();
public:
AMDGPUMachineFunction(const MachineFunction &MF);
+
+ uint64_t allocateKernArg(uint64_t Size, unsigned Align) {
+ assert(isPowerOf2_32(Align));
+ KernArgSize = alignTo(KernArgSize, Align);
+
+ uint64_t Result = KernArgSize;
+ KernArgSize += Size;
+
+ MaxKernArgAlign = std::max(Align, MaxKernArgAlign);
+ return Result;
+ }
+
/// A map to keep track of local memory objects and their offsets within
/// the local memory space.
std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
Added: llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll?rev=273080&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll Sat Jun 18 00:15:53 2016
@@ -0,0 +1,44 @@
+; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+; Test that the alignment of kernel arguments does not impact the
+; alignment of the stack
+
+; CHECK-LABEL: {{^}}no_args:
+; CHECK: ScratchSize: 8{{$}}
+define void @no_args() {
+ %alloca = alloca i8
+ store volatile i8 0, i8* %alloca
+ ret void
+}
+
+; CHECK-LABEL: {{^}}force_align32:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align32(<8 x i32>) {
+ %alloca = alloca i8
+ store volatile i8 0, i8* %alloca
+ ret void
+}
+
+; CHECK-LABEL: {{^}}force_align64:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align64(<16 x i32>) {
+ %alloca = alloca i8
+ store volatile i8 0, i8* %alloca
+ ret void
+}
+
+; CHECK-LABEL: {{^}}force_align128:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align128(<32 x i32>) {
+ %alloca = alloca i8
+ store volatile i8 0, i8* %alloca
+ ret void
+}
+
+; CHECK-LABEL: {{^}}force_align256:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align256(<64 x i32>) {
+ %alloca = alloca i8
+ store volatile i8 0, i8* %alloca
+ ret void
+}
More information about the llvm-commits mailing list