[llvm] r186916 - R600: Use the same compute kernel calling convention for all GPUs
Tom Stellard
thomas.stellard at amd.com
Mon Jul 22 18:48:05 PDT 2013
Author: tstellar
Date: Mon Jul 22 20:48:05 2013
New Revision: 186916
URL: http://llvm.org/viewvc/llvm-project?rev=186916&view=rev
Log:
R600: Use the same compute kernel calling convention for all GPUs
A side-effect of this is that now the compiler expects kernel arguments
to be 4-byte aligned.
Reviewed-by: Vincent Lejeune <vljn at ovi.com>
Modified:
llvm/trunk/lib/Target/R600/AMDGPUCallingConv.td
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
llvm/trunk/test/CodeGen/R600/128bit-kernel-args.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUCallingConv.td?rev=186916&r1=186915&r2=186916&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUCallingConv.td (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUCallingConv.td Mon Jul 22 20:48:05 2013
@@ -36,9 +36,9 @@ def CC_SI : CallingConv<[
]>;
-// Calling convention for SI compute kernels
-def CC_SI_Kernel : CallingConv<[
- CCIfType<[v4i32, v4f32], CCAssignToStack <16, 4>>,
+// Calling convention for compute kernels
+def CC_AMDGPU_Kernel : CallingConv<[
+ CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>,
CCIfType<[i64, f64], CCAssignToStack < 8, 4>>,
CCIfType<[i32, f32], CCAssignToStack < 4, 4>>,
CCIfType<[i16], CCAssignToStack < 2, 4>>,
@@ -46,8 +46,14 @@ def CC_SI_Kernel : CallingConv<[
]>;
def CC_AMDGPU : CallingConv<[
- CCIf<"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
- "ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_SI_Kernel>>,
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() == "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
+ "ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() < "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
+ "ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
]>;
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=186916&r1=186915&r2=186916&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Mon Jul 22 20:48:05 2013
@@ -18,6 +18,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
+#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
Modified: llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ISelLowering.cpp?rev=186916&r1=186915&r2=186916&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600ISelLowering.cpp Mon Jul 22 20:48:05 2013
@@ -16,6 +16,7 @@
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -1212,11 +1213,17 @@ SDValue R600TargetLowering::LowerFormalA
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- unsigned ParamOffsetBytes = 36;
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ AnalyzeFormalArguments(CCInfo, Ins);
+
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
- EVT VT = Ins[i].VT;
+ CCValAssign &VA = ArgLocs[i];
+ EVT VT = VA.getLocVT();
Type *ArgType = FuncArg->getType();
unsigned ArgSizeInBits = ArgType->isPointerTy() ?
32 : ArgType->getPrimitiveSizeInBits();
@@ -1239,12 +1246,14 @@ SDValue R600TargetLowering::LowerFormalA
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
+
+ // The first 36 bytes of the input buffer contains information about
+ // thread group and global sizes.
SDValue Arg = DAG.getExtLoad(LoadType, DL, VT, DAG.getRoot(),
- DAG.getConstant(ParamOffsetBytes, MVT::i32),
- MachinePointerInfo(UndefValue::get(PtrTy)),
- ArgVT, false, false, ArgBytes);
+ DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
+ ArgVT, false, false, ArgBytes);
InVals.push_back(Arg);
- ParamOffsetBytes += ArgBytes;
}
return Chain;
}
Modified: llvm/trunk/test/CodeGen/R600/128bit-kernel-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/128bit-kernel-args.ll?rev=186916&r1=186915&r2=186916&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/128bit-kernel-args.ll (original)
+++ llvm/trunk/test/CodeGen/R600/128bit-kernel-args.ll Mon Jul 22 20:48:05 2013
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
; R600-CHECK: @v4i32_kernel_arg
-; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
; SI-CHECK: @v4i32_kernel_arg
; SI-CHECK: BUFFER_STORE_DWORDX4
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
@@ -12,7 +12,7 @@ entry:
}
; R600-CHECK: @v4f32_kernel_arg
-; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
; SI-CHECK: @v4f32_kernel_arg
; SI-CHECK: BUFFER_STORE_DWORDX4
define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) {
More information about the llvm-commits
mailing list