r334457 - [CUDA][HIP] Set kernel calling convention before arrange function
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 11 17:16:34 PDT 2018
Author: yaxunl
Date: Mon Jun 11 17:16:33 2018
New Revision: 334457
URL: http://llvm.org/viewvc/llvm-project?rev=334457&view=rev
Log:
[CUDA][HIP] Set kernel calling convention before arrange function
Currently clang set kernel calling convention for CUDA/HIP after
arranging function, which causes incorrect kernel function type since
it depends on calling convention.
This patch moves setting kernel convention before arranging
function.
Differential Revision: https://reviews.llvm.org/D47733
Added:
cfe/trunk/test/CodeGenCUDA/kernel-args.cu
Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/lib/CodeGen/TargetInfo.h
Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=334457&r1=334456&r2=334457&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jun 11 17:16:33 2018
@@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD);
}
+/// Set calling convention for CUDA/HIP kernel.
+static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
+ const FunctionDecl *FD) {
+ if (FD->hasAttr<CUDAGlobalAttr>()) {
+ const FunctionType *FT = FTy->getAs<FunctionType>();
+ CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
+ FTy = FT->getCanonicalTypeUnqualified();
+ }
+}
+
/// Arrange the argument and result information for a declaration or
/// definition of the given C++ non-static member function. The
/// member function must be an ordinary function, i.e. not a
@@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaratio
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
- CanQual<FunctionProtoType> prototype = GetFormalType(MD);
+ CanQualType FT = GetFormalType(MD).getAs<Type>();
+ setCUDAKernelCallingConvention(FT, CGM, MD);
+ auto prototype = FT.getAs<FunctionProtoType>();
if (MD->isInstance()) {
// The abstract case is perfectly fine.
@@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
assert(isa<FunctionType>(FTy));
+ setCUDAKernelCallingConvention(FTy, CGM, FD);
// When declaring a function without a prototype, always use a
// non-variadic type.
Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=334457&r1=334456&r2=334457&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jun 11 17:16:33 2018
@@ -3671,8 +3671,6 @@ void CodeGenModule::EmitGlobalFunctionDe
MaybeHandleStaticInExternC(D, Fn);
- if (D->hasAttr<CUDAGlobalAttr>())
- getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
maybeSetTrivialComdat(*D, *Fn);
Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=334457&r1=334456&r2=334457&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Jun 11 17:16:33 2018
@@ -7646,7 +7646,7 @@ public:
llvm::Function *BlockInvokeFunc,
llvm::Value *BlockLiteral) const override;
bool shouldEmitStaticExternCAliases() const override;
- void setCUDAKernelCallingConvention(llvm::Function *F) const override;
+ void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}
@@ -7783,8 +7783,9 @@ bool AMDGPUTargetCodeGenInfo::shouldEmit
}
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
- llvm::Function *F) const {
- F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+ const FunctionType *&FT) const {
+ FT = getABIInfo().getContext().adjustFunctionType(
+ FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}
//===----------------------------------------------------------------------===//
Modified: cfe/trunk/lib/CodeGen/TargetInfo.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.h?rev=334457&r1=334456&r2=334457&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.h (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.h Mon Jun 11 17:16:33 2018
@@ -302,7 +302,7 @@ public:
/// as 'used', and having internal linkage.
virtual bool shouldEmitStaticExternCAliases() const { return true; }
- virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
+ virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
};
} // namespace CodeGen
Added: cfe/trunk/test/CodeGenCUDA/kernel-args.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-args.cu?rev=334457&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/kernel-args.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/kernel-args.cu Mon Jun 11 17:16:33 2018
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
+#include "Inputs/cuda.h"
+
+struct A {
+ int a[32];
+};
+
+// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
+// NVPTX: define void @_Z6kernel1A(%struct.A* byval align 4 %x)
+__global__ void kernel(A x) {
+}
+
+class Kernel {
+public:
+ // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
+ // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval align 4 %x)
+ static __global__ void memberKernel(A x){}
+ template<typename T> static __global__ void templateMemberKernel(T x) {}
+};
+
+
+template <typename T>
+__global__ void templateKernel(T x) {}
+
+void launch(void*);
+
+void test() {
+ Kernel K;
+ // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
+ // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval align 4 %x)
+ launch((void*)templateKernel<A>);
+
+ // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
+ // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval align 4 %x)
+ launch((void*)Kernel::templateMemberKernel<A>);
+}
More information about the cfe-commits
mailing list