r372773 - [HIP] Support new kernel launching API
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 24 12:16:41 PDT 2019
Author: yaxunl
Date: Tue Sep 24 12:16:40 2019
New Revision: 372773
URL: http://llvm.org/viewvc/llvm-project?rev=372773&view=rev
Log:
[HIP] Support new kernel launching API
Differential Revision: https://reviews.llvm.org/D67947
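
For readers coming to this from the HIP side: with this change, passing
-fhip-new-launch-api makes clang lower a triple-chevron kernel call through the
same push/pop call-configuration scheme already used for CUDA 9.2+. The call
site pushes the launch configuration via __hipPushCallConfiguration, and the
generated host stub pops it via __hipPopCallConfiguration and forwards to
hipLaunchKernel. The plain C++ sketch below approximates that call sequence; it
is not the literal code clang emits, the bodies of the runtime entry points are
trivial stand-ins, and the prototype of __hipPopCallConfiguration is inferred
from the CGCUDANV.cpp change rather than copied from a header.

    #include <cstddef>

    struct dim3 {
      unsigned x, y, z;
      dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
    };
    typedef struct hipStream *hipStream_t;
    typedef int hipError_t; // stand-in; the real headers use an enum

    static dim3 PushedGrid(1), PushedBlock(1);
    static std::size_t PushedShmem = 0;
    static hipStream_t PushedStream = nullptr;

    // Called at the <<<...>>> call site; Sema now emits a call to this instead
    // of hipConfigureCall when the new API is enabled.
    hipError_t __hipPushCallConfiguration(dim3 grid, dim3 block,
                                          std::size_t shmem = 0,
                                          hipStream_t stream = nullptr) {
      PushedGrid = grid;
      PushedBlock = block;
      PushedShmem = shmem;
      PushedStream = stream;
      return 0;
    }

    // Called inside the generated host stub to recover the pushed configuration.
    hipError_t __hipPopCallConfiguration(dim3 *grid, dim3 *block,
                                         std::size_t *shmem, hipStream_t *stream) {
      *grid = PushedGrid;
      *block = PushedBlock;
      *shmem = PushedShmem;
      *stream = PushedStream;
      return 0;
    }

    // The real hipLaunchKernel lives in the HIP runtime; stubbed out here.
    hipError_t hipLaunchKernel(const void *func, dim3 grid, dim3 block,
                               void **args, std::size_t shmem, hipStream_t stream) {
      (void)func; (void)grid; (void)block; (void)args; (void)shmem; (void)stream;
      return 0;
    }

    // Approximation of the host stub emitted for __global__ void kern(int x):
    // pop the configuration, then forward everything to hipLaunchKernel.
    void kern_stub(int x) {
      void *args[] = {&x};
      dim3 grid(1), block(1);
      std::size_t shmem = 0;
      hipStream_t stream = nullptr;
      __hipPopCallConfiguration(&grid, &block, &shmem, &stream);
      hipLaunchKernel(reinterpret_cast<const void *>(&kern_stub), grid, block,
                      args, shmem, stream);
    }

    int main() {
      // Approximation of what kern<<<dim3(8), dim3(32)>>>(42) expands to:
      __hipPushCallConfiguration(dim3(8), dim3(32));
      kern_stub(42);
      return 0;
    }
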
Modified:
cfe/trunk/include/clang/Basic/LangOptions.def
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
cfe/trunk/lib/Sema/SemaCUDA.cpp
cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h
cfe/trunk/test/CodeGenCUDA/kernel-call.cu
Modified: cfe/trunk/include/clang/Basic/LangOptions.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/LangOptions.def (original)
+++ cfe/trunk/include/clang/Basic/LangOptions.def Tue Sep 24 12:16:40 2019
@@ -226,6 +226,8 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0,
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
+LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
+
LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")
Modified: cfe/trunk/include/clang/Driver/Options.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Tue Sep 24 12:16:40 2019
@@ -599,6 +599,9 @@ def hip_device_lib_EQ : Joined<["--"], "
HelpText<"HIP device library">;
def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">,
Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>;
+def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">,
+ Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">;
+def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">;
def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group<i_Group>,
HelpText<"Path to libomptarget-nvptx libraries">;
def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Tue Sep 24 12:16:40 2019
@@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(Cod
EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
- CudaFeature::CUDA_USES_NEW_LAUNCH))
+ CudaFeature::CUDA_USES_NEW_LAUNCH) ||
+ CGF.getLangOpts().HIPUseNewLaunchAPI)
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
@@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBody
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
- // Lookup cudaLaunchKernel function.
+ // Lookup cudaLaunchKernel/hipLaunchKernel function.
// cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
// void **args, size_t sharedMem,
// cudaStream_t stream);
+ // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ // void **args, size_t sharedMem,
+ // hipStream_t stream);
TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+ auto LaunchKernelName = addPrefixToName("LaunchKernel");
IdentifierInfo &cudaLaunchKernelII =
- CGM.getContext().Idents.get("cudaLaunchKernel");
+ CGM.getContext().Idents.get(LaunchKernelName);
FunctionDecl *cudaLaunchKernelFD = nullptr;
for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
@@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBody
if (cudaLaunchKernelFD == nullptr) {
CGM.Error(CGF.CurFuncDecl->getLocation(),
- "Can't find declaration for cudaLaunchKernel()");
+ "Can't find declaration for " + LaunchKernelName);
return;
}
// Create temporary dim3 grid_dim, block_dim.
@@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBody
/*ShmemSize=*/ShmemSize.getType(),
/*Stream=*/Stream.getType()},
/*isVarArg=*/false),
- "__cudaPopCallConfiguration");
+ addUnderscoredPrefixToName("PopCallConfiguration"));
CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
{GridDim.getPointer(), BlockDim.getPointer(),
@@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBody
const CGFunctionInfo &FI =
CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
llvm::FunctionCallee cudaLaunchKernelFn =
- CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel");
+ CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
LaunchKernelArgs);
CGF.EmitBranch(EndBlock);
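
The hunk above reuses two CGNVCUDARuntime helpers that already exist in
CGCUDANV.cpp, addPrefixToName and addUnderscoredPrefixToName, so the same stub
emitter serves both languages: "LaunchKernel" resolves to cudaLaunchKernel or
hipLaunchKernel, and "PopCallConfiguration" to __cudaPopCallConfiguration or
__hipPopCallConfiguration. A simplified, free-function sketch of what those
helpers do (the real ones are member functions that consult the language
options rather than taking a flag):

    #include <string>

    // Simplified sketch; IsHIP stands in for the HIP language-option check.
    std::string addPrefixToName(bool IsHIP, const std::string &FuncName) {
      return (IsHIP ? "hip" : "cuda") + FuncName;
    }

    std::string addUnderscoredPrefixToName(bool IsHIP, const std::string &FuncName) {
      return (IsHIP ? "__hip" : "__cuda") + FuncName;
    }
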
Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Tue Sep 24 12:16:40 2019
@@ -4774,6 +4774,10 @@ void Clang::ConstructJob(Compilation &C,
// Forward -cl options to -cc1
RenderOpenCLOptions(Args, CmdArgs);
+ if (Args.hasFlag(options::OPT_fhip_new_launch_api,
+ options::OPT_fno_hip_new_launch_api, false))
+ CmdArgs.push_back("-fhip-new-launch-api");
+
if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
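
Note that hasFlag defaults to false here, so the new lowering is opt-in: it is
enabled only when -fhip-new-launch-api is passed, and -fno-hip-new-launch-api
wins if it appears later on the command line. One way to check the forwarding
(assuming a HIP-capable toolchain setup; extra flags such as -nogpulib may be
needed depending on the installation) is to inspect the driver's job dump:

    clang++ -### -x hip -c -fhip-new-launch-api foo.hip

and look for -fhip-new-launch-api on the generated clang -cc1 command lines.
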
Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Tue Sep 24 12:16:40 2019
@@ -2517,6 +2517,7 @@ static void ParseLangArgs(LangOptions &O
Opts.CUDADeviceApproxTranscendentals = 1;
Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc);
+ Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api);
if (Opts.ObjC) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaCUDA.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp Tue Sep 24 12:16:40 2019
@@ -820,7 +820,8 @@ void Sema::inheritCUDATargetAttrs(Functi
std::string Sema::getCudaConfigureFuncName() const {
if (getLangOpts().HIP)
- return "hipConfigureCall";
+ return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
+ : "hipConfigureCall";
// New CUDA kernel launch sequence.
if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
Modified: cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h (original)
+++ cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h Tue Sep 24 12:16:40 2019
@@ -14,12 +14,21 @@ struct dim3 {
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
-typedef struct cudaStream *cudaStream_t;
-typedef enum cudaError {} cudaError_t;
#ifdef __HIP__
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
- cudaStream_t stream = 0);
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
size_t sharedSize = 0,
cudaStream_t stream = 0);
Modified: cfe/trunk/test/CodeGenCUDA/kernel-call.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-call.cu?rev=372773&r1=372772&r2=372773&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/kernel-call.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/kernel-call.cu Tue Sep 24 12:16:40 2019
@@ -3,14 +3,17 @@
// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \
// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK
// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes=HIP,CHECK
-
+// RUN: | FileCheck %s --check-prefixes=HIP-OLD,CHECK
+// RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefixes=HIP-NEW,CHECK
#include "Inputs/cuda.h"
// CHECK-LABEL: define{{.*}}g1
-// HIP: call{{.*}}hipSetupArgument
-// HIP: call{{.*}}hipLaunchByPtr
+// HIP-OLD: call{{.*}}hipSetupArgument
+// HIP-OLD: call{{.*}}hipLaunchByPtr
+// HIP-NEW: call{{.*}}__hipPopCallConfiguration
+// HIP-NEW: call{{.*}}hipLaunchKernel
// CUDA-OLD: call{{.*}}cudaSetupArgument
// CUDA-OLD: call{{.*}}cudaLaunch
// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
@@ -19,7 +22,8 @@ __global__ void g1(int x) {}
// CHECK-LABEL: define{{.*}}main
int main(void) {
- // HIP: call{{.*}}hipConfigureCall
+ // HIP-OLD: call{{.*}}hipConfigureCall
+ // HIP-NEW: call{{.*}}__hipPushCallConfiguration
// CUDA-OLD: call{{.*}}cudaConfigureCall
// CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
// CHECK: icmp