[Openmp-commits] [openmp] 76bfbb7 - [libomptarget][amdgpu] Call into deviceRTL instead of ockl

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Mon Jan 4 08:49:38 PST 2021


Author: Jon Chesterfield
Date: 2021-01-04T16:48:47Z
New Revision: 76bfbb74d38b611f150e8e1a4becc11be95703da

URL: https://github.com/llvm/llvm-project/commit/76bfbb74d38b611f150e8e1a4becc11be95703da
DIFF: https://github.com/llvm/llvm-project/commit/76bfbb74d38b611f150e8e1a4becc11be95703da.diff

LOG: [libomptarget][amdgpu] Call into deviceRTL instead of ockl

[libomptarget][amdgpu] Call into deviceRTL instead of ockl

Amdgpu codegen presently emits a call into ockl. The same functionality
is already present in the deviceRTL. Adds an amdgpu specific entry point
to avoid the dependency. This lets simple openmp code (specifically, that
which doesn't use libm) run without rocm device libraries installed.

Reviewed By: ronlieb

Differential Revision: https://reviews.llvm.org/D93356

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
    clang/test/OpenMP/amdgcn_target_codegen.cpp
    openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
index ccffdf43549f..33d4ab838af1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
@@ -49,13 +49,12 @@ llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
 llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   llvm::Module *M = &CGF.CGM.getModule();
-  const char *LocSize = "__ockl_get_local_size";
+  const char *LocSize = "__kmpc_amdgcn_gpu_num_threads";
   llvm::Function *F = M->getFunction(LocSize);
   if (!F) {
     F = llvm::Function::Create(
-        llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false),
+        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
         llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
   }
-  return Bld.CreateTrunc(
-      Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty);
+  return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
 }

diff  --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp
index 85ef69942a50..416ed06083b0 100644
--- a/clang/test/OpenMP/amdgcn_target_codegen.cpp
+++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp
@@ -13,9 +13,8 @@ int test_amdgcn_target_tid_threads() {
 
   int arr[N];
 
-// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0)
-// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32
-// CHECK-NEXT: sub nuw i32 [[VAR]], 64
+// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads()
+// CHECK: sub nuw i32 [[NUM_THREADS]], 64
 // CHECK: call i32 @llvm.amdgcn.workitem.id.x()
 #pragma omp target
   for (int i = 0; i < N; i++) {
@@ -30,9 +29,8 @@ int test_amdgcn_target_tid_threads_simd() {
 
   int arr[N];
 
-// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0)
-// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32
-// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0)
+// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[NUM_THREADS]], i16 0)
 #pragma omp target simd
   for (int i = 0; i < N; i++) {
     arr[i] = 1;

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
index f7c75c09362a..80409d611f6f 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
@@ -15,4 +15,6 @@
 typedef uint64_t __kmpc_impl_lanemask_t;
 typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
 
+EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads();
+
 #endif

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 9fbdc67b56ab..3e70beb85d5b 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -144,6 +144,10 @@ DEVICE unsigned GetLaneId() {
   return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
 }
 
+EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
+  return GetNumberOfThreadsInBlock();
+}
+
 // Stub implementations
-DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr }
+DEVICE void *__kmpc_impl_malloc(size_t) { return nullptr; }
 DEVICE void __kmpc_impl_free(void *) {}


        


More information about the Openmp-commits mailing list