[Openmp-commits] [PATCH] D93356: [libomptarget][amdgpu] Call into deviceRTL instead of ockl
Jon Chesterfield via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jan 4 07:55:12 PST 2021
JonChesterfield updated this revision to Diff 314379.
JonChesterfield added a comment.
- update test, fix whitespace
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D93356/new/
https://reviews.llvm.org/D93356
Files:
clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
clang/test/OpenMP/amdgcn_target_codegen.cpp
openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -144,6 +144,10 @@
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
}
+EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
+ return GetNumberOfThreadsInBlock();
+}
+
// Stub implementations
-DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr }
+DEVICE void *__kmpc_impl_malloc(size_t) { return nullptr; }
DEVICE void __kmpc_impl_free(void *) {}
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
===================================================================
--- openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h
@@ -15,4 +15,6 @@
typedef uint64_t __kmpc_impl_lanemask_t;
typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
+EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads();
+
#endif
Index: clang/test/OpenMP/amdgcn_target_codegen.cpp
===================================================================
--- clang/test/OpenMP/amdgcn_target_codegen.cpp
+++ clang/test/OpenMP/amdgcn_target_codegen.cpp
@@ -13,9 +13,8 @@
int arr[N];
-// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0)
-// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32
-// CHECK-NEXT: sub nuw i32 [[VAR]], 64
+// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads()
+// CHECK: sub nuw i32 [[NUM_THREADS]], 64
// CHECK: call i32 @llvm.amdgcn.workitem.id.x()
#pragma omp target
for (int i = 0; i < N; i++) {
@@ -30,9 +29,8 @@
int arr[N];
-// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0)
-// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32
-// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0)
+// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[NUM_THREADS]], i16 0)
#pragma omp target simd
for (int i = 0; i < N; i++) {
arr[i] = 1;
Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
@@ -49,13 +49,12 @@
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Module *M = &CGF.CGM.getModule();
- const char *LocSize = "__ockl_get_local_size";
+ const char *LocSize = "__kmpc_amdgcn_gpu_num_threads";
llvm::Function *F = M->getFunction(LocSize);
if (!F) {
F = llvm::Function::Create(
- llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false),
+ llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
}
- return Bld.CreateTrunc(
- Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty);
+ return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D93356.314379.patch
Type: text/x-patch
Size: 3202 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20210104/f9c209d3/attachment.bin>
More information about the Openmp-commits
mailing list