[Openmp-commits] [openmp] d27b39c - [libomptarget][amdgcn] Implement missing symbols in deviceRTL
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Mon Oct 19 16:24:30 PDT 2020
Author: Jon Chesterfield
Date: 2020-10-20T00:24:15+01:00
New Revision: d27b39ce11629f8742a487f9d1d2a343756d0da7
URL: https://github.com/llvm/llvm-project/commit/d27b39ce11629f8742a487f9d1d2a343756d0da7
DIFF: https://github.com/llvm/llvm-project/commit/d27b39ce11629f8742a487f9d1d2a343756d0da7.diff
LOG: [libomptarget][amdgcn] Implement missing symbols in deviceRTL
[libomptarget][amdgcn] Implement missing symbols in deviceRTL
Malloc, wtime are stubs. Malloc needs a hostrpc implementation which is
a work in progress, wtime needs some experimentation to find out the
multiplier to get a time in seconds as documentation is scarce.
Reviewed By: ronlieb
Differential Revision: https://reviews.llvm.org/D89725
Added:
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 4c3d421c78cc..8c53d99b9fb6 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -36,9 +36,12 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
DEVICE double __kmpc_impl_get_wtick() { return ((double)1E-9); }
-EXTERN uint64_t __clock64();
DEVICE double __kmpc_impl_get_wtime() {
- return ((double)1.0 / 745000000.0) * __clock64();
+ // The intrinsics for measuring time have undocumented frequency
+ // This will probably need to be found by measurement on a number of
+ // architectures. Until then, return 0, which is very inaccurate as a
+ // timer but resolves the undefined symbol at link time.
+ return 0;
}
// Warp vote function
@@ -115,11 +118,38 @@ DEVICE void __kmpc_impl_named_sync(uint32_t num_threads) {
__atomic_thread_fence(__ATOMIC_RELEASE);
}
-EXTERN uint64_t __ockl_get_local_size(uint32_t);
-EXTERN uint64_t __ockl_get_num_groups(uint32_t);
-DEVICE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
-DEVICE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
+namespace {
+DEVICE uint32_t grid_size_x() {
+ size_t grid_size_x_offset = 96; // In bits, from AQL kernel dispatch format
+ return *(uint32_t *)((char *)__builtin_amdgcn_dispatch_ptr() +
+ grid_size_x_offset / 8);
+}
+
+DEVICE uint32_t get_grid_dim(uint32_t n, uint16_t d) {
+ uint32_t q = n / d;
+ return q + (n > q * d);
+}
+DEVICE uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size,
+ uint16_t group_size) {
+ uint32_t r = grid_size - group_id * group_size;
+ return (r < group_size) ? r : group_size;
+}
+} // namespace
+
+DEVICE int GetNumberOfBlocksInKernel() {
+ return get_grid_dim(grid_size_x(), __builtin_amdgcn_workgroup_size_x());
+}
+
+DEVICE int GetNumberOfThreadsInBlock() {
+ return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(), grid_size_x(),
+ __builtin_amdgcn_workgroup_size_x());
+}
+
DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
DEVICE unsigned GetLaneId() {
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
}
+
+// Stub implementations
+DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr }
+DEVICE void __kmpc_impl_free(void *) {}
More information about the Openmp-commits
mailing list