[Openmp-commits] [openmp] d27b39c - [libomptarget][amdgcn] Implement missing symbols in deviceRTL

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Mon Oct 19 16:24:30 PDT 2020


Author: Jon Chesterfield
Date: 2020-10-20T00:24:15+01:00
New Revision: d27b39ce11629f8742a487f9d1d2a343756d0da7

URL: https://github.com/llvm/llvm-project/commit/d27b39ce11629f8742a487f9d1d2a343756d0da7
DIFF: https://github.com/llvm/llvm-project/commit/d27b39ce11629f8742a487f9d1d2a343756d0da7.diff

LOG: [libomptarget][amdgcn] Implement missing symbols in deviceRTL

[libomptarget][amdgcn] Implement missing symbols in deviceRTL

Malloc, wtime are stubs. Malloc needs a hostrpc implementation which is
a work in progress, wtime needs some experimentation to find out the
multiplier to get a time in seconds as documentation is scarce.

Reviewed By: ronlieb

Differential Revision: https://reviews.llvm.org/D89725

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index 4c3d421c78cc..8c53d99b9fb6 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -36,9 +36,12 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
 
 DEVICE double __kmpc_impl_get_wtick() { return ((double)1E-9); }
 
-EXTERN uint64_t __clock64();
 DEVICE double __kmpc_impl_get_wtime() {
-  return ((double)1.0 / 745000000.0) * __clock64();
+  // The intrinsics for measuring time have undocumented frequency
+  // This will probably need to be found by measurement on a number of
+  // architectures. Until then, return 0, which is very inaccurate as a
+  // timer but resolves the undefined symbol at link time.
+  return 0;
 }
 
 // Warp vote function
@@ -115,11 +118,38 @@ DEVICE void __kmpc_impl_named_sync(uint32_t num_threads) {
   __atomic_thread_fence(__ATOMIC_RELEASE);
 }
 
-EXTERN uint64_t __ockl_get_local_size(uint32_t);
-EXTERN uint64_t __ockl_get_num_groups(uint32_t);
-DEVICE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
-DEVICE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
+namespace {
+DEVICE uint32_t grid_size_x() {
+  size_t grid_size_x_offset = 96; // In bits, from AQL kernel dispatch format
+  return *(uint32_t *)((char *)__builtin_amdgcn_dispatch_ptr() +
+                       grid_size_x_offset / 8);
+}
+
+DEVICE uint32_t get_grid_dim(uint32_t n, uint16_t d) {
+  uint32_t q = n / d;
+  return q + (n > q * d);
+}
+DEVICE uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size,
+                                  uint16_t group_size) {
+  uint32_t r = grid_size - group_id * group_size;
+  return (r < group_size) ? r : group_size;
+}
+} // namespace
+
+DEVICE int GetNumberOfBlocksInKernel() {
+  return get_grid_dim(grid_size_x(), __builtin_amdgcn_workgroup_size_x());
+}
+
+DEVICE int GetNumberOfThreadsInBlock() {
+  return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(), grid_size_x(),
+                           __builtin_amdgcn_workgroup_size_x());
+}
+
 DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
 DEVICE unsigned GetLaneId() {
   return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
 }
+
+// Stub implementations
+DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr }
+DEVICE void __kmpc_impl_free(void *) {}


        


More information about the Openmp-commits mailing list