[clang] 30307a7 - [libc] Implement the 'clock()' function on the GPU

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 30 14:16:54 PDT 2023


Author: Joseph Huber
Date: 2023-08-30T16:16:34-05:00
New Revision: 30307a7bb795cf5e5ccdcb923bd8a7401d3d4bd6

URL: https://github.com/llvm/llvm-project/commit/30307a7bb795cf5e5ccdcb923bd8a7401d3d4bd6
DIFF: https://github.com/llvm/llvm-project/commit/30307a7bb795cf5e5ccdcb923bd8a7401d3d4bd6.diff

LOG: [libc] Implement the 'clock()' function on the GPU

This patch implements the `clock()` function on the GPU. This function
is supposed to return a timestamp that can be converted into seconds
using the `CLOCKS_PER_SEC` macro. The GPU has a fixed frequency timer
that can be used for this purpose. However, there are some
considerations.

First is that AMDGPU does not have a statically known fixed frequency. I
know internally that the gfx10xx and gfx11xx series use a 100 MHz clock
which will probably remain for the future. Gfx9xx typically uses a 25
MHz clock except for the Vega 10 GPU. The only way to know for sure is
to look it up from the runtime. For this purpose, I elected to default
it to some known values and assign these to an externally visible symbol
that can be initialized if needed. If we do not have a good guess we
just return zero.

Second is that the `CLOCKS_PER_SEC` macro only gives about a microsecond
of resolution. POSIX demands that it's 1,000,000 so it's best that we
keep with this tradition as almost all targets seem to respect this. The
reason this is important is because on the GPU we will almost assuredly
be copying the host's macro value (see the wrapper header) so we should
go with the POSIX version that's most likely to be set. (We could
probably make a warning if the included header doesn't match the
expected value).

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D159118

Added: 
    clang/lib/Headers/llvm_libc_wrappers/time.h
    libc/include/llvm-libc-macros/gpu/CMakeLists.txt
    libc/include/llvm-libc-macros/gpu/time-macros.h
    libc/src/time/gpu/CMakeLists.txt
    libc/src/time/gpu/clock.cpp
    libc/src/time/gpu/time_utils.cpp
    libc/src/time/gpu/time_utils.h

Modified: 
    libc/config/gpu/api.td
    libc/config/gpu/entrypoints.txt
    libc/config/gpu/headers.txt
    libc/docs/gpu/support.rst
    libc/include/llvm-libc-macros/time-macros.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/llvm_libc_wrappers/time.h b/clang/lib/Headers/llvm_libc_wrappers/time.h
new file mode 100644
index 00000000000000..894f6f6c7aaf77
--- /dev/null
+++ b/clang/lib/Headers/llvm_libc_wrappers/time.h
@@ -0,0 +1,34 @@
+//===-- Wrapper for C standard time.h declarations on the GPU -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
+#define __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
+
+#if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
+#error "This file is for GPU offloading compilation only"
+#endif
+
+#include_next <time.h>
+
+#if __has_include(<llvm-libc-decls/time.h>)
+
+#if defined(__HIP__) || defined(__CUDA__)
+#define __LIBC_ATTRS __attribute__((device))
+#endif // defined(__HIP__) || defined(__CUDA__)
+
+#pragma omp begin declare target
+
+_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!");
+// Include the same declarations header that the guard above checked for.
+#include <llvm-libc-decls/time.h>
+
+#pragma omp end declare target
+
+#endif // __has_include(<llvm-libc-decls/time.h>)
+
+#endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__

diff  --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td
index 3b55f7dc905729..8d3879eddf5b5c 100644
--- a/libc/config/gpu/api.td
+++ b/libc/config/gpu/api.td
@@ -34,3 +34,9 @@ def StdIOAPI : PublicAPI<"stdio.h"> {
 def IntTypesAPI : PublicAPI<"inttypes.h"> {
   let Types = ["imaxdiv_t"];
 }
+
+def TimeAPI : PublicAPI<"time.h"> {
+  let Types = [
+    "clock_t",
+  ];
+}

diff  --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 89ebe6df6a8c98..2cc420b8e528b1 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -96,6 +96,9 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.inttypes.strtoimax
     libc.src.inttypes.strtoumax
 
+    # time.h entrypoints
+    libc.src.time.clock
+
     # gpu/rpc.h entrypoints
     libc.src.gpu.rpc_reset
     libc.src.gpu.rpc_host_call

diff  --git a/libc/config/gpu/headers.txt b/libc/config/gpu/headers.txt
index cd96d30cb4c008..3a8f091cda413a 100644
--- a/libc/config/gpu/headers.txt
+++ b/libc/config/gpu/headers.txt
@@ -4,6 +4,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.inttypes
     libc.include.math
     libc.include.fenv
+    libc.include.time
     libc.include.errno
     libc.include.stdlib
     libc.include.stdio

diff  --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst
index bc6daad1410bbd..623e8462deaa6e 100644
--- a/libc/docs/gpu/support.rst
+++ b/libc/docs/gpu/support.rst
@@ -128,3 +128,12 @@ fclose         |check|    |check|
 fopen          |check|    |check|
 fread          |check|    |check|
 =============  =========  ============
+
+time.h
+--------
+
+=============  =========  ============
+Function Name  Available  RPC Required
+=============  =========  ============
+clock          |check|
+=============  =========  ============

diff  --git a/libc/include/llvm-libc-macros/gpu/CMakeLists.txt b/libc/include/llvm-libc-macros/gpu/CMakeLists.txt
new file mode 100644
index 00000000000000..ea08c63c003019
--- /dev/null
+++ b/libc/include/llvm-libc-macros/gpu/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_header(
+  time_macros
+  HDR
+    time-macros.h
+)

diff  --git a/libc/include/llvm-libc-macros/gpu/time-macros.h b/libc/include/llvm-libc-macros/gpu/time-macros.h
new file mode 100644
index 00000000000000..baf2ea5f41324f
--- /dev/null
+++ b/libc/include/llvm-libc-macros/gpu/time-macros.h
@@ -0,0 +1,14 @@
+//===-- Definition of macros from time.h ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_MACROS_GPU_TIME_MACROS_H
+#define __LLVM_LIBC_MACROS_GPU_TIME_MACROS_H
+
+#define CLOCKS_PER_SEC 1000000
+
+#endif // __LLVM_LIBC_MACROS_GPU_TIME_MACROS_H

diff  --git a/libc/include/llvm-libc-macros/time-macros.h b/libc/include/llvm-libc-macros/time-macros.h
index ffd46317b92e65..e5b79f8e4dc48d 100644
--- a/libc/include/llvm-libc-macros/time-macros.h
+++ b/libc/include/llvm-libc-macros/time-macros.h
@@ -3,6 +3,8 @@
 
 #ifdef __linux__
 #include "linux/time-macros.h"
+#elif defined(__AMDGPU__) || defined(__NVPTX__)
+#include "gpu/time-macros.h"
 #endif
 
 #endif // __LLVM_LIBC_MACROS_TIME_MACROS_H

diff  --git a/libc/src/time/gpu/CMakeLists.txt b/libc/src/time/gpu/CMakeLists.txt
new file mode 100644
index 00000000000000..c55ce23e19cdb2
--- /dev/null
+++ b/libc/src/time/gpu/CMakeLists.txt
@@ -0,0 +1,19 @@
+add_object_library(
+  time_utils
+  SRCS
+    time_utils.cpp
+  HDRS
+    time_utils.h
+)
+
+add_entrypoint_object(
+  clock
+  SRCS
+    clock.cpp
+  HDRS
+    ../clock.h
+  DEPENDS
+    libc.include.time
+    libc.src.__support.GPU.utils
+    .time_utils
+)

diff  --git a/libc/src/time/gpu/clock.cpp b/libc/src/time/gpu/clock.cpp
new file mode 100644
index 00000000000000..146addd24a7fc8
--- /dev/null
+++ b/libc/src/time/gpu/clock.cpp
@@ -0,0 +1,29 @@
+//===-- GPU implementation of the clock function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "time_utils.h"
+
+#include "src/time/clock.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(clock_t, clock, ()) {
+  if (!GPU_CLOCKS_PER_SEC)
+    return clock_t(0);
+
+  uint64_t ticks = gpu::fixed_frequency_clock();
+
+  // We need to convert between the GPU's fixed frequency and whatever `time.h`
+  // declares it to be. This is done so that dividing the result of this
+  // function by 'CLOCKS_PER_SEC' yields the elapsed time.
+  if (GPU_CLOCKS_PER_SEC > CLOCKS_PER_SEC)
+    return clock_t(ticks / (GPU_CLOCKS_PER_SEC / CLOCKS_PER_SEC));
+  return clock_t(ticks * (CLOCKS_PER_SEC / GPU_CLOCKS_PER_SEC));
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/time/gpu/time_utils.cpp b/libc/src/time/gpu/time_utils.cpp
new file mode 100644
index 00000000000000..ef5e72f208116b
--- /dev/null
+++ b/libc/src/time/gpu/time_utils.cpp
@@ -0,0 +1,22 @@
+//===-- Generic utilities for GPU timing ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "time_utils.h"
+
+namespace __llvm_libc {
+
+#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+// This is expected to be initialized by the runtime if the default value is
+// insufficient.
+// TODO: Once we have another use-case for this we should put it in a common
+// device environment struct.
+extern "C" [[gnu::visibility("protected")]] uint64_t
+    [[clang::address_space(4)]] __llvm_libc_clock_freq = clock_freq;
+#endif
+
+} // namespace __llvm_libc

diff  --git a/libc/src/time/gpu/time_utils.h b/libc/src/time/gpu/time_utils.h
new file mode 100644
index 00000000000000..da9cd3eebe7ebb
--- /dev/null
+++ b/libc/src/time/gpu/time_utils.h
@@ -0,0 +1,54 @@
+//===-- Generic utilities for GPU timing ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_TIME_GPU_TIME_UTILS_H
+#define LLVM_LIBC_SRC_TIME_GPU_TIME_UTILS_H
+
+#include "src/__support/GPU/utils.h"
+
+namespace __llvm_libc {
+
+#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+// AMDGPU does not have a single set frequency. Different architectures and
+// cards can have varying values. Here we default to a few known values, but
+// for complete support the frequency needs to be read from the kernel driver.
+#if defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) ||    \
+    defined(__gfx1013__) || defined(__gfx1030__) || defined(__gfx1031__) ||    \
+    defined(__gfx1032__) || defined(__gfx1033__) || defined(__gfx1034__) ||    \
+    defined(__gfx1035__) || defined(__gfx1036__) || defined(__gfx1100__) ||    \
+    defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) ||    \
+    defined(__gfx1150__) || defined(__gfx1151__)
+// These architectures use a 100 MHz fixed frequency clock.
+constexpr uint64_t clock_freq = 100000000;
+#elif defined(__gfx900__) || defined(__gfx902__) || defined(__gfx904__) ||     \
+    defined(__gfx906__) || defined(__gfx908__) || defined(__gfx909__) ||       \
+    defined(__gfx90a__) || defined(__gfx90c__) || defined(__gfx940__)
+// These architectures use a 25 MHz fixed frequency clock except for Vega 10
+// which is actually 27 MHz. We default to 25 MHz in all cases anyway.
+constexpr uint64_t clock_freq = 25000000;
+#else
+// The frequency for these architectures is unknown. We simply default to zero.
+constexpr uint64_t clock_freq = 0;
+#endif
+
+// We provide an externally visible symbol such that the runtime can set this to
+// the correct value. If it is not set we try to default to the known values.
+extern "C" [[gnu::visibility("protected")]] uint64_t
+    [[clang::address_space(4)]] __llvm_libc_clock_freq;
+#define GPU_CLOCKS_PER_SEC static_cast<clock_t>(__llvm_libc_clock_freq)
+
+#elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
+// NVPTX uses a single 1 GHz fixed frequency clock for all target architectures.
+#define GPU_CLOCKS_PER_SEC static_cast<clock_t>(1000000000UL)
+#else
+#error "Unsupported target"
+#endif
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_TIME_GPU_TIME_UTILS_H


        


More information about the cfe-commits mailing list