[libc-commits] [libc] 8060d96 - [libc] Begin implementing a 'libmgpu.a' for math on the GPU

Wed Jun 14 10:59:27 PDT 2023

Author: Joseph Huber
Date: 2023-06-14T12:59:15-05:00
New Revision: 8060d96aed7c2ee8be188fb7619a0cbb863d4f8c

URL: https://github.com/llvm/llvm-project/commit/8060d96aed7c2ee8be188fb7619a0cbb863d4f8c
DIFF: https://github.com/llvm/llvm-project/commit/8060d96aed7c2ee8be188fb7619a0cbb863d4f8c.diff

LOG: [libc] Begin implementing a 'libmgpu.a' for math on the GPU

This patch adds an outline to begin adding a `libmgpu.a` file for
providing math on the GPU. Currently, this is most likely going to be
wrapping around existing vendor libraries and placing them in a more
usable format. Long term, we would like to provide our own
implementations of math functions that can be used instead.

This patch works by simply forwarding the calls to the standard C math
library calls like `sin` to the appropriate vendor call like `__nv_sin`.
Currently, we will use the vendor libraries directly and link them in
via `-mlink-builtin-bitcode`. This is necessary because of bizarre
interactions with the generic bitcode: `-mlink-builtin-bitcode`
internalizes and only links in the used symbols; furthermore, it
propagates the target's default attributes, and it's the only "truly"
correct way to pull in these vendor bitcode libraries without error.

If the vendor libraries are not available at build time, we will still
create the `libmgpu.a`, but we will expect that the vendor library
definitions will be provided by the user's compilation as is made
possible by https://reviews.llvm.org/D152442.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D152486

Added: 
    libc/src/math/gpu/CMakeLists.txt
    libc/src/math/gpu/round.cpp
    libc/src/math/gpu/roundf.cpp
    libc/src/math/gpu/roundl.cpp
    libc/src/math/gpu/vendor/CMakeLists.txt
    libc/src/math/gpu/vendor/amdgpu/amdgpu.h
    libc/src/math/gpu/vendor/amdgpu/declarations.h
    libc/src/math/gpu/vendor/amdgpu/platform.h
    libc/src/math/gpu/vendor/common.h
    libc/src/math/gpu/vendor/nvptx/declarations.h
    libc/src/math/gpu/vendor/nvptx/nvptx.h
    libc/src/math/gpu/vendor/sin.cpp

Modified: 
    libc/config/gpu/entrypoints.txt
    libc/config/gpu/headers.txt
    libc/src/math/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 650fa3d2affb3..deabf544456e7 100644

--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -81,7 +81,15 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.stderr
 )
 
+set(TARGET_LIBM_ENTRYPOINTS
+    # math.h entrypoints
+    libc.src.math.sin
+    libc.src.math.round
+    libc.src.math.roundf
+    libc.src.math.roundl
+)
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
+  ${TARGET_LIBM_ENTRYPOINTS}
 )
-

diff  --git a/libc/config/gpu/headers.txt b/libc/config/gpu/headers.txt
index 73406f2d0539c..b11ae18b5131c 100644
--- a/libc/config/gpu/headers.txt
+++ b/libc/config/gpu/headers.txt
@@ -1,6 +1,7 @@
 set(TARGET_PUBLIC_HEADERS
     libc.include.ctype
     libc.include.string
+    libc.include.math
     libc.include.fenv
     libc.include.errno
     libc.include.stdlib

diff  --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index c47584039d5dd..a211b1602243b 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -18,6 +18,19 @@ function(add_math_entrypoint_object name)
     return()
   endif()
 
+  # The GPU optionally depends on vendor libraries. If we emitted one of these
+  # entrypoints it means the user requested it and we should use it instead.
+  get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.vendor.${name}" fq_vendor_specific_target_name)
+  if(TARGET ${fq_vendor_specific_target_name})
+    add_entrypoint_object(
+      ${name}
+      ALIAS
+      DEPENDS
+        .${LIBC_TARGET_ARCHITECTURE}.vendor.${name}
+    )
+    return()
+  endif()
+
   get_fq_target_name("generic.${name}" fq_generic_target_name)
   if(TARGET ${fq_generic_target_name})
     add_entrypoint_object(

diff  --git a/libc/src/math/gpu/CMakeLists.txt b/libc/src/math/gpu/CMakeLists.txt
new file mode 100644
index 0000000000000..775a58478c3d8
--- /dev/null
+++ b/libc/src/math/gpu/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Math functions not yet available in the libc project, or those not yet tuned
+# for GPU workloads are provided as wrappers over vendor libraries. If we find
+# them ahead of time we will import them statically. Otherwise, we will keep
+# them as external references and expect them to be resolved by the user when
+# they compile. In the future, we will use implementations from the 'libc'
+# project and not provide these wrappers.
+add_subdirectory(vendor)
+
+# For the GPU we want to be able to optionally depend on the vendor libraries
+# until we have a suitable replacement inside `libc`.
+# TODO: We should have an option to enable or disable these on a per-function
+# basis.
+option(LIBC_GPU_VENDOR_MATH "Use vendor wrappers for GPU math" ON)
+# Registers the 'libc' implementation of math entrypoint `name`, forwarding all
+# remaining arguments to add_entrypoint_object(). Nothing is added when
+# LIBC_GPU_VENDOR_MATH is enabled and a 'vendor.${name}' target already exists.
+function(add_math_entrypoint_gpu_object name)
+  get_fq_target_name("vendor.${name}" fq_vendor_specific_target_name)
+  if(LIBC_GPU_VENDOR_MATH AND TARGET ${fq_vendor_specific_target_name})
+    return()
+  endif()
+
+  add_entrypoint_object(${name} ${ARGN})
+endfunction()
+
+add_math_entrypoint_gpu_object(
+  round
+  SRCS
+    round.cpp
+  HDRS
+    ../round.h
+  COMPILE_OPTIONS
+    -O2
+)

diff  --git a/libc/src/math/gpu/round.cpp b/libc/src/math/gpu/round.cpp
new file mode 100644
index 0000000000000..ab840ba605a28
--- /dev/null
+++ b/libc/src/math/gpu/round.cpp
@@ -0,0 +1,16 @@
+//===-- Implementation of the GPU round function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/round.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(double, round, (double x)) { return __builtin_round(x); }
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/gpu/roundf.cpp b/libc/src/math/gpu/roundf.cpp
new file mode 100644
index 0000000000000..32d54b83746c1
--- /dev/null
+++ b/libc/src/math/gpu/roundf.cpp
@@ -0,0 +1,16 @@
+//===-- Implementation of the GPU roundf function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/roundf.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(float, roundf, (float x)) { return __builtin_roundf(x); }
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/gpu/roundl.cpp b/libc/src/math/gpu/roundl.cpp
new file mode 100644
index 0000000000000..6a7699ae8e383
--- /dev/null
+++ b/libc/src/math/gpu/roundl.cpp
@@ -0,0 +1,23 @@
+//===-- Implementation of the GPU roundl function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/roundl.h"
+#include "src/__support/FPUtil/PlatformDefs.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+#ifndef LONG_DOUBLE_IS_DOUBLE
+#error "GPU targets do not support long doubles"
+#endif
+
+LLVM_LIBC_FUNCTION(long double, roundl, (long double x)) {
+  return __builtin_round(x);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/gpu/vendor/CMakeLists.txt b/libc/src/math/gpu/vendor/CMakeLists.txt
new file mode 100644
index 0000000000000..3783c005aba85
--- /dev/null
+++ b/libc/src/math/gpu/vendor/CMakeLists.txt
@@ -0,0 +1,41 @@
+find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if(AMDDeviceLibs_FOUND)
+  message(STATUS "Found the ROCm device library. Implementations falling back "
+                 "to the vendor libraries will be resolved statically.")
+  get_target_property(ocml_path ocml IMPORTED_LOCATION)
+  list(APPEND bitcode_link_flags
+       "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}")
+else()
+  message(STATUS "Could not find the ROCm device library. Unimplemented "
+                 "functions will be an external reference to the vendor libraries.")
+endif()
+
+# Link the CUDA libdevice bitcode statically when the toolkit ships it;
+# otherwise the vendor math calls are left as external references.
+find_package(CUDAToolkit QUIET)
+set(libdevice_path "${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc")
+if(CUDAToolkit_FOUND AND EXISTS "${libdevice_path}")
+  message(STATUS "Found the CUDA device library. Implementations falling back "
+                 "to the vendor libraries will be resolved statically.")
+  list(APPEND bitcode_link_flags
+       "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${libdevice_path}")
+else()
+  message(STATUS "Could not find the CUDA device library. Unimplemented "
+                 "functions will be an external reference to the vendor libraries.")
+endif()
+
+# FIXME: We need a way to pass the library to only the NVPTX / AMDGPU build.
+# This shouldn't cause issues because we only link in needed symbols, but it
+# will link in identity metadata from both libraries. This silences the warning.
+list(APPEND bitcode_link_flags "-Wno-linker-warnings")
+
+add_entrypoint_object(
+  sin
+  SRCS
+    sin.cpp
+  HDRS
+    ../../sin.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)

diff  --git a/libc/src/math/gpu/vendor/amdgpu/amdgpu.h b/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
new file mode 100644
index 0000000000000..88b727f104716
--- /dev/null
+++ b/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
@@ -0,0 +1,25 @@
+//===-- AMDGPU specific definitions for math support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
+
+#include "declarations.h"
+#include "platform.h"
+
+#include "src/__support/macros/attributes.h"
+
+namespace __llvm_libc {
+namespace internal {
+
+LIBC_INLINE double sin(double x) { return __ocml_sin_f64(x); }
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H

diff  --git a/libc/src/math/gpu/vendor/amdgpu/declarations.h b/libc/src/math/gpu/vendor/amdgpu/declarations.h
new file mode 100644
index 0000000000000..41bc0c7a35d90
--- /dev/null
+++ b/libc/src/math/gpu/vendor/amdgpu/declarations.h
@@ -0,0 +1,20 @@
+//===-- AMDGPU specific declarations for math support ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
+
+namespace __llvm_libc {
+
+extern "C" {
+double __ocml_sin_f64(double);
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H

diff  --git a/libc/src/math/gpu/vendor/amdgpu/platform.h b/libc/src/math/gpu/vendor/amdgpu/platform.h
new file mode 100644
index 0000000000000..543f408d8a961
--- /dev/null
+++ b/libc/src/math/gpu/vendor/amdgpu/platform.h
@@ -0,0 +1,110 @@
+//===-- AMDGPU specific platform definitions for math support -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
+
+#include <stdint.h>
+
+namespace __llvm_libc {
+
+// The ROCm device library uses control globals to alter codegen for the
+// different targets. To avoid needing to link them in manually we simply
+// define them here.
+extern "C" {
+
+// Disable unsafe math optimizations in the implementation.
+extern const uint8_t __oclc_unsafe_math_opt = 0;
+
+// Disable denormalization at zero optimizations in the implementation.
+extern const uint8_t __oclc_daz_opt = 0;
+
+// Disable rounding optimizations for 32-bit square roots.
+extern const uint8_t __oclc_correctly_rounded_sqrt32 = 0;
+
+// Disable finite math optimizations.
+extern const uint8_t __oclc_finite_only_opt = 0;
+
+#if defined(__gfx700__)
+extern const uint32_t __oclc_ISA_version = 7000;
+#elif defined(__gfx701__)
+extern const uint32_t __oclc_ISA_version = 7001;
+#elif defined(__gfx702__)
+extern const uint32_t __oclc_ISA_version = 7002;
+#elif defined(__gfx703__)
+extern const uint32_t __oclc_ISA_version = 7003;
+#elif defined(__gfx704__)
+extern const uint32_t __oclc_ISA_version = 7004;
+#elif defined(__gfx705__)
+extern const uint32_t __oclc_ISA_version = 7005;
+#elif defined(__gfx801__)
+extern const uint32_t __oclc_ISA_version = 8001;
+#elif defined(__gfx802__)
+extern const uint32_t __oclc_ISA_version = 8002;
+#elif defined(__gfx803__)
+extern const uint32_t __oclc_ISA_version = 8003;
+#elif defined(__gfx805__)
+extern const uint32_t __oclc_ISA_version = 8005;
+#elif defined(__gfx810__)
+extern const uint32_t __oclc_ISA_version = 8100;
+#elif defined(__gfx900__)
+extern const uint32_t __oclc_ISA_version = 9000;
+#elif defined(__gfx902__)
+extern const uint32_t __oclc_ISA_version = 9002;
+#elif defined(__gfx904__)
+extern const uint32_t __oclc_ISA_version = 9004;
+#elif defined(__gfx906__)
+extern const uint32_t __oclc_ISA_version = 9006;
+#elif defined(__gfx908__)
+extern const uint32_t __oclc_ISA_version = 9008;
+#elif defined(__gfx909__)
+extern const uint32_t __oclc_ISA_version = 9009;
+#elif defined(__gfx90a__)
+extern const uint32_t __oclc_ISA_version = 9010;
+#elif defined(__gfx90c__)
+extern const uint32_t __oclc_ISA_version = 9012;
+#elif defined(__gfx940__)
+extern const uint32_t __oclc_ISA_version = 9400;
+#elif defined(__gfx1010__)
+extern const uint32_t __oclc_ISA_version = 10100;
+#elif defined(__gfx1011__)
+extern const uint32_t __oclc_ISA_version = 10101;
+#elif defined(__gfx1012__)
+extern const uint32_t __oclc_ISA_version = 10102;
+#elif defined(__gfx1013__)
+extern const uint32_t __oclc_ISA_version = 10103;
+#elif defined(__gfx1030__)
+extern const uint32_t __oclc_ISA_version = 10300;
+#elif defined(__gfx1031__)
+extern const uint32_t __oclc_ISA_version = 10301;
+#elif defined(__gfx1032__)
+extern const uint32_t __oclc_ISA_version = 10302;
+#elif defined(__gfx1033__)
+extern const uint32_t __oclc_ISA_version = 10303;
+#elif defined(__gfx1034__)
+extern const uint32_t __oclc_ISA_version = 10304;
+#elif defined(__gfx1035__)
+extern const uint32_t __oclc_ISA_version = 10305;
+#elif defined(__gfx1036__)
+extern const uint32_t __oclc_ISA_version = 10306;
+#elif defined(__gfx1100__)
+extern const uint32_t __oclc_ISA_version = 11000;
+#elif defined(__gfx1101__)
+extern const uint32_t __oclc_ISA_version = 11001;
+#elif defined(__gfx1102__)
+extern const uint32_t __oclc_ISA_version = 11002;
+#elif defined(__gfx1103__)
+extern const uint32_t __oclc_ISA_version = 11003;
+#else
+#error "Unknown AMDGPU architecture"
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H

diff  --git a/libc/src/math/gpu/vendor/common.h b/libc/src/math/gpu/vendor/common.h
new file mode 100644
index 0000000000000..041a9a01c30e9
--- /dev/null
+++ b/libc/src/math/gpu/vendor/common.h
@@ -0,0 +1,22 @@
+//===-- Common interface for compiling the GPU math -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_COMMON_H
+#define LLVM_LIBC_SRC_MATH_GPU_COMMON_H
+
+#include "src/__support/macros/properties/architectures.h"
+
+#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+#include "amdgpu/amdgpu.h"
+#elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
+#include "nvptx/nvptx.h"
+#else
+#error "Unsupported platform"
+#endif
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_COMMON_H

diff  --git a/libc/src/math/gpu/vendor/nvptx/declarations.h b/libc/src/math/gpu/vendor/nvptx/declarations.h
new file mode 100644
index 0000000000000..2faba835650a5
--- /dev/null
+++ b/libc/src/math/gpu/vendor/nvptx/declarations.h
@@ -0,0 +1,20 @@
+//===-- NVPTX specific declarations for math support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
+#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
+
+namespace __llvm_libc {
+
+extern "C" {
+double __nv_sin(double);
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H

diff  --git a/libc/src/math/gpu/vendor/nvptx/nvptx.h b/libc/src/math/gpu/vendor/nvptx/nvptx.h
new file mode 100644
index 0000000000000..718ac38a96e52
--- /dev/null
+++ b/libc/src/math/gpu/vendor/nvptx/nvptx.h
@@ -0,0 +1,24 @@
+//===-- NVPTX specific definitions for math support -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
+#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
+
+#include "declarations.h"
+
+#include "src/__support/macros/attributes.h"
+
+namespace __llvm_libc {
+namespace internal {
+
+LIBC_INLINE double sin(double x) { return __nv_sin(x); }
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_H

diff  --git a/libc/src/math/gpu/vendor/sin.cpp b/libc/src/math/gpu/vendor/sin.cpp
new file mode 100644
index 0000000000000..4eda066b60f53
--- /dev/null
+++ b/libc/src/math/gpu/vendor/sin.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the sin function for GPU ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/sin.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(double, sin, (double x)) { return internal::sin(x); }
+
+} // namespace __llvm_libc