[libc-commits] [libc] [libc][math][c23] Enable C23 _Float16 math functions on GPUs (PR #99248)
via libc-commits
libc-commits at lists.llvm.org
Fri Jul 19 12:50:43 PDT 2024
https://github.com/overmighty updated https://github.com/llvm/llvm-project/pull/99248
>From f55e4493b4c5c5060735d4bdb292ce1b84070d24 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Wed, 17 Jul 2024 00:02:45 +0200
Subject: [PATCH 1/4] [libc][math][c23] Enable C23 _Float16 math functions on
GPUs
---
.../cmake/modules/CheckCompilerFeatures.cmake | 33 +++++++--
libc/config/gpu/entrypoints.txt | 71 +++++++++++++++++++
2 files changed, 97 insertions(+), 7 deletions(-)
diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a6d793d495c45..361c1e710b187 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -15,6 +15,12 @@ set(
# Making sure ALL_COMPILER_FEATURES is sorted.
list(SORT ALL_COMPILER_FEATURES)
+# Compiler features that are unavailable on GPU targets with the in-tree Clang.
+set(
+ CPU_ONLY_COMPILER_FEATURES
+ "float128"
+)
+
# Function to check whether the compiler supports the provided set of features.
# Usage:
# compiler_supports(
@@ -65,13 +71,26 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(CMAKE_TRY_COMPILE_TARGET_TYPE EXECUTABLE)
endif()
- try_compile(
- has_feature
- ${CMAKE_CURRENT_BINARY_DIR}/compiler_features
- SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
- COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
- LINK_OPTIONS ${link_options}
- )
+ if(LIBC_TARGET_OS_IS_GPU)
+ # CUDA shouldn't be required to build the libc, only to test it, so we can't
+ # try to build CUDA binaries here. Since GPU builds are always compiled with
+ # the in-tree Clang, we just hardcode which compiler features are available
+ # when targeting GPUs.
+ if(feature IN_LIST CPU_ONLY_COMPILER_FEATURES)
+ set(has_feature FALSE)
+ else()
+ set(has_feature TRUE)
+ endif()
+ else()
+ try_compile(
+ has_feature
+ ${CMAKE_CURRENT_BINARY_DIR}/compiler_features
+ SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
+ COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
+ LINK_OPTIONS ${link_options}
+ )
+ endif()
+
if(has_feature)
list(APPEND AVAILABLE_COMPILER_FEATURES ${feature})
if(${feature} STREQUAL "float16")
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b8eb743cf587a..e5007f8afc49e 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -346,6 +346,77 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncf
)
+if(LIBC_TYPES_HAS_FLOAT16)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C23 _Float16 entrypoints
+ libc.src.math.canonicalizef16
+ libc.src.math.ceilf16
+ libc.src.math.copysignf16
+ libc.src.math.f16add
+ libc.src.math.f16addf
+ libc.src.math.f16addl
+ libc.src.math.f16div
+ libc.src.math.f16divf
+ libc.src.math.f16divl
+ libc.src.math.f16fma
+ libc.src.math.f16fmaf
+ libc.src.math.f16fmal
+ libc.src.math.f16sqrt
+ libc.src.math.f16sqrtf
+ libc.src.math.f16sqrtl
+ libc.src.math.f16sub
+ libc.src.math.f16subf
+ libc.src.math.f16subl
+ libc.src.math.fabsf16
+ libc.src.math.fdimf16
+ libc.src.math.floorf16
+ libc.src.math.fmaxf16
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.math.fmaximum_magf16
+ libc.src.math.fmaximum_numf16
+ libc.src.math.fmaximumf16
+ libc.src.math.fminf16
+ libc.src.math.fminimum_mag_numf16
+ libc.src.math.fminimum_magf16
+ libc.src.math.fminimum_numf16
+ libc.src.math.fminimumf16
+ libc.src.math.fmodf16
+ libc.src.math.frexpf16
+ libc.src.math.fromfpf16
+ libc.src.math.fromfpxf16
+ libc.src.math.getpayloadf16
+ libc.src.math.ilogbf16
+ libc.src.math.ldexpf16
+ libc.src.math.llogbf16
+ libc.src.math.llrintf16
+ libc.src.math.llroundf16
+ libc.src.math.logbf16
+ libc.src.math.lrintf16
+ libc.src.math.lroundf16
+ libc.src.math.modff16
+ libc.src.math.nanf16
+ libc.src.math.nearbyintf16
+ libc.src.math.nextafterf16
+ libc.src.math.nextdownf16
+ libc.src.math.nexttowardf16
+ libc.src.math.nextupf16
+ libc.src.math.remainderf16
+ libc.src.math.remquof16
+ libc.src.math.rintf16
+ libc.src.math.roundevenf16
+ libc.src.math.roundf16
+ libc.src.math.scalblnf16
+ libc.src.math.scalbnf16
+ libc.src.math.setpayloadf16
+ libc.src.math.setpayloadsigf16
+ libc.src.math.totalorderf16
+ libc.src.math.totalordermagf16
+ libc.src.math.truncf16
+ libc.src.math.ufromfpf16
+ libc.src.math.ufromfpxf16
+ )
+endif()
+
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
>From 7cc8e19068979ab8bbb862ef84e9c0fae7d8440c Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Wed, 17 Jul 2024 11:52:01 +0200
Subject: [PATCH 2/4] fixup! [libc][math][c23] Enable C23 _Float16 math
functions on GPUs
Enable ROUND_OPT flag and builtin usage for _Float16 functions on GPUs.
---
libc/cmake/modules/LLVMLibCFlagRules.cmake | 6 ++++--
libc/src/__support/macros/properties/cpu_features.h | 4 ++++
libc/src/math/generic/CMakeLists.txt | 12 ++++++------
libc/src/math/generic/ceilf16.cpp | 4 ++--
libc/src/math/generic/floorf16.cpp | 4 ++--
libc/src/math/generic/rintf16.cpp | 4 ++--
libc/src/math/generic/roundevenf16.cpp | 4 ++--
libc/src/math/generic/roundf16.cpp | 5 +++--
libc/src/math/generic/truncf16.cpp | 4 ++--
9 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index eca7ba8d183e6..92245ffab4746 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -276,8 +276,10 @@ if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
set(SKIP_FLAG_EXPANSION_EXPLICIT_SIMD_OPT TRUE)
endif()
-# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
+# Skip ROUND_OPT flag for targets that don't support rounding instructions. On
+# x86, these are SSE4.1 instructions, but we reuse the existing SSE4.2 check
+# rather than adding a separate SSE4.1 one.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
- LIBC_TARGET_ARCHITECTURE_IS_AARCH64))
+ LIBC_TARGET_ARCHITECTURE_IS_AARCH64 OR LIBC_TARGET_OS_IS_GPU))
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index 80d48be702070..ba6e5b314e9de 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -49,4 +49,8 @@
#define LIBC_TARGET_CPU_HAS_NEAREST_INT
#endif
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || defined(LIBC_TARGET_ARCH_IS_GPU)
+#define LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS
+#endif
+
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index d775026fabb3e..51743784ff156 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -111,7 +111,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -503,7 +503,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -572,7 +572,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -641,7 +641,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -710,7 +710,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -903,7 +903,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
diff --git a/libc/src/math/generic/ceilf16.cpp b/libc/src/math/generic/ceilf16.cpp
index 708bc4cfd4860..8af31c6623a02 100644
--- a/libc/src/math/generic/ceilf16.cpp
+++ b/libc/src/math/generic/ceilf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_ceilf(x));
#else
return fputil::ceil(x);
diff --git a/libc/src/math/generic/floorf16.cpp b/libc/src/math/generic/floorf16.cpp
index 84e4b0730ac68..3092048f5ab06 100644
--- a/libc/src/math/generic/floorf16.cpp
+++ b/libc/src/math/generic/floorf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_floorf(x));
#else
return fputil::floor(x);
diff --git a/libc/src/math/generic/rintf16.cpp b/libc/src/math/generic/rintf16.cpp
index 0e8c091efcf9b..3a53dd28e3d10 100644
--- a/libc/src/math/generic/rintf16.cpp
+++ b/libc/src/math/generic/rintf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, rintf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_rintf(x));
#else
return fputil::round_using_current_rounding_mode(x);
diff --git a/libc/src/math/generic/roundevenf16.cpp b/libc/src/math/generic/roundevenf16.cpp
index b45670bd24ff1..c3dbd779b9739 100644
--- a/libc/src/math/generic/roundevenf16.cpp
+++ b/libc/src/math/generic/roundevenf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, roundevenf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_ROUNDEVEN) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_roundevenf(x));
#else
return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);
diff --git a/libc/src/math/generic/roundf16.cpp b/libc/src/math/generic/roundf16.cpp
index cb668c0e76388..a5e2b44fbd54b 100644
--- a/libc/src/math/generic/roundf16.cpp
+++ b/libc/src/math/generic/roundf16.cpp
@@ -10,12 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, roundf16, (float16 x)) {
-#if defined(__LIBC_USE_BUILTIN_ROUND) && defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(__LIBC_USE_BUILTIN_ROUND) && \
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_roundf(x));
#else
return fputil::round(x);
diff --git a/libc/src/math/generic/truncf16.cpp b/libc/src/math/generic/truncf16.cpp
index b931053e53438..31b1214a9a0e4 100644
--- a/libc/src/math/generic/truncf16.cpp
+++ b/libc/src/math/generic/truncf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, truncf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_truncf(x));
#else
return fputil::trunc(x);
>From eb1764049da104aef73dd9aa1c7c2fba649f9c2d Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 19 Jul 2024 21:13:04 +0200
Subject: [PATCH 3/4] fixup! [libc][math][c23] Enable C23 _Float16 math
functions on GPUs
Disable narrowing functions that take `long double` arguments.
---
libc/config/gpu/entrypoints.txt | 5 -----
1 file changed, 5 deletions(-)
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index e5007f8afc49e..ac98e2c819932 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -354,19 +354,14 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.copysignf16
libc.src.math.f16add
libc.src.math.f16addf
- libc.src.math.f16addl
libc.src.math.f16div
libc.src.math.f16divf
- libc.src.math.f16divl
libc.src.math.f16fma
libc.src.math.f16fmaf
- libc.src.math.f16fmal
libc.src.math.f16sqrt
libc.src.math.f16sqrtf
- libc.src.math.f16sqrtl
libc.src.math.f16sub
libc.src.math.f16subf
- libc.src.math.f16subl
libc.src.math.fabsf16
libc.src.math.fdimf16
libc.src.math.floorf16
>From 3c7f76751fbbeb16b31bed92a0a04ea824008ed6 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 19 Jul 2024 21:15:30 +0200
Subject: [PATCH 4/4] fixup! [libc][math][c23] Enable C23 _Float16 math
functions on GPUs
Enable f16mul and f16mulf.
---
libc/config/gpu/entrypoints.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index ac98e2c819932..b6fdd9dbc65b2 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -358,6 +358,8 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.f16divf
libc.src.math.f16fma
libc.src.math.f16fmaf
+ libc.src.math.f16mul
+ libc.src.math.f16mulf
libc.src.math.f16sqrt
libc.src.math.f16sqrtf
libc.src.math.f16sub
More information about the libc-commits
mailing list