[libc-commits] [libc] 81ce796 - [libc][math][c23] Enable C23 _Float16 math functions on GPUs (#99248)
via libc-commits
libc-commits at lists.llvm.org
Thu Jul 25 12:09:52 PDT 2024
Author: OverMighty
Date: 2024-07-25T21:09:49+02:00
New Revision: 81ce796095c8e2ededa5d9df1c963cf08a75a6f1
URL: https://github.com/llvm/llvm-project/commit/81ce796095c8e2ededa5d9df1c963cf08a75a6f1
DIFF: https://github.com/llvm/llvm-project/commit/81ce796095c8e2ededa5d9df1c963cf08a75a6f1.diff
LOG: [libc][math][c23] Enable C23 _Float16 math functions on GPUs (#99248)
Added:
Modified:
libc/cmake/modules/CheckCompilerFeatures.cmake
libc/cmake/modules/LLVMLibCFlagRules.cmake
libc/config/gpu/entrypoints.txt
libc/src/__support/macros/properties/cpu_features.h
libc/src/math/generic/CMakeLists.txt
libc/src/math/generic/ceilf16.cpp
libc/src/math/generic/floorf16.cpp
libc/src/math/generic/rintf16.cpp
libc/src/math/generic/roundevenf16.cpp
libc/src/math/generic/roundf16.cpp
libc/src/math/generic/truncf16.cpp
Removed:
################################################################################
diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index 8b02c590f11d0..63145fe709dda 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -17,6 +17,12 @@ set(
# Making sure ALL_COMPILER_FEATURES is sorted.
list(SORT ALL_COMPILER_FEATURES)
+# Compiler features that are unavailable on GPU targets with the in-tree Clang.
+set(
+ CPU_ONLY_COMPILER_FEATURES
+ "float128"
+)
+
# Function to check whether the compiler supports the provided set of features.
# Usage:
# compiler_supports(
@@ -67,13 +73,26 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(CMAKE_TRY_COMPILE_TARGET_TYPE EXECUTABLE)
endif()
- try_compile(
- has_feature
- ${CMAKE_CURRENT_BINARY_DIR}/compiler_features
- SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
- COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
- LINK_OPTIONS ${link_options}
- )
+ if(LIBC_TARGET_OS_IS_GPU)
+ # CUDA shouldn't be required to build the libc, only to test it, so we can't
+ # try to build CUDA binaries here. Since GPU builds are always compiled with
+ # the in-tree Clang, we just hardcode which compiler features are available
+ # when targeting GPUs.
+ if(feature IN_LIST CPU_ONLY_COMPILER_FEATURES)
+ set(has_feature FALSE)
+ else()
+ set(has_feature TRUE)
+ endif()
+ else()
+ try_compile(
+ has_feature
+ ${CMAKE_CURRENT_BINARY_DIR}/compiler_features
+ SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
+ COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
+ LINK_OPTIONS ${link_options}
+ )
+ endif()
+
if(has_feature)
list(APPEND AVAILABLE_COMPILER_FEATURES ${feature})
if(${feature} STREQUAL "float16")
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index 4398fe55db5aa..3629a7f111a7c 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -279,8 +279,10 @@ if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
set(SKIP_FLAG_EXPANSION_EXPLICIT_SIMD_OPT TRUE)
endif()
-# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
+# Skip ROUND_OPT flag for targets that don't support rounding instructions. On
+# x86, these are SSE4.1 instructions, but we already had code to check for
+# SSE4.2 support.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
- LIBC_TARGET_ARCHITECTURE_IS_AARCH64))
+ LIBC_TARGET_ARCHITECTURE_IS_AARCH64 OR LIBC_TARGET_OS_IS_GPU))
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index f7e89bdcd95cc..df7aa9e319624 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -348,6 +348,74 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncf
)
+if(LIBC_TYPES_HAS_FLOAT16)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C23 _Float16 entrypoints
+ libc.src.math.canonicalizef16
+ libc.src.math.ceilf16
+ libc.src.math.copysignf16
+ libc.src.math.f16add
+ libc.src.math.f16addf
+ libc.src.math.f16div
+ libc.src.math.f16divf
+ libc.src.math.f16fma
+ libc.src.math.f16fmaf
+ libc.src.math.f16mul
+ libc.src.math.f16mulf
+ libc.src.math.f16sqrt
+ libc.src.math.f16sqrtf
+ libc.src.math.f16sub
+ libc.src.math.f16subf
+ libc.src.math.fabsf16
+ libc.src.math.fdimf16
+ libc.src.math.floorf16
+ libc.src.math.fmaxf16
+ libc.src.math.fmaximum_mag_numf16
+ libc.src.math.fmaximum_magf16
+ libc.src.math.fmaximum_numf16
+ libc.src.math.fmaximumf16
+ libc.src.math.fminf16
+ libc.src.math.fminimum_mag_numf16
+ libc.src.math.fminimum_magf16
+ libc.src.math.fminimum_numf16
+ libc.src.math.fminimumf16
+ libc.src.math.fmodf16
+ libc.src.math.frexpf16
+ libc.src.math.fromfpf16
+ libc.src.math.fromfpxf16
+ libc.src.math.getpayloadf16
+ libc.src.math.ilogbf16
+ libc.src.math.ldexpf16
+ libc.src.math.llogbf16
+ libc.src.math.llrintf16
+ libc.src.math.llroundf16
+ libc.src.math.logbf16
+ libc.src.math.lrintf16
+ libc.src.math.lroundf16
+ libc.src.math.modff16
+ libc.src.math.nanf16
+ libc.src.math.nearbyintf16
+ libc.src.math.nextafterf16
+ libc.src.math.nextdownf16
+ libc.src.math.nexttowardf16
+ libc.src.math.nextupf16
+ libc.src.math.remainderf16
+ libc.src.math.remquof16
+ libc.src.math.rintf16
+ libc.src.math.roundevenf16
+ libc.src.math.roundf16
+ libc.src.math.scalblnf16
+ libc.src.math.scalbnf16
+ libc.src.math.setpayloadf16
+ libc.src.math.setpayloadsigf16
+ libc.src.math.totalorderf16
+ libc.src.math.totalordermagf16
+ libc.src.math.truncf16
+ libc.src.math.ufromfpf16
+ libc.src.math.ufromfpxf16
+ )
+endif()
+
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index 8d431f203335f..d2cea367516db 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -53,4 +53,8 @@
#define LIBC_TARGET_CPU_HAS_NEAREST_INT
#endif
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || defined(LIBC_TARGET_ARCH_IS_GPU)
+#define LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS
+#endif
+
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 80e862542e29c..7280add7c8bb0 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -111,7 +111,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -548,7 +548,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -617,7 +617,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -686,7 +686,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -755,7 +755,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
@@ -948,7 +948,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
- libc.src.__support.macros.properties.architectures
+ libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
diff --git a/libc/src/math/generic/ceilf16.cpp b/libc/src/math/generic/ceilf16.cpp
index 708bc4cfd4860..8af31c6623a02 100644
--- a/libc/src/math/generic/ceilf16.cpp
+++ b/libc/src/math/generic/ceilf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_ceilf(x));
#else
return fputil::ceil(x);
diff --git a/libc/src/math/generic/floorf16.cpp b/libc/src/math/generic/floorf16.cpp
index 84e4b0730ac68..3092048f5ab06 100644
--- a/libc/src/math/generic/floorf16.cpp
+++ b/libc/src/math/generic/floorf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_floorf(x));
#else
return fputil::floor(x);
diff --git a/libc/src/math/generic/rintf16.cpp b/libc/src/math/generic/rintf16.cpp
index 0e8c091efcf9b..3a53dd28e3d10 100644
--- a/libc/src/math/generic/rintf16.cpp
+++ b/libc/src/math/generic/rintf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, rintf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_rintf(x));
#else
return fputil::round_using_current_rounding_mode(x);
diff --git a/libc/src/math/generic/roundevenf16.cpp b/libc/src/math/generic/roundevenf16.cpp
index b45670bd24ff1..c3dbd779b9739 100644
--- a/libc/src/math/generic/roundevenf16.cpp
+++ b/libc/src/math/generic/roundevenf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, roundevenf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_ROUNDEVEN) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_roundevenf(x));
#else
return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);
diff --git a/libc/src/math/generic/roundf16.cpp b/libc/src/math/generic/roundf16.cpp
index cb668c0e76388..a5e2b44fbd54b 100644
--- a/libc/src/math/generic/roundf16.cpp
+++ b/libc/src/math/generic/roundf16.cpp
@@ -10,12 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, roundf16, (float16 x)) {
-#if defined(__LIBC_USE_BUILTIN_ROUND) && defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(__LIBC_USE_BUILTIN_ROUND) && \
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_roundf(x));
#else
return fputil::round(x);
diff --git a/libc/src/math/generic/truncf16.cpp b/libc/src/math/generic/truncf16.cpp
index b931053e53438..31b1214a9a0e4 100644
--- a/libc/src/math/generic/truncf16.cpp
+++ b/libc/src/math/generic/truncf16.cpp
@@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, truncf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
- defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_truncf(x));
#else
return fputil::trunc(x);
More information about the libc-commits
mailing list