[libc-commits] [libc] ed261e7 - [libc] Add float type and flag for nearest_integer to enable SSE4.2.
Tue Ly via libc-commits
libc-commits at lists.llvm.org
Fri Jul 22 06:30:01 PDT 2022
Author: Tue Ly
Date: 2022-07-22T09:29:41-04:00
New Revision: ed261e710693cd611fd003db45a85bdeba1e8367
URL: https://github.com/llvm/llvm-project/commit/ed261e710693cd611fd003db45a85bdeba1e8367
DIFF: https://github.com/llvm/llvm-project/commit/ed261e710693cd611fd003db45a85bdeba1e8367.diff
LOG: [libc] Add float type and flag for nearest_integer to enable SSE4.2.
Add a float overload of nearest_integer and a ROUND_OPT flag so that it is
automatically tested both with and without the SSE4.2 flag.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D129916
Added:
Modified:
libc/cmake/modules/LLVMLibCFlagRules.cmake
libc/cmake/modules/LLVMLibCObjectRules.cmake
libc/src/__support/FPUtil/CMakeLists.txt
libc/src/__support/FPUtil/aarch64/nearest_integer.h
libc/src/__support/FPUtil/nearest_integer.h
libc/src/__support/FPUtil/x86_64/nearest_integer.h
Removed:
################################################################################
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index d28c7a75619dc..c96d6f3fb51fd 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -131,8 +131,14 @@ endfunction(get_fq_dep_list_without_flag)
# Special flags
set(FMA_OPT_FLAG "FMA_OPT")
+set(ROUND_OPT_FLAG "ROUND_OPT")
# Skip FMA_OPT flag for targets that don't support fma.
if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")))
set(SKIP_FLAG_EXPANSION_FMA_OPT TRUE)
endif()
+
+# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
+if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
+ set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
+endif()
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index 760a8cb9e27ee..a824cad94b7c2 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -9,6 +9,14 @@ function(_get_common_compile_options output_var flags)
set(ADD_FMA_FLAG TRUE)
endif()
+ list(FIND flags ${ROUND_OPT_FLAG} round)
+ if(${round} LESS 0)
+ list(FIND flags "${ROUND_OPT_FLAG}__ONLY" round)
+ endif()
+ if((${round} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE4_2"))
+ set(ADD_SSE4_2_FLAG TRUE)
+ endif()
+
set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
if(NOT ${LIBC_TARGET_OS} STREQUAL "windows")
set(compile_options ${compile_options} -fpie -ffreestanding -fno-builtin)
@@ -21,6 +29,9 @@ function(_get_common_compile_options output_var flags)
if(ADD_FMA_FLAG)
list(APPEND compile_options "-mfma")
endif()
+ if(ADD_SSE4_2_FLAG)
+ list(APPEND compile_options "-msse4.2")
+ endif()
elseif(MSVC)
list(APPEND compile_options "/EHs-c-")
list(APPEND compile_options "/GR-")
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 9807cfa55444c..a4247fba214bf 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -75,6 +75,8 @@ add_header_library(
nearest_integer.h
DEPENDS
libc.src.__support.common
+ FLAGS
+ ROUND_OPT
)
add_subdirectory(generic)
diff --git a/libc/src/__support/FPUtil/aarch64/nearest_integer.h b/libc/src/__support/FPUtil/aarch64/nearest_integer.h
index 888b5c45cd3b9..e6c83e956ccba 100644
--- a/libc/src/__support/FPUtil/aarch64/nearest_integer.h
+++ b/libc/src/__support/FPUtil/aarch64/nearest_integer.h
@@ -18,6 +18,12 @@
namespace __llvm_libc {
namespace fputil {
+static inline float nearest_integer(float x) { // float overload: rounds x with the AArch64 frintn instruction
+ float result; // written by the asm statement through its "=w" output operand
+ __asm__ __volatile__("frintn %s0, %s1\n\t" : "=w"(result) : "w"(x)); // same instruction as the double overload, using %s operands where that one uses %d
+ return result;
+}
+
static inline double nearest_integer(double x) {
double result;
__asm__ __volatile__("frintn %d0, %d1\n\t" : "=w"(result) : "w"(x));
diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h
index 41f717da36dad..e6377fe9dc602 100644
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@@ -21,13 +21,27 @@
namespace __llvm_libc {
namespace fputil {
-// This is a fast implementation for rounding to a nearest integer that, in case
-// of a tie, might pick a random one among 2 closest integers when the rounding
-// mode is not FE_TONEAREST.
+// This is a fast implementation for rounding to a nearest integer.
//
// Notice that for AARCH64 and x86-64 with SSE4.2 support, we will use their
// corresponding rounding instruction instead. And in those cases, the results
// are rounded to the nearest integer, tie-to-even.
+static inline float nearest_integer(float x) {
+  // Inputs outside (-2^24, 2^24), and NaN (for which both comparisons are
+  // false), are returned unchanged.
+  if (x < 0x1p24f && x > -0x1p24f) {
+    // Add and then subtract 2^23, signed like x, so the fractional bits of x
+    // are rounded away by the floating-point addition itself.
+    float r = x < 0 ? (x - 0x1.0p23f) + 0x1.0p23f : (x + 0x1.0p23f) - 0x1.0p23f;
+    float diff = x - r;
+    // The expression above is correct for the default rounding mode, round-to-
+    // nearest, tie-to-even. For other rounding modes, it might be off by 1,
+    // which is corrected below.
+    if (unlikely(diff > 0.5f))
+      return r + 1.0f;
+    if (unlikely(diff < -0.5f))
+      return r - 1.0f;
+    return r;
+  }
+  return x;
+}
+
static inline double nearest_integer(double x) {
if (x < 0x1p53 && x > -0x1p53) {
double r = x < 0 ? (x - 0x1.0p52) + 0x1.0p52 : (x + 0x1.0p52) - 0x1.0p52;
diff --git a/libc/src/__support/FPUtil/x86_64/nearest_integer.h b/libc/src/__support/FPUtil/x86_64/nearest_integer.h
index db9817a806616..e0c1b1a2d9e2d 100644
--- a/libc/src/__support/FPUtil/x86_64/nearest_integer.h
+++ b/libc/src/__support/FPUtil/x86_64/nearest_integer.h
@@ -24,6 +24,13 @@
namespace __llvm_libc {
namespace fputil {
+// Rounds x to the nearest integer value using _mm_round_ss, with
+// floating-point exceptions suppressed (_MM_FROUND_NO_EXC).
+static inline float nearest_integer(float x) {
+  __m128 xmm = _mm_set_ss(x); // NOLINT
+  // _mm_round_ss expects the _MM_FROUND_* rounding-control constants.
+  // _MM_ROUND_NEAREST is the MXCSR mode constant for _MM_SET_ROUNDING_MODE and
+  // only worked here because both constants happen to be zero.
+  __m128 ymm = _mm_round_ss( // NOLINT
+      xmm, xmm, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+  // Lane 0 holds the rounded scalar.
+  return ymm[0];
+}
+
static inline double nearest_integer(double x) {
__m128d xmm = _mm_set_sd(x); // NOLINT
__m128d ymm =
More information about the libc-commits
mailing list