[libc-commits] [libc] ed261e7 - [libc] Add float type and flag for nearest_integer to enable SSE4.2.

Fri Jul 22 06:30:01 PDT 2022

Author: Tue Ly
Date: 2022-07-22T09:29:41-04:00
New Revision: ed261e710693cd611fd003db45a85bdeba1e8367

URL: https://github.com/llvm/llvm-project/commit/ed261e710693cd611fd003db45a85bdeba1e8367
DIFF: https://github.com/llvm/llvm-project/commit/ed261e710693cd611fd003db45a85bdeba1e8367.diff

LOG: [libc] Add float type and flag for nearest_integer to enable SSE4.2.

Add float type and flag for nearest integer to automatically test with
and without SSE4.2 flag.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D129916

Added: 
    

Modified: 
    libc/cmake/modules/LLVMLibCFlagRules.cmake
    libc/cmake/modules/LLVMLibCObjectRules.cmake
    libc/src/__support/FPUtil/CMakeLists.txt
    libc/src/__support/FPUtil/aarch64/nearest_integer.h
    libc/src/__support/FPUtil/nearest_integer.h
    libc/src/__support/FPUtil/x86_64/nearest_integer.h

Removed: 
    


################################################################################
diff  --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index d28c7a75619dc..c96d6f3fb51fd 100644

--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -131,8 +131,14 @@ endfunction(get_fq_dep_list_without_flag)
 
 # Special flags
 set(FMA_OPT_FLAG "FMA_OPT")
+set(ROUND_OPT_FLAG "ROUND_OPT")
 
 # Skip FMA_OPT flag for targets that don't support fma.
 if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")))
   set(SKIP_FLAG_EXPANSION_FMA_OPT TRUE)
 endif()
+
+# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
+if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
+  set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
+endif()

diff  --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index 760a8cb9e27ee..a824cad94b7c2 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -9,6 +9,14 @@ function(_get_common_compile_options output_var flags)
     set(ADD_FMA_FLAG TRUE)
   endif()
 
+  list(FIND flags ${ROUND_OPT_FLAG} round)
+  if(${round} LESS 0)
+    list(FIND flags "${ROUND_OPT_FLAG}__ONLY" round)
+  endif()
+  if((${round} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE4_2"))
+    set(ADD_SSE4_2_FLAG TRUE)
+  endif()
+
   set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
   if(NOT ${LIBC_TARGET_OS} STREQUAL "windows")
     set(compile_options ${compile_options} -fpie -ffreestanding -fno-builtin)
@@ -21,6 +29,9 @@ function(_get_common_compile_options output_var flags)
     if(ADD_FMA_FLAG)
       list(APPEND compile_options "-mfma")
     endif()
+    if(ADD_SSE4_2_FLAG)
+      list(APPEND compile_options "-msse4.2")
+    endif()
   elseif(MSVC)
     list(APPEND compile_options "/EHs-c-")
     list(APPEND compile_options "/GR-")

diff  --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 9807cfa55444c..a4247fba214bf 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -75,6 +75,8 @@ add_header_library(
     nearest_integer.h
   DEPENDS
     libc.src.__support.common
+  FLAGS
+    ROUND_OPT
 )
 
 add_subdirectory(generic)

diff  --git a/libc/src/__support/FPUtil/aarch64/nearest_integer.h b/libc/src/__support/FPUtil/aarch64/nearest_integer.h
index 888b5c45cd3b9..e6c83e956ccba 100644
--- a/libc/src/__support/FPUtil/aarch64/nearest_integer.h
+++ b/libc/src/__support/FPUtil/aarch64/nearest_integer.h
@@ -18,6 +18,12 @@
 namespace __llvm_libc {
 namespace fputil {
 
+static inline float nearest_integer(float x) {
+  float result;
+  __asm__ __volatile__("frintn %s0, %s1\n\t" : "=w"(result) : "w"(x));
+  return result;
+}
+
 static inline double nearest_integer(double x) {
   double result;
   __asm__ __volatile__("frintn %d0, %d1\n\t" : "=w"(result) : "w"(x));

diff  --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h
index 41f717da36dad..e6377fe9dc602 100644
--- a/libc/src/__support/FPUtil/nearest_integer.h
+++ b/libc/src/__support/FPUtil/nearest_integer.h
@@ -21,13 +21,27 @@
 namespace __llvm_libc {
 namespace fputil {
 
-// This is a fast implementation for rounding to a nearest integer that, in case
-// of a tie, might pick a random one among 2 closest integers when the rounding
-// mode is not FE_TONEAREST.
+// This is a fast implementation for rounding to a nearest integer that.
 //
 // Notice that for AARCH64 and x86-64 with SSE4.2 support, we will use their
 // corresponding rounding instruction instead.  And in those cases, the results
 // are rounded to the nearest integer, tie-to-even.
+static inline float nearest_integer(float x) {
+  if (x < 0x1p24f && x > -0x1p24f) {
+    float r = x < 0 ? (x - 0x1.0p23f) + 0x1.0p23f : (x + 0x1.0p23f) - 0x1.0p23f;
+    float diff = x - r;
+    // The expression above is correct for the default rounding mode, round-to-
+    // nearest, tie-to-even.  For other rounding modes, it might be off by 1,
+    // which is corrected below.
+    if (unlikely(diff > 0.5f))
+      return r + 1.0f;
+    if (unlikely(diff < -0.5f))
+      return r - 1.0f;
+    return r;
+  }
+  return x;
+}
+
 static inline double nearest_integer(double x) {
   if (x < 0x1p53 && x > -0x1p53) {
     double r = x < 0 ? (x - 0x1.0p52) + 0x1.0p52 : (x + 0x1.0p52) - 0x1.0p52;

diff  --git a/libc/src/__support/FPUtil/x86_64/nearest_integer.h b/libc/src/__support/FPUtil/x86_64/nearest_integer.h
index db9817a806616..e0c1b1a2d9e2d 100644
--- a/libc/src/__support/FPUtil/x86_64/nearest_integer.h
+++ b/libc/src/__support/FPUtil/x86_64/nearest_integer.h
@@ -24,6 +24,13 @@
 namespace __llvm_libc {
 namespace fputil {
 
+static inline float nearest_integer(float x) {
+  __m128 xmm = _mm_set_ss(x); // NOLINT
+  __m128 ymm =
+      _mm_round_ss(xmm, xmm, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC); // NOLINT
+  return ymm[0];
+}
+
 static inline double nearest_integer(double x) {
   __m128d xmm = _mm_set_sd(x); // NOLINT
   __m128d ymm =