[llvm-branch-commits] [compiler-rt] c288715 - [compiler-rt] [builtins] Use _Float16 on extendhfsf2, truncdfhf2 __truncsfhf2 if available

Thu Dec 3 11:15:06 PST 2020

Author: Adhemerval Zanella
Date: 2020-12-03T16:08:55-03:00
New Revision: c288715e952fe0e179d645a76b0f518e2403a42b

URL: https://github.com/llvm/llvm-project/commit/c288715e952fe0e179d645a76b0f518e2403a42b
DIFF: https://github.com/llvm/llvm-project/commit/c288715e952fe0e179d645a76b0f518e2403a42b.diff

LOG: [compiler-rt] [builtins] Use _Float16 on extendhfsf2, truncdfhf2 __truncsfhf2 if available

On AArch64 this allows use of the native FP16 ABI (although libcalls are
not emitted for fptrunc/fpext lowering), while on other architectures
(arm, for instance) the current expected semantics are preserved.

For testing, _Float16 usage is enabled on a per-architecture basis,
currently only for arm, aarch64, and arm64.

This re-enables the change that was reverted by https://reviews.llvm.org/rGb534beabeed3ba1777cd0ff9ce552d077e496726

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D92241

Added: 
    

Modified: 
    compiler-rt/cmake/builtin-config-ix.cmake
    compiler-rt/lib/builtins/CMakeLists.txt
    compiler-rt/lib/builtins/extendhfsf2.c
    compiler-rt/lib/builtins/fp_extend.h
    compiler-rt/lib/builtins/fp_trunc.h
    compiler-rt/lib/builtins/truncdfhf2.c
    compiler-rt/lib/builtins/truncsfhf2.c
    compiler-rt/test/builtins/CMakeLists.txt
    compiler-rt/test/builtins/Unit/extendhfsf2_test.c
    compiler-rt/test/builtins/Unit/fp_test.h
    compiler-rt/test/builtins/Unit/truncdfhf2_test.c
    compiler-rt/test/builtins/Unit/truncsfhf2_test.c

Removed: 
    


################################################################################
diff  --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 2eeedd49e392..ad3b98799c5c 100644

--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -22,6 +22,13 @@ int foo(int x, int y) {
 }
 ")
 
+builtin_check_c_compiler_source(COMPILER_RT_HAS_FLOAT16
+"
+_Float16 foo(_Float16 x) {
+ return x;
+}
+"
+)
 
 builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_LSE
 "

diff  --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 7f3df6ff548d..f7a9b233d7c7 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -658,6 +658,8 @@ if (APPLE)
 else ()
   set(BUILTIN_CFLAGS "")
 
+  append_list_if(COMPILER_RT_HAS_FLOAT16 -DCOMPILER_RT_HAS_FLOAT16 BUILTIN_CFLAGS)
+
   append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS)
 
   # These flags would normally be added to CMAKE_C_FLAGS by the llvm

diff  --git a/compiler-rt/lib/builtins/extendhfsf2.c b/compiler-rt/lib/builtins/extendhfsf2.c
index 7c1a76eb5851..0159ab09d3eb 100644
--- a/compiler-rt/lib/builtins/extendhfsf2.c
+++ b/compiler-rt/lib/builtins/extendhfsf2.c
@@ -12,15 +12,15 @@
 
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(src_t a) {
   return __extendXfYf2__(a);
 }
 
-COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { return __extendhfsf2(a); }
+COMPILER_RT_ABI float __gnu_h2f_ieee(src_t a) { return __extendhfsf2(a); }
 
 #if defined(__ARM_EABI__)
 #if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI float __aeabi_h2f(uint16_t a) { return __extendhfsf2(a); }
+AEABI_RTABI float __aeabi_h2f(src_t a) { return __extendhfsf2(a); }
 #else
 COMPILER_RT_ALIAS(__extendhfsf2, __aeabi_h2f)
 #endif

diff  --git a/compiler-rt/lib/builtins/fp_extend.h b/compiler-rt/lib/builtins/fp_extend.h
index fb512672e35e..aad4436730dd 100644
--- a/compiler-rt/lib/builtins/fp_extend.h
+++ b/compiler-rt/lib/builtins/fp_extend.h
@@ -40,7 +40,11 @@ static __inline int src_rep_t_clz(src_rep_t a) {
 }
 
 #elif defined SRC_HALF
+#ifdef COMPILER_RT_HAS_FLOAT16
+typedef _Float16 src_t;
+#else
 typedef uint16_t src_t;
+#endif
 typedef uint16_t src_rep_t;
 #define SRC_REP_C UINT16_C
 static const int srcSigBits = 10;

diff  --git a/compiler-rt/lib/builtins/fp_trunc.h b/compiler-rt/lib/builtins/fp_trunc.h
index aca4c9b6e677..00595edd5e01 100644
--- a/compiler-rt/lib/builtins/fp_trunc.h
+++ b/compiler-rt/lib/builtins/fp_trunc.h
@@ -50,7 +50,11 @@ typedef uint32_t dst_rep_t;
 static const int dstSigBits = 23;
 
 #elif defined DST_HALF
+#ifdef COMPILER_RT_HAS_FLOAT16
+typedef _Float16 dst_t;
+#else
 typedef uint16_t dst_t;
+#endif
 typedef uint16_t dst_rep_t;
 #define DST_REP_C UINT16_C
 static const int dstSigBits = 10;

diff  --git a/compiler-rt/lib/builtins/truncdfhf2.c b/compiler-rt/lib/builtins/truncdfhf2.c
index 90c418a4387f..24c6e62f715f 100644
--- a/compiler-rt/lib/builtins/truncdfhf2.c
+++ b/compiler-rt/lib/builtins/truncdfhf2.c
@@ -10,11 +10,11 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
-COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { return __truncXfYf2__(a); }
+COMPILER_RT_ABI dst_t __truncdfhf2(double a) { return __truncXfYf2__(a); }
 
 #if defined(__ARM_EABI__)
 #if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI uint16_t __aeabi_d2h(double a) { return __truncdfhf2(a); }
+AEABI_RTABI dst_t __aeabi_d2h(double a) { return __truncdfhf2(a); }
 #else
 COMPILER_RT_ALIAS(__truncdfhf2, __aeabi_d2h)
 #endif

diff  --git a/compiler-rt/lib/builtins/truncsfhf2.c b/compiler-rt/lib/builtins/truncsfhf2.c
index 1f17194c38e5..379e7cb6f784 100644
--- a/compiler-rt/lib/builtins/truncsfhf2.c
+++ b/compiler-rt/lib/builtins/truncsfhf2.c
@@ -12,15 +12,15 @@
 
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
-COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
+COMPILER_RT_ABI NOINLINE dst_t __truncsfhf2(float a) {
   return __truncXfYf2__(a);
 }
 
-COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); }
+COMPILER_RT_ABI dst_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); }
 
 #if defined(__ARM_EABI__)
 #if defined(COMPILER_RT_ARMHF_TARGET)
-AEABI_RTABI uint16_t __aeabi_f2h(float a) { return __truncsfhf2(a); }
+AEABI_RTABI dst_t __aeabi_f2h(float a) { return __truncsfhf2(a); }
 #else
 COMPILER_RT_ALIAS(__truncsfhf2, __aeabi_f2h)
 #endif

diff  --git a/compiler-rt/test/builtins/CMakeLists.txt b/compiler-rt/test/builtins/CMakeLists.txt
index e3796f840a64..31d16312dd18 100644
--- a/compiler-rt/test/builtins/CMakeLists.txt
+++ b/compiler-rt/test/builtins/CMakeLists.txt
@@ -44,6 +44,11 @@ foreach(arch ${BUILTIN_TEST_ARCH})
     string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}")
   endif()
 
+  if (${arch} MATCHES "arm|aarch64|arm64" AND COMPILER_RT_HAS_FLOAT16)
+    list(APPEND BUILTINS_TEST_TARGET_CFLAGS -DCOMPILER_RT_HAS_FLOAT16)
+    string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}")
+  endif()
+
   # Compute builtins available in library and add them as lit features.
   if(APPLE)
     # TODO: Support other Apple platforms.

diff  --git a/compiler-rt/test/builtins/Unit/extendhfsf2_test.c b/compiler-rt/test/builtins/Unit/extendhfsf2_test.c
index 9a5258b2b46f..86150e8fb0d7 100644
--- a/compiler-rt/test/builtins/Unit/extendhfsf2_test.c
+++ b/compiler-rt/test/builtins/Unit/extendhfsf2_test.c
@@ -5,99 +5,83 @@
 
 #include "fp_test.h"
 
-float __extendhfsf2(uint16_t a);
+float __extendhfsf2(TYPE_FP16 a);
 
-int test__extendhfsf2(uint16_t a, float expected)
+int test__extendhfsf2(TYPE_FP16 a, uint32_t expected)
 {
     float x = __extendhfsf2(a);
-    int ret = compareResultH(x, expected);
+    int ret = compareResultF(x, expected);
 
     if (ret){
         printf("error in test__extendhfsf2(%#.4x) = %f, "
-               "expected %f\n", a, x, expected);
+               "expected %f\n", toRep16(a), x, fromRep32(expected));
     }
     return ret;
 }
 
-char assumption_1[sizeof(__fp16) * CHAR_BIT == 16] = {0};
+char assumption_1[sizeof(TYPE_FP16) * CHAR_BIT == 16] = {0};
 
 int main()
 {
     // qNaN
-    if (test__extendhfsf2(UINT16_C(0x7e00),
-                          makeQNaN32()))
+    if (test__extendhfsf2(fromRep16(0x7e00),
+                          UINT32_C(0x7fc00000)))
         return 1;
     // NaN
-    if (test__extendhfsf2(UINT16_C(0x7e00),
-                          makeNaN32(UINT32_C(0x8000))))
+    if (test__extendhfsf2(fromRep16(0x7f80),
+                          UINT32_C(0x7ff00000)))
         return 1;
     // inf
-    if (test__extendhfsf2(UINT16_C(0x7c00),
-                          makeInf32()))
+    if (test__extendhfsf2(fromRep16(0x7c00),
+                          UINT32_C(0x7f800000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0xfc00),
-                          -makeInf32()))
+    // -inf
+    if (test__extendhfsf2(fromRep16(0xfc00),
+                          UINT32_C(0xff800000)))
         return 1;
     // zero
-    if (test__extendhfsf2(UINT16_C(0x0),
-                          0.0f))
+    if (test__extendhfsf2(fromRep16(0x0),
+                          UINT32_C(0x00000000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0x8000),
-                          -0.0f))
+    // -zero
+    if (test__extendhfsf2(fromRep16(0x8000),
+                          UINT32_C(0x80000000)))
         return 1;
-
-    if (test__extendhfsf2(UINT16_C(0x4248),
-                          3.1415926535f))
-        return 1;
-    if (test__extendhfsf2(UINT16_C(0xc248),
-                          -3.1415926535f))
+    if (test__extendhfsf2(fromRep16(0x4248),
+                          UINT32_C(0x40490000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0x7c00),
-                          0x1.987124876876324p+100f))
+    if (test__extendhfsf2(fromRep16(0xc248),
+                          UINT32_C(0xc0490000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0x6e62),
-                          0x1.988p+12f))
+    if (test__extendhfsf2(fromRep16(0x6e62),
+                          UINT32_C(0x45cc4000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0x3c00),
-                          0x1.0p+0f))
+    if (test__extendhfsf2(fromRep16(0x3c00),
+                          UINT32_C(0x3f800000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0x0400),
-                          0x1.0p-14f))
+    if (test__extendhfsf2(fromRep16(0x0400),
+                          UINT32_C(0x38800000)))
         return 1;
     // denormal
-    if (test__extendhfsf2(UINT16_C(0x0010),
-                          0x1.0p-20f))
-        return 1;
-    if (test__extendhfsf2(UINT16_C(0x0001),
-                          0x1.0p-24f))
+    if (test__extendhfsf2(fromRep16(0x0010),
+                          UINT32_C(0x35800000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0x8001),
-                          -0x1.0p-24f))
+    if (test__extendhfsf2(fromRep16(0x0001),
+                          UINT32_C(0x33800000)))
         return 1;
-    if (test__extendhfsf2(UINT16_C(0x0001),
-                          0x1.5p-25f))
+    if (test__extendhfsf2(fromRep16(0x8001),
+                          UINT32_C(0xb3800000)))
         return 1;
-    // and back to zero
-    if (test__extendhfsf2(UINT16_C(0x0000),
-                          0x1.0p-25f))
-        return 1;
-    if (test__extendhfsf2(UINT16_C(0x8000),
-                          -0x1.0p-25f))
+    if (test__extendhfsf2(fromRep16(0x0001),
+                          UINT32_C(0x33800000)))
         return 1;
     // max (precise)
-    if (test__extendhfsf2(UINT16_C(0x7bff),
-                          65504.0f))
+    if (test__extendhfsf2(fromRep16(0x7bff),
+                          UINT32_C(0x477fe000)))
         return 1;
     // max (rounded)
-    if (test__extendhfsf2(UINT16_C(0x7bff),
-                          65504.0f))
-        return 1;
-    // max (to +inf)
-    if (test__extendhfsf2(UINT16_C(0x7c00),
-                          makeInf32()))
-        return 1;
-    if (test__extendhfsf2(UINT16_C(0xfc00),
-                          -makeInf32()))
+    if (test__extendhfsf2(fromRep16(0x7bff),
+                          UINT32_C(0x477fe000)))
         return 1;
     return 0;
 }

diff  --git a/compiler-rt/test/builtins/Unit/fp_test.h b/compiler-rt/test/builtins/Unit/fp_test.h
index 59d4ae5cf9db..7c5d5111f4e7 100644
--- a/compiler-rt/test/builtins/Unit/fp_test.h
+++ b/compiler-rt/test/builtins/Unit/fp_test.h
@@ -3,13 +3,25 @@
 #include <string.h>
 #include <stdint.h>
 
+#ifdef COMPILER_RT_HAS_FLOAT16
+#define TYPE_FP16 _Float16
+#else
+#define TYPE_FP16 uint16_t
+#endif
+
 enum EXPECTED_RESULT {
     LESS_0, LESS_EQUAL_0, EQUAL_0, GREATER_0, GREATER_EQUAL_0, NEQUAL_0
 };
 
-static inline uint16_t fromRep16(uint16_t x)
+static inline TYPE_FP16 fromRep16(uint16_t x)
 {
+#ifdef COMPILER_RT_HAS_FLOAT16
+    TYPE_FP16 ret;
+    memcpy(&ret, &x, sizeof(ret));
+    return ret;
+#else
     return x;
+#endif
 }
 
 static inline float fromRep32(uint32_t x)
@@ -36,9 +48,15 @@ static inline long double fromRep128(uint64_t hi, uint64_t lo)
 }
 #endif
 
-static inline uint16_t toRep16(uint16_t x)
+static inline uint16_t toRep16(TYPE_FP16 x)
 {
+#ifdef COMPILER_RT_HAS_FLOAT16
+    uint16_t ret;
+    memcpy(&ret, &x, sizeof(ret));
+    return ret;
+#else
     return x;
+#endif
 }
 
 static inline uint32_t toRep32(float x)
@@ -64,7 +82,7 @@ static inline __uint128_t toRep128(long double x)
 }
 #endif
 
-static inline int compareResultH(uint16_t result,
+static inline int compareResultH(TYPE_FP16 result,
                                  uint16_t expected)
 {
     uint16_t rep = toRep16(result);
@@ -199,7 +217,7 @@ static inline char *expectedStr(enum EXPECTED_RESULT expected)
     return "";
 }
 
-static inline uint16_t makeQNaN16(void)
+static inline TYPE_FP16 makeQNaN16(void)
 {
     return fromRep16(0x7e00U);
 }
@@ -221,7 +239,7 @@ static inline long double makeQNaN128(void)
 }
 #endif
 
-static inline uint16_t makeNaN16(uint16_t rand)
+static inline TYPE_FP16 makeNaN16(uint16_t rand)
 {
     return fromRep16(0x7c00U | (rand & 0x7fffU));
 }
@@ -243,7 +261,7 @@ static inline long double makeNaN128(uint64_t rand)
 }
 #endif
 
-static inline uint16_t makeInf16(void)
+static inline TYPE_FP16 makeInf16(void)
 {
     return fromRep16(0x7c00U);
 }

diff  --git a/compiler-rt/test/builtins/Unit/truncdfhf2_test.c b/compiler-rt/test/builtins/Unit/truncdfhf2_test.c
index 74e975eef60b..1990ec9503a7 100644
--- a/compiler-rt/test/builtins/Unit/truncdfhf2_test.c
+++ b/compiler-rt/test/builtins/Unit/truncdfhf2_test.c
@@ -5,16 +5,16 @@
 
 #include "fp_test.h"
 
-uint16_t __truncdfhf2(double a);
+TYPE_FP16 __truncdfhf2(double a);
 
 int test__truncdfhf2(double a, uint16_t expected)
 {
-    uint16_t x = __truncdfhf2(a);
+    TYPE_FP16 x = __truncdfhf2(a);
     int ret = compareResultH(x, expected);
 
     if (ret){
-        printf("error in test__truncdfhf2(%f) = %#.4x, "
-               "expected %#.4x\n", a, x, fromRep16(expected));
+        printf("error in test__truncdfhf2(%lf) = %#.4x, "
+               "expected %#.4x\n", a, toRep16(x), expected);
     }
     return ret;
 }

diff  --git a/compiler-rt/test/builtins/Unit/truncsfhf2_test.c b/compiler-rt/test/builtins/Unit/truncsfhf2_test.c
index 7fcccf1a4e69..c7c8063d5218 100644
--- a/compiler-rt/test/builtins/Unit/truncsfhf2_test.c
+++ b/compiler-rt/test/builtins/Unit/truncsfhf2_test.c
@@ -5,16 +5,16 @@
 
 #include "fp_test.h"
 
-uint16_t __truncsfhf2(float a);
+TYPE_FP16 __truncsfhf2(float a);
 
 int test__truncsfhf2(float a, uint16_t expected)
 {
-    uint16_t x = __truncsfhf2(a);
+    TYPE_FP16 x = __truncsfhf2(a);
     int ret = compareResultH(x, expected);
 
     if (ret){
         printf("error in test__truncsfhf2(%f) = %#.4x, "
-               "expected %#.4x\n", a, x, fromRep16(expected));
+               "expected %#.4x\n", a, toRep16(x), expected);
     }
     return ret;
 }