[llvm] Enable logf128 constant folding for hosts with 128bit long double (PR #104929)

Wed Aug 21 05:03:29 PDT 2024

https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/104929

>From 6bd959cb3757d3bd92d5c06ca4c43b215a62c58b Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Mon, 19 Aug 2024 15:52:21 +0000
Subject: [PATCH 1/4] Enable logf128 constant folding for hosts with 128bit
 long double

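This is a reland of #96287. This patch attempts to reduce clang's
compile time by removing the #includes of float128.h from APFloat.h
and APInt.h and performing the conversion to float128 inline in
ConstantFolding.cpp instead of through the convertToQuad functions.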
---
 llvm/CMakeLists.txt                   |  2 --
 llvm/cmake/config-ix.cmake            | 18 +++++++-----------
 llvm/include/llvm/ADT/APFloat.h       | 15 +++------------
 llvm/include/llvm/ADT/APInt.h         | 20 ++++++--------------
 llvm/include/llvm/Support/float128.h  | 14 ++++++--------
 llvm/lib/Analysis/CMakeLists.txt      |  6 ------
 llvm/lib/Analysis/ConstantFolding.cpp | 24 +++++++++++++++++++-----
 llvm/lib/Support/APFloat.cpp          | 24 ++----------------------
 8 files changed, 43 insertions(+), 80 deletions(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index d681b1ccab6299..b03d89a43c34b0 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -560,8 +560,6 @@ set(LLVM_USE_STATIC_ZSTD FALSE CACHE BOOL "Use static version of zstd. Can be TR
 
 set(LLVM_ENABLE_CURL "OFF" CACHE STRING "Use libcurl for the HTTP client if available. Can be ON, OFF, or FORCE_ON")
 
-set(LLVM_HAS_LOGF128 "OFF" CACHE STRING "Use logf128 to constant fold fp128 logarithm calls. Can be ON, OFF, or FORCE_ON")
-
 set(LLVM_ENABLE_HTTPLIB "OFF" CACHE STRING "Use cpp-httplib HTTP server library if available. Can be ON, OFF, or FORCE_ON")
 
 set(LLVM_Z3_INSTALL_DIR "" CACHE STRING "Install directory of the Z3 solver.")
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 0aae13e30f2ab4..976213bb9e948a 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -247,17 +247,6 @@ else()
   set(HAVE_LIBEDIT 0)
 endif()
 
-if(LLVM_HAS_LOGF128)
-  include(CheckCXXSymbolExists)
-  check_cxx_symbol_exists(logf128 math.h HAS_LOGF128)
-
-  if(LLVM_HAS_LOGF128 STREQUAL FORCE_ON AND NOT HAS_LOGF128)
-    message(FATAL_ERROR "Failed to configure logf128")
-  endif()
-
-  set(LLVM_HAS_LOGF128 "${HAS_LOGF128}")
-endif()
-
 # function checks
 check_symbol_exists(arc4random "stdlib.h" HAVE_DECL_ARC4RANDOM)
 find_package(Backtrace)
@@ -271,6 +260,13 @@ if(C_SUPPORTS_WERROR_UNGUARDED_AVAILABILITY_NEW)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unguarded-availability-new")
 endif()
 
+check_cxx_symbol_exists(logf128 cmath HAS_LOGF128)
+check_symbol_exists(__powerpc64le__ "" __PPC64LE)
+if(HAS_LOGF128 AND NOT __PPC64LE)
+    set(LLVM_HAS_LOGF128 On)
+    add_compile_definitions(HAS_LOGF128)
+endif()
+
 # Determine whether we can register EH tables.
 check_symbol_exists(__register_frame "${CMAKE_CURRENT_LIST_DIR}/unwind.h" HAVE_REGISTER_FRAME)
 check_symbol_exists(__deregister_frame "${CMAKE_CURRENT_LIST_DIR}/unwind.h" HAVE_DEREGISTER_FRAME)
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 7039e961bff82d..925d03d4c06670 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -19,7 +19,6 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/float128.h"
 #include <memory>
 
 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
@@ -378,9 +377,6 @@ class IEEEFloat final : public APFloatBase {
   Expected<opStatus> convertFromString(StringRef, roundingMode);
   APInt bitcastToAPInt() const;
   double convertToDouble() const;
-#ifdef HAS_IEE754_FLOAT128
-  float128 convertToQuad() const;
-#endif
   float convertToFloat() const;
 
   /// @}
@@ -1274,14 +1270,9 @@ class APFloat : public APFloatBase {
   /// shorter semantics, like IEEEsingle and others.
   double convertToDouble() const;
 
-  /// Converts this APFloat to host float value.
-  ///
-  /// \pre The APFloat must be built using semantics, that can be represented by
-  /// the host float type without loss of precision. It can be IEEEquad and
-  /// shorter semantics, like IEEEdouble and others.
-#ifdef HAS_IEE754_FLOAT128
-  float128 convertToQuad() const;
-#endif
+  /// Return true if this APFloat's semantics are exactly IEEEquad (quadruple
+  /// precision floating point).
+  bool isValidIEEEQuad() const;
 
   /// Converts this APFloat to host float value.
   ///
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 108df7e0eaeaa3..62e41ce88710b1 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -17,7 +17,6 @@
 
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/float128.h"
 #include <cassert>
 #include <climits>
 #include <cstring>
@@ -1679,13 +1678,6 @@ class [[nodiscard]] APInt {
   /// any bit width. Exactly 64 bits will be translated.
   double bitsToDouble() const { return llvm::bit_cast<double>(getWord(0)); }
 
-#ifdef HAS_IEE754_FLOAT128
-  float128 bitsToQuad() const {
-    __uint128_t ul = ((__uint128_t)U.pVal[1] << 64) + U.pVal[0];
-    return llvm::bit_cast<float128>(ul);
-  }
-#endif
-
   /// Converts APInt bits to a float
   ///
   /// The conversion does not do a translation from integer to float, it just
@@ -1883,6 +1875,12 @@ class [[nodiscard]] APInt {
   /// Returns whether this instance allocated memory.
   bool needsCleanup() const { return !isSingleWord(); }
 
+  /// Get the word corresponding to a bit position
+  /// \returns the corresponding word for the specified bit position.
+  uint64_t getWord(unsigned bitPosition) const {
+    return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
+  }
+
 private:
   /// This union is used to store the integer value. When the
   /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
@@ -1948,12 +1946,6 @@ class [[nodiscard]] APInt {
     return *this;
   }
 
-  /// Get the word corresponding to a bit position
-  /// \returns the corresponding word for the specified bit position.
-  uint64_t getWord(unsigned bitPosition) const {
-    return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
-  }
-
   /// Utility method to change the bit width of this APInt to new bit width,
   /// allocating and/or deallocating as necessary. There is no guarantee on the
   /// value of any bits upon return. Caller should populate the bits after.
diff --git a/llvm/include/llvm/Support/float128.h b/llvm/include/llvm/Support/float128.h
index e15a98dc5a6779..618b320086ba59 100644
--- a/llvm/include/llvm/Support/float128.h
+++ b/llvm/include/llvm/Support/float128.h
@@ -9,18 +9,16 @@
 #ifndef LLVM_FLOAT128
 #define LLVM_FLOAT128
 
+#include <cmath>
+
 namespace llvm {
 
-#if defined(__clang__) && defined(__FLOAT128__) &&                             \
-    defined(__SIZEOF_INT128__) && !defined(__LONG_DOUBLE_IBM128__)
-#define HAS_IEE754_FLOAT128
-typedef __float128 float128;
-#elif defined(__FLOAT128__) && defined(__SIZEOF_INT128__) &&                   \
-    !defined(__LONG_DOUBLE_IBM128__) &&                                        \
-    (defined(__GNUC__) || defined(__GNUG__))
+#ifdef HAS_LOGF128
+#if !defined(__LONG_DOUBLE_IBM128__) && (__SIZEOF_INT128__ == 16)
+typedef decltype(logf128(0.)) float128;
 #define HAS_IEE754_FLOAT128
-typedef _Float128 float128;
 #endif
+#endif // HAS_LOGF128
 
 } // namespace llvm
 #endif // LLVM_FLOAT128
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 393803fad89383..3127f45cc54cb1 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -162,9 +162,3 @@ add_llvm_component_library(LLVMAnalysis
   Support
   TargetParser
   )
-
-include(CheckCXXSymbolExists)
-check_cxx_symbol_exists(logf128 math.h HAS_LOGF128)
-if(HAS_LOGF128)
- target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128)
-endif()
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index defcacdfa8b105..b3bea0722f7530 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -54,6 +54,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/float128.h"
 #include <cassert>
 #include <cerrno>
 #include <cfenv>
@@ -1741,7 +1742,7 @@ Constant *GetConstantFoldFPValue(double V, Type *Ty) {
   llvm_unreachable("Can only constant fold half/float/double");
 }
 
-#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
+#if defined(HAS_IEE754_FLOAT128)
 Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) {
   if (Ty->isFP128Ty())
     return ConstantFP::get(Ty, V);
@@ -1781,11 +1782,18 @@ Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
   return GetConstantFoldFPValue(Result, Ty);
 }
 
-#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
+#if defined(HAS_IEE754_FLOAT128)
 Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
                             Type *Ty) {
   llvm_fenv_clearexcept();
-  float128 Result = NativeFP(V.convertToQuad());
+
+  if (!V.isValidIEEEQuad())
+    return nullptr;
+
+  APInt Api = V.bitcastToAPInt();
+  __uint128_t Int128 = ((__uint128_t)Api.getWord(64) << 64) + Api.getWord(0);
+  float128 Result = NativeFP(llvm::bit_cast<float128>(Int128));
+
   if (llvm_fenv_testexcept()) {
     llvm_fenv_clearexcept();
     return nullptr;
@@ -2114,10 +2122,16 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
     if (IntrinsicID == Intrinsic::canonicalize)
       return constantFoldCanonicalize(Ty, Call, U);
 
-#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
+#if defined(HAS_IEE754_FLOAT128)
     if (Ty->isFP128Ty()) {
       if (IntrinsicID == Intrinsic::log) {
-        float128 Result = logf128(Op->getValueAPF().convertToQuad());
+        APFloat Value = Op->getValueAPF();
+        if (!Value.isValidIEEEQuad())
+          return nullptr;
+        APInt api = Value.bitcastToAPInt();
+        __uint128_t Int128 =
+            ((__uint128_t)api.getWord(64) << 64) + api.getWord(0);
+        float128 Result = logf128(llvm::bit_cast<float128>(Int128));
         return GetConstantFoldFPValue128(Result, Ty);
       }
 
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 7f68c5ab9b7cf7..2ddf99f56f88d5 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -3749,15 +3749,6 @@ double IEEEFloat::convertToDouble() const {
   return api.bitsToDouble();
 }
 
-#ifdef HAS_IEE754_FLOAT128
-float128 IEEEFloat::convertToQuad() const {
-  assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
-         "Float semantics are not IEEEquads");
-  APInt api = bitcastToAPInt();
-  return api.bitsToQuad();
-}
-#endif
-
 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
 /// does not support these bit patterns:
 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
@@ -5406,20 +5397,9 @@ double APFloat::convertToDouble() const {
   return Temp.getIEEE().convertToDouble();
 }
 
-#ifdef HAS_IEE754_FLOAT128
-float128 APFloat::convertToQuad() const {
-  if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
-    return getIEEE().convertToQuad();
-  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
-         "Float semantics is not representable by IEEEquad");
-  APFloat Temp = *this;
-  bool LosesInfo;
-  opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
-  assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
-  (void)St;
-  return Temp.getIEEE().convertToQuad();
+bool APFloat::isValidIEEEQuad() const {
+  return (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad);
 }
-#endif
 
 float APFloat::convertToFloat() const {
   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)

>From 77bae1235c65ac90ecc15544b18c496812bc25a0 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Tue, 20 Aug 2024 14:36:44 +0000
Subject: [PATCH 2/4] Use Api.extractBitsAsZExtValue

---
 llvm/include/llvm/ADT/APInt.h         | 12 ++++++------
 llvm/lib/Analysis/ConstantFolding.cpp | 10 +++++++---
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 62e41ce88710b1..68e79ace3f78bf 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1875,12 +1875,6 @@ class [[nodiscard]] APInt {
   /// Returns whether this instance allocated memory.
   bool needsCleanup() const { return !isSingleWord(); }
 
-  /// Get the word corresponding to a bit position
-  /// \returns the corresponding word for the specified bit position.
-  uint64_t getWord(unsigned bitPosition) const {
-    return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
-  }
-
 private:
   /// This union is used to store the integer value. When the
   /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
@@ -1946,6 +1940,12 @@ class [[nodiscard]] APInt {
     return *this;
   }
 
+  /// Get the word corresponding to a bit position
+  /// \returns the corresponding word for the specified bit position.
+  uint64_t getWord(unsigned bitPosition) const {
+    return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
+  }
+
   /// Utility method to change the bit width of this APInt to new bit width,
   /// allocating and/or deallocating as necessary. There is no guarantee on the
   /// value of any bits upon return. Caller should populate the bits after.
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index b3bea0722f7530..48c8e5d70d8d93 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1791,7 +1791,9 @@ Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
     return nullptr;
 
   APInt Api = V.bitcastToAPInt();
-  __uint128_t Int128 = ((__uint128_t)Api.getWord(64) << 64) + Api.getWord(0);
+  ;
+  __uint128_t Int128 = ((__uint128_t)Api.extractBitsAsZExtValue(64, 64) << 64) +
+                       Api.extractBitsAsZExtValue(64, 0);
   float128 Result = NativeFP(llvm::bit_cast<float128>(Int128));
 
   if (llvm_fenv_testexcept()) {
@@ -2128,9 +2130,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
         APFloat Value = Op->getValueAPF();
         if (!Value.isValidIEEEQuad())
           return nullptr;
-        APInt api = Value.bitcastToAPInt();
+        APInt Api = Value.bitcastToAPInt();
         __uint128_t Int128 =
-            ((__uint128_t)api.getWord(64) << 64) + api.getWord(0);
+            ((__uint128_t)Api.extractBitsAsZExtValue(64, 64) << 64) +
+            Api.extractBitsAsZExtValue(64, 0);
+
         float128 Result = logf128(llvm::bit_cast<float128>(Int128));
         return GetConstantFoldFPValue128(Result, Ty);
       }

>From b45d146b32f5960e81ce7e1699a59478773eab85 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Tue, 20 Aug 2024 14:40:23 +0000
Subject: [PATCH 3/4] Remove stray semi-colon

---
 llvm/lib/Analysis/ConstantFolding.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 48c8e5d70d8d93..ba7203a452b3f7 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1791,7 +1791,6 @@ Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
     return nullptr;
 
   APInt Api = V.bitcastToAPInt();
-  ;
   __uint128_t Int128 = ((__uint128_t)Api.extractBitsAsZExtValue(64, 64) << 64) +
                        Api.extractBitsAsZExtValue(64, 0);
   float128 Result = NativeFP(llvm::bit_cast<float128>(Int128));

>From 5961bc5e90338848f7c2bc2fbaa15c0d3a555adc Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Wed, 21 Aug 2024 11:50:56 +0000
Subject: [PATCH 4/4] Move common code to ConvertToQuad

---
 llvm/lib/Analysis/ConstantFolding.cpp | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index ba7203a452b3f7..81c4d4ec5be412 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1782,18 +1782,24 @@ Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
   return GetConstantFoldFPValue(Result, Ty);
 }
 
+#if defined(HAS_IEE754_FLOAT128)
+float128 ConvertToQuad(const APFloat &Apf) {
+  APInt Api = Apf.bitcastToAPInt();
+  __uint128_t Uint128 =
+      ((__uint128_t)Api.extractBitsAsZExtValue(64, 64) << 64) +
+      Api.extractBitsAsZExtValue(64, 0);
+  return llvm::bit_cast<float128>(Uint128);
+}
+#endif
+
 #if defined(HAS_IEE754_FLOAT128)
 Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
                             Type *Ty) {
   llvm_fenv_clearexcept();
-
   if (!V.isValidIEEEQuad())
     return nullptr;
 
-  APInt Api = V.bitcastToAPInt();
-  __uint128_t Int128 = ((__uint128_t)Api.extractBitsAsZExtValue(64, 64) << 64) +
-                       Api.extractBitsAsZExtValue(64, 0);
-  float128 Result = NativeFP(llvm::bit_cast<float128>(Int128));
+  float128 Result = NativeFP(ConvertToQuad(V));
 
   if (llvm_fenv_testexcept()) {
     llvm_fenv_clearexcept();
@@ -2129,15 +2135,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
         APFloat Value = Op->getValueAPF();
         if (!Value.isValidIEEEQuad())
           return nullptr;
-        APInt Api = Value.bitcastToAPInt();
-        __uint128_t Int128 =
-            ((__uint128_t)Api.extractBitsAsZExtValue(64, 64) << 64) +
-            Api.extractBitsAsZExtValue(64, 0);
 
-        float128 Result = logf128(llvm::bit_cast<float128>(Int128));
+        float128 Result = logf128(ConvertToQuad(Value));
         return GetConstantFoldFPValue128(Result, Ty);
       }
-
       LibFunc Fp128Func = NotLibFunc;
       if (TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) &&
           Fp128Func == LibFunc_logl)