[llvm] WIP/POC: Constant Fold Logf128 calls (PR #84501)

Fri Mar 8 07:32:33 PST 2024

https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/84501

This is a proof of concept/work in progress patch.

This patch enables ConstantFolding of log FP128 calls.

This is achieved by using CMake to check whether the host system provides the logf128 symbol. If so, the runtime call is replaced with the compile-time constant returned from logf128.

This approach could be considered controversial as cross-compiled llvm executables using shared objects may not have the logf128 symbol available at runtime.

The implementation of logf128 may also yield different results on different hosts; for example, x86 may use fp80 precision where other hosts use full fp128 precision.

This approach relies on unit tests, as the more commonplace Clang/C tests and opt/llc/IR tests are not applicable: they are unaware of the result of the configure-time CMake check.

>From 2e49b54123fed92e1a2188989a696310c9c4a725 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Mon, 26 Feb 2024 15:32:09 +0000
Subject: [PATCH] WIP/POC: Constant Fold Logf128 calls

This is a proof of concept/work in progress patch.

This patch enables ConstantFolding of log FP128 calls.

This is achieved by using CMake to check whether the host
system provides the logf128 symbol. If so, the runtime call
is replaced with the compile-time constant returned from
logf128.

This approach could be considered controversial as cross-compiled
llvm executables using shared objects may not have the logf128 symbol
available at runtime.

The implementation of logf128 may also yield different results
on different hosts; for example, x86 may use fp80 precision
where other hosts use full fp128 precision.

This approach relies on unit tests, as the more commonplace Clang/C
tests and opt/llc/IR tests are not applicable: they are unaware of
the result of the configure-time CMake check.
---
 llvm/include/llvm/ADT/APFloat.h             | 10 +++
 llvm/include/llvm/ADT/APInt.h               | 15 +++++
 llvm/include/llvm/IR/Constants.h            |  2 +
 llvm/lib/Analysis/CMakeLists.txt            |  6 ++
 llvm/lib/Analysis/ConstantFolding.cpp       | 20 +++++-
 llvm/lib/IR/Constants.cpp                   | 16 +++++
 llvm/lib/Support/APFloat.cpp                | 24 +++++++
 llvm/unittests/Analysis/CMakeLists.txt      |  7 +++
 llvm/unittests/Analysis/ConstantLogf128.cpp | 70 +++++++++++++++++++++
 9 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 llvm/unittests/Analysis/ConstantLogf128.cpp

diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8c247bbcec90a2..1ce50fc6996683 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -299,6 +299,7 @@ class IEEEFloat final : public APFloatBase {
   IEEEFloat(const fltSemantics &, integerPart);
   IEEEFloat(const fltSemantics &, uninitializedTag);
   IEEEFloat(const fltSemantics &, const APInt &);
+  explicit IEEEFloat(long double ld);
   explicit IEEEFloat(double d);
   explicit IEEEFloat(float f);
   IEEEFloat(const IEEEFloat &);
@@ -354,6 +355,7 @@ class IEEEFloat final : public APFloatBase {
   Expected<opStatus> convertFromString(StringRef, roundingMode);
   APInt bitcastToAPInt() const;
   double convertToDouble() const;
+  long double convertToQuad() const;
   float convertToFloat() const;
 
   /// @}
@@ -942,6 +944,7 @@ class APFloat : public APFloatBase {
   APFloat(const fltSemantics &Semantics, uninitializedTag)
       : U(Semantics, uninitialized) {}
   APFloat(const fltSemantics &Semantics, const APInt &I) : U(Semantics, I) {}
+  explicit APFloat(long double ld) : U(IEEEFloat(ld), IEEEquad()) {}
   explicit APFloat(double d) : U(IEEEFloat(d), IEEEdouble()) {}
   explicit APFloat(float f) : U(IEEEFloat(f), IEEEsingle()) {}
   APFloat(const APFloat &RHS) = default;
@@ -1218,6 +1221,13 @@ class APFloat : public APFloatBase {
   /// shorter semantics, like IEEEsingle and others.
   double convertToDouble() const;
 
+  /// Converts this APFloat to a host long double value.
+  ///
+  /// \pre The APFloat must be built using semantics that can be represented by
+  /// IEEEquad without loss of precision. It can be IEEEquad and shorter
+  /// semantics, like IEEEdouble and others.
+  long double convertToQuad() const;
+
   /// Converts this APFloat to host float value.
   ///
   /// \pre The APFloat must be built using semantics, that can be represented by
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 6c05367cecb1ea..8b74d906556894 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1663,6 +1663,18 @@ class [[nodiscard]] APInt {
   /// any bit width. Exactly 64 bits will be translated.
   double bitsToDouble() const { return llvm::bit_cast<double>(getWord(0)); }
 
+  /// Converts APInt bits to a long double
+  ///
+  /// The conversion does not do a translation from integer to long double, it
+  /// just re-interprets the low 128 bits as a long double. The APInt must be
+  /// at least 128 bits wide.
+  long double bitsToQuad() const {
+    assert(getBitWidth() >= 128 && "Expected at least 128 bits");
+    const __uint128_t Bits =
+        (static_cast<__uint128_t>(U.pVal[1]) << 64) | U.pVal[0];
+    return llvm::bit_cast<long double>(Bits);
+  }
+
   /// Converts APInt bits to a float
   ///
   /// The conversion does not do a translation from integer to float, it just
@@ -1688,6 +1693,26 @@ class [[nodiscard]] APInt {
     return APInt(sizeof(float) * CHAR_BIT, llvm::bit_cast<uint32_t>(V));
   }
 
+  /// Converts a long double to APInt bits.
+  ///
+  /// The conversion does not do a translation from long double to integer, it
+  /// just re-interprets the bits of the long double.
+  ///
+  /// NOTE(review): the only check is that long double is 16 bytes; a 16 byte
+  /// x87 extended-precision long double passes it but is not IEEE binary128.
+  /// Confirm that callers treating the result as IEEEquad run on such hosts.
+  static APInt longDoubleToBits(long double V) {
+    static_assert(sizeof(long double) == 16, "Expected 16 byte long double");
+
+    // Reinterpret the whole value once; a numeric cast of V would convert the
+    // floating-point value to an integer instead of exposing its low 64 bits.
+    const __uint128_t Bits = llvm::bit_cast<__uint128_t>(V);
+    const uint64_t Words[2] = {
+        static_cast<uint64_t>(Bits),
+        static_cast<uint64_t>(Bits >> 64),
+    };
+    return APInt(sizeof(long double) * CHAR_BIT, 2, Words);
+  }
+
   /// @}
   /// \name Mathematics Operations
   /// @{
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index c0ac9a4aa6750c..782359f5445277 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -289,6 +289,8 @@ class ConstantFP final : public ConstantData {
   /// host double and as the target format.
   static Constant *get(Type *Ty, double V);
 
+  static Constant *get128(Type *Ty, long double V);
+
   /// If Ty is a vector type, return a Constant with a splat of the given
   /// value. Otherwise return a ConstantFP for the given value.
   static Constant *get(Type *Ty, const APFloat &V);
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 35ea03f42f82b1..4473e888afa979 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -161,3 +161,9 @@ add_llvm_component_library(LLVMAnalysis
   Support
   TargetParser
   )
+
+include(CheckCXXSymbolExists)
+check_cxx_symbol_exists(logf128 math.h HAS_LOGF128)
+if(HAS_LOGF128) # lets ConstantFolding.cpp call the host logf128
+  target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128)
+endif()
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 8b7031e7fe4a6f..12199b7461c48e 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1680,7 +1680,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case 'l':
     return Name == "log" || Name == "logf" ||
            Name == "log2" || Name == "log2f" ||
-           Name == "log10" || Name == "log10f";
+           Name == "log10" || Name == "log10f" ||
+           Name == "logl";
   case 'n':
     return Name == "nearbyint" || Name == "nearbyintf";
   case 'p':
@@ -1763,6 +1764,29 @@ inline bool llvm_fenv_testexcept() {
   return false;
 }
 
+/// Folds a log call on an fp128 operand by evaluating logf128 on the host.
+///
+/// \param V the constant operand; converted with APFloat::convertToQuad.
+/// \param Ty the type of the constant to build from the result.
+/// \returns the folded constant, or nullptr if the host has no logf128
+/// (HAS_LOGF128 is not defined) or llvm_fenv_testexcept() reports a problem
+/// after the host call.
+Constant *ConstantFoldLogf128(const APFloat &V, Type *Ty) {
+#ifdef HAS_LOGF128
+  // Start from a clean exception state, as ConstantFoldFP does, and refuse to
+  // fold if the host call trips the check (e.g. a zero or negative operand).
+  llvm_fenv_clearexcept();
+  long double Result = logf128(V.convertToQuad());
+  if (llvm_fenv_testexcept()) {
+    llvm_fenv_clearexcept();
+    return nullptr;
+  }
+  return ConstantFP::get128(Ty, Result);
+#else
+  return nullptr;
+#endif
+}
+
 Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
                          Type *Ty) {
   llvm_fenv_clearexcept();
@@ -2094,7 +2104,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
     if (IntrinsicID == Intrinsic::canonicalize)
       return constantFoldCanonicalize(Ty, Call, U);
 
-    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isFP128Ty())
       return nullptr;
 
     // Use internal versions of these intrinsics.
@@ -2209,6 +2219,8 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
     switch (IntrinsicID) {
       default: break;
       case Intrinsic::log:
+        if (Ty->isFP128Ty())
+          return ConstantFoldLogf128(APF, Ty);
         return ConstantFoldFP(log, APF, Ty);
       case Intrinsic::log2:
         // TODO: What about hosts that lack a C99 library?
@@ -2338,6 +2350,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
         return ConstantFoldFP(log, APF, Ty);
       break;
+    case LibFunc_logl:
+      if (!APF.isNegative() && !APF.isZero() && TLI->has(Func) && Ty->isFP128Ty())
+        return ConstantFoldLogf128(APF, Ty);
+      break;
     case LibFunc_log2:
     case LibFunc_log2f:
     case LibFunc_log2_finite:
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index e6b92aad392f66..8482fee86c0ac3 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -976,6 +976,22 @@ Constant *ConstantFP::get(Type *Ty, double V) {
   return C;
 }
 
+Constant *ConstantFP::get128(Type *Ty, long double V) {
+  // Build the value from the long double first, then convert it to whatever
+  // floating-point semantics the scalar element type of Ty uses.
+  APFloat Value(V);
+  bool LosesInfo;
+  const fltSemantics &Sem = Ty->getScalarType()->getFltSemantics();
+  Value.convert(Sem, APFloat::rmNearestTiesToEven, &LosesInfo);
+  Constant *Scalar = get(Ty->getContext(), Value);
+
+  // A vector type gets the scalar splatted across every lane.
+  auto *VecTy = dyn_cast<VectorType>(Ty);
+  if (!VecTy)
+    return Scalar;
+  return ConstantVector::getSplat(VecTy->getElementCount(), Scalar);
+}
+
 Constant *ConstantFP::get(Type *Ty, const APFloat &V) {
   ConstantFP *C = get(Ty->getContext(), V);
   assert(C->getType() == Ty->getScalarType() &&
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 0a4f5ac01553f1..b231906fee0761 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -3670,6 +3670,13 @@ double IEEEFloat::convertToDouble() const {
   return api.bitsToDouble();
 }
 
+long double IEEEFloat::convertToQuad() const {
+  // Only a value that already has IEEEquad semantics may be handed out here.
+  assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
+         "Float semantics are not IEEEquads");
+  return bitcastToAPInt().bitsToQuad();
+}
+
 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
 /// does not support these bit patterns:
 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
@@ -3958,6 +3965,10 @@ IEEEFloat::IEEEFloat(double d) {
   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
 }
 
+IEEEFloat::IEEEFloat(long double ld) { // always uses IEEEquad semantics
+  initFromAPInt(&semIEEEquad, APInt::longDoubleToBits(ld));
+}
+
 namespace {
   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
     Buffer.append(Str.begin(), Str.end());
@@ -5265,6 +5276,19 @@ double APFloat::convertToDouble() const {
   return Temp.getIEEE().convertToDouble();
 }
 
+long double APFloat::convertToQuad() const {
+  if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)
+    return getIEEE().convertToQuad();
+  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
+         "Float semantics is not representable by IEEEquad");
+  APFloat Widened(*this);
+  bool Lossy;
+  opStatus Status = Widened.convert(semIEEEquad, rmNearestTiesToEven, &Lossy);
+  assert(!(Status & opInexact) && !Lossy && "Unexpected imprecision");
+  (void)Status;
+  return Widened.getIEEE().convertToQuad();
+}
+
 float APFloat::convertToFloat() const {
   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
     return getIEEE().convertToFloat();
diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt
index b1aeaa6e71fd4c..796a31cc216812 100644
--- a/llvm/unittests/Analysis/CMakeLists.txt
+++ b/llvm/unittests/Analysis/CMakeLists.txt
@@ -51,6 +51,7 @@ set(ANALYSIS_TEST_SOURCES
   ValueLatticeTest.cpp
   ValueTrackingTest.cpp
   VectorUtilsTest.cpp
+  ConstantLogf128.cpp
   )
 
 set(MLGO_TESTS TFUtilsTest.cpp)
@@ -80,5 +81,11 @@ if(NOT WIN32)
   export_executable_symbols_for_plugins(AnalysisTests)
 endif()
 
+include(CheckCXXSymbolExists)
+check_cxx_symbol_exists(logf128 math.h HAS_LOGF128) # probe math.h for logf128
+if(HAS_LOGF128) # tells ConstantLogf128.cpp whether to expect a folded result
+  target_compile_definitions(AnalysisTests PRIVATE HAS_LOGF128)
+endif()
+
 add_subdirectory(InlineAdvisorPlugin)
 add_subdirectory(InlineOrderPlugin)
diff --git a/llvm/unittests/Analysis/ConstantLogf128.cpp b/llvm/unittests/Analysis/ConstantLogf128.cpp
new file mode 100644
index 00000000000000..bf0f447275c4a5
--- /dev/null
+++ b/llvm/unittests/Analysis/ConstantLogf128.cpp
@@ -0,0 +1,79 @@
+//===- unittests/Analysis/ConstantLogf128.cpp - logf128 folding tests -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+// Fixture parameterized over the name of the callee that computes the log.
+class ConstantFoldLogf128Fixture
+    : public ::testing::TestWithParam<std::string> {};
+
+// Builds a call to the named callee with the fp128 constant 2.0 and checks
+// what ConstantFoldCall returns for it.
+TEST_P(ConstantFoldLogf128Fixture, ConstantFoldLogf128) {
+  LLVMContext Context;
+  IRBuilder<> Builder(Context);
+  Module MainModule("Logf128TestModule", Context);
+  MainModule.setTargetTriple("aarch64-unknown-linux");
+
+  // The call instruction needs somewhere to live: a function with one block.
+  Type *FP128Ty = Type::getFP128Ty(Context);
+  FunctionType *FP128Prototype = FunctionType::get(FP128Ty, false);
+  Function *Logf128TestFunction = Function::Create(
+      FP128Prototype, Function::ExternalLinkage, "logf128test", MainModule);
+  BasicBlock *EntryBlock =
+      BasicBlock::Create(Context, "entry", Logf128TestFunction);
+  Builder.SetInsertPoint(EntryBlock);
+
+  // Declare the callee under test and call it with the constant 2.0.
+  FunctionType *FP128FP128Prototype =
+      FunctionType::get(FP128Ty, {FP128Ty}, false);
+  Constant *Constant2L = ConstantFP::get128(FP128Ty, 2.0L);
+  Function *Logl = Function::Create(
+      FP128FP128Prototype, Function::ExternalLinkage, GetParam(), MainModule);
+  CallInst *LoglCall = Builder.CreateCall(Logl, Constant2L);
+
+  TargetLibraryInfoImpl TLII(Triple(MainModule.getTargetTriple()));
+  TargetLibraryInfo TLI(TLII, Logf128TestFunction);
+  Constant *FoldResult = ConstantFoldCall(LoglCall, Logl, Constant2L, &TLI);
+
+#ifndef HAS_LOGF128
+  // Without a host logf128 the call must be left unfolded.
+  ASSERT_EQ(FoldResult, nullptr);
+#else
+  // dyn_cast must not be handed a null pointer, so confirm the fold produced
+  // something before inspecting it.
+  ASSERT_NE(FoldResult, nullptr);
+  auto *ConstantLog = dyn_cast<ConstantFP>(FoldResult);
+  ASSERT_NE(ConstantLog, nullptr);
+
+  // ln(2) is 0x1.62e42fefa39ef357...p-1. Print only the 14 hex digits a double
+  // can hold so the check does not hinge on the last-bit accuracy of the host
+  // logf128 implementation.
+  APFloat APF = ConstantLog->getValueAPF();
+  char LongDoubleHexString[0xFF];
+  unsigned Size =
+      APF.convertToHexString(LongDoubleHexString, 14, true,
+                             APFloatBase::roundingMode::NearestTiesToAway);
+  EXPECT_GT(Size, 0U);
+  ASSERT_STREQ(LongDoubleHexString, "0X1.62E42FEFA39EFP-1");
+#endif
+}
+
+INSTANTIATE_TEST_SUITE_P(ConstantFoldLogf128, ConstantFoldLogf128Fixture,
+                         ::testing::Values("logl", "llvm.log.f128"));
+
+} // end anonymous namespace