[flang-commits] [flang] [flang] IEEE underflow control for Arm (PR #124807)

Tue Jan 28 10:02:59 PST 2025

https://github.com/vdonaldson created https://github.com/llvm/llvm-project/pull/124807

Update IEEE_SUPPORT_UNDERFLOW_CONTROL, IEEE_GET_UNDERFLOW_MODE, and IEEE_SET_UNDERFLOW_MODE code for Arm.

From 15c630735fb9abfedae239d7649aefb15f4d94c7 Mon Sep 17 00:00:00 2001
From: V Donaldson <vdonaldson at nvidia.com>
Date: Tue, 28 Jan 2025 10:00:19 -0800
Subject: [PATCH] [flang] IEEE underflow control for Arm

Update IEEE_SUPPORT_UNDERFLOW_CONTROL, IEEE_GET_UNDERFLOW_MODE, and
IEEE_SET_UNDERFLOW_MODE code for Arm.
---
 flang/include/flang/Tools/TargetSetup.h | 31 ++++++++++-----------
 flang/runtime/exceptions.cpp            | 36 ++++++++++++++++++++-----
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/flang/include/flang/Tools/TargetSetup.h b/flang/include/flang/Tools/TargetSetup.h
index d1b0da3a42c897..5d23df6823a948 100644
--- a/flang/include/flang/Tools/TargetSetup.h
+++ b/flang/include/flang/Tools/TargetSetup.h
@@ -24,34 +24,35 @@ namespace Fortran::tools {
     const std::string &compilerVersion, const std::string &compilerOptions) {
 
   const llvm::Triple &targetTriple{targetMachine.getTargetTriple()};
-  // FIXME: Handle real(3) ?
-  if (targetTriple.getArch() != llvm::Triple::ArchType::x86_64) {
-    targetCharacteristics.DisableType(
-        Fortran::common::TypeCategory::Real, /*kind=*/10);
-  }
+
+  targetCharacteristics.set_ieeeFeature(evaluate::IeeeFeature::Halting, true);
+
   if (targetTriple.getArch() == llvm::Triple::ArchType::x86_64) {
     targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/3);
     targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/4);
     targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/8);
   }
+
   if (targetTriple.isARM() || targetTriple.isAArch64()) {
     targetCharacteristics.set_haltingSupportIsUnknownAtCompileTime();
     targetCharacteristics.set_ieeeFeature(
         evaluate::IeeeFeature::Halting, false);
-  } else {
-    targetCharacteristics.set_ieeeFeature(evaluate::IeeeFeature::Halting);
+    targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/3);
+    targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/4);
+    targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/8);
+  }
+
+  if (targetTriple.getArch() != llvm::Triple::ArchType::x86_64) {
+    targetCharacteristics.DisableType(
+        Fortran::common::TypeCategory::Real, /*kind=*/10);
   }
 
-  // Figure out if we can support F128: see
-  // flang/runtime/Float128Math/math-entries.h
-  // TODO: this should be taken from TargetInfo::getLongDoubleFormat to support
-  // cross-compilation
+  // Check for kind=16 support. See flang/runtime/Float128Math/math-entries.h.
+  // TODO: Take this from TargetInfo::getLongDoubleFormat for cross compilation.
 #ifdef FLANG_RUNTIME_F128_MATH_LIB
-  // we can use libquadmath wrappers
-  constexpr bool f128Support = true;
+  constexpr bool f128Support = true; // use libquadmath wrappers
 #elif HAS_LDBL128
-  // we can use libm wrappers
-  constexpr bool f128Support = true;
+  constexpr bool f128Support = true; // use libm wrappers
 #else
   constexpr bool f128Support = false;
 #endif
diff --git a/flang/runtime/exceptions.cpp b/flang/runtime/exceptions.cpp
index f541b8e844aded..b41c7cf438f569 100644
--- a/flang/runtime/exceptions.cpp
+++ b/flang/runtime/exceptions.cpp
@@ -11,7 +11,9 @@
 #include "flang/Runtime/exceptions.h"
 #include "terminator.h"
 #include <cfenv>
-#if __x86_64__
+#if defined(__aarch64__) && !defined(_WIN32)
+#include <fpu_control.h>
+#elif defined(__x86_64__)
 #include <xmmintrin.h>
 #endif
 
@@ -90,20 +92,40 @@ bool RTNAME(SupportHalting)([[maybe_unused]] uint32_t except) {
 #endif
 }
 
+// A hardware FZ (flush to zero) bit is the negation of the
+// ieee_[get|set]_underflow_mode GRADUAL argument.
+#if defined(_MM_FLUSH_ZERO_MASK)
+// The x86_64 MXCSR FZ bit affects computations of real kinds 3, 4, and 8.
+#elif defined(_FPU_GETCW)
+// The aarch64 FPCR FZ bit affects computations of real kinds 3, 4, and 8.
+// bit 24: FZ   -- single, double precision flush to zero bit
+// bit 19: FZ16 -- half precision flush to zero [in mask; not yet relevant]
+#define _FPU_FPCR_FZ_MASK_ 0x01080000
+#endif
+
 bool RTNAME(GetUnderflowMode)(void) {
-#if _MM_FLUSH_ZERO_MASK
-  // The MXCSR Flush to Zero flag is the negation of the ieee_get_underflow_mode
-  // GRADUAL argument. It affects real computations of kinds 3, 4, and 8.
+#if defined(_MM_FLUSH_ZERO_MASK)
   return _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_OFF;
+#elif defined(_FPU_GETCW)
+  uint64_t fpcr;
+  _FPU_GETCW(fpcr);
+  return (fpcr & _FPU_FPCR_FZ_MASK_) == 0;
 #else
   return false;
 #endif
 }
 void RTNAME(SetUnderflowMode)(bool flag) {
-#if _MM_FLUSH_ZERO_MASK
-  // The MXCSR Flush to Zero flag is the negation of the ieee_set_underflow_mode
-  // GRADUAL argument. It affects real computations of kinds 3, 4, and 8.
+#if defined(_MM_FLUSH_ZERO_MASK)
   _MM_SET_FLUSH_ZERO_MODE(flag ? _MM_FLUSH_ZERO_OFF : _MM_FLUSH_ZERO_ON);
+#elif defined(_FPU_GETCW)
+  uint64_t fpcr;
+  _FPU_GETCW(fpcr);
+  if (flag) {
+    fpcr &= ~_FPU_FPCR_FZ_MASK_;
+  } else {
+    fpcr |= _FPU_FPCR_FZ_MASK_;
+  }
+  _FPU_SETCW(fpcr);
 #endif
 }