[flang-commits] [flang] Reland "[flang] Avoid undefined behaviour when parsing format expressions (#147539)" (PR #148169)

Fri Jul 11 06:09:04 PDT 2025

https://github.com/DavidSpickett updated https://github.com/llvm/llvm-project/pull/148169

>From 1eac80b12fa0c2ca5c68132795b6d2f26e376855 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Wed, 9 Jul 2025 14:53:12 +0000
Subject: [PATCH 1/6] Reland "[flang] Avoid undefined behaviour when parsing
 format expressions (#147539)"

This reverts commit e8e5d07767c444913f837dd35846a92fcf520eab.

This previously failed because the flang-rt build could not find the
llvm header file. It passed on some machines but only because they
had globally installed copies of older llvm.

To fix this, I've copied the required routines from llvm into flang.

With the following justification:
* Flang can, and does, use llvm headers.
* Some Flang headers are also used in Flang-rt.
* Flang-rt cannot use llvm headers.
* Therefore any Flang header using in Flang-rt cannot use llvm headers either.

To support that conclusion, https://flang.llvm.org/docs/IORuntimeInternals.html
states:
"The Fortran I/O runtime support library is written in C++17, and uses some C++17 standard library facilities, but it is intended to not have any link-time dependences on the C++ runtime support library or any LLVM libraries."

This talks about libraries but I assume it applies to llvm in general.

Nothing in flang/include/flang/Common, or flang/include/flang/Common
includes any llvm header, and I see some very similar headers there
that duplicate llvm functionality. Like float128.h.

I can only assume this means these files must remain free of dependencies
on LLVM.

I have copied the two routines literally and put them in the flang::common
namespace, for lack of a better place for them. So they don't clash with something.

I have specialised the function to the 1 type flang needs, as it might
save a bit of compile time.
---
 flang/include/flang/Common/format.h | 75 +++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h
index 1650f56140b4d..78ddb1de2fa26 100644
--- a/flang/include/flang/Common/format.h
+++ b/flang/include/flang/Common/format.h
@@ -12,6 +12,7 @@
 #include "Fortran-consts.h"
 #include "enum-set.h"
 #include <cstring>
+#include <limits>
 
 // Define a FormatValidator class template to validate a format expression
 // of a given CHAR type.  To enable use in runtime library code as well as
@@ -28,6 +29,68 @@
 
 namespace Fortran::common {
 
+// AddOverflow and MulOverflow are copied from
+// llvm/include/llvm/Support/MathExtras.h. It is ok to include llvm headers in
+// flang, but this flang header is also used in flang-rt, which cannot use llvm
+// headers.
+
+/// Add two signed integers, computing the two's complement truncated result,
+/// returning true if overflow occurred.
+template <typename T>
+std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) {
+#if __has_builtin(__builtin_add_overflow)
+  return __builtin_add_overflow(X, Y, &Result);
+#else
+  // Perform the unsigned addition.
+  using U = std::make_unsigned_t<T>;
+  const U UX = static_cast<U>(X);
+  const U UY = static_cast<U>(Y);
+  const U UResult = UX + UY;
+
+  // Convert to signed.
+  Result = static_cast<T>(UResult);
+
+  // Adding two positive numbers should result in a positive number.
+  if (X > 0 && Y > 0)
+    return Result <= 0;
+  // Adding two negatives should result in a negative number.
+  if (X < 0 && Y < 0)
+    return Result >= 0;
+  return false;
+#endif
+}
+
+/// Multiply two signed integers, computing the two's complement truncated
+/// result, returning true if an overflow occurred.
+template <typename T>
+std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
+#if __has_builtin(__builtin_mul_overflow)
+  return __builtin_mul_overflow(X, Y, &Result);
+#else
+  // Perform the unsigned multiplication on absolute values.
+  using U = std::make_unsigned_t<T>;
+  const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
+  const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
+  const U UResult = UX * UY;
+
+  // Convert to signed.
+  const bool IsNegative = (X < 0) ^ (Y < 0);
+  Result = IsNegative ? (0 - UResult) : UResult;
+
+  // If any of the args was 0, result is 0 and no overflow occurs.
+  if (UX == 0 || UY == 0)
+    return false;
+
+  // UX and UY are in [1, 2^n], where n is the number of digits.
+  // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
+  // positive) divided by an argument compares to the other.
+  if (IsNegative)
+    return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
+  else
+    return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
+#endif
+}
+
 struct FormatMessage {
   const char *text; // message text; may have one %s argument
   const char *arg; // optional %s argument value
@@ -214,16 +277,18 @@ template <typename CHAR> void FormatValidator<CHAR>::NextToken() {
   case '7':
   case '8':
   case '9': {
-    int64_t lastValue;
     const CHAR *lastCursor;
     integerValue_ = 0;
     bool overflow{false};
     do {
-      lastValue = integerValue_;
       lastCursor = cursor_;
-      integerValue_ = 10 * integerValue_ + c - '0';
-      if (lastValue > integerValue_) {
-        overflow = true;
+      if (!overflow) {
+        overflow =
+            MulOverflow(static_cast<int64_t>(10), integerValue_, integerValue_);
+      }
+      if (!overflow) {
+        overflow = AddOverflow(
+            integerValue_, static_cast<int64_t>(c - '0'), integerValue_);
       }
       c = NextChar();
     } while (c >= '0' && c <= '9');

>From aa10d5dfe83482f41aa306741c8c6756fe913753 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Fri, 11 Jul 2025 08:20:44 +0000
Subject: [PATCH 2/6] manually specialise template

---
 flang/include/flang/Common/format.h | 30 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h
index 78ddb1de2fa26..3c70aea1ec721 100644
--- a/flang/include/flang/Common/format.h
+++ b/flang/include/flang/Common/format.h
@@ -30,25 +30,21 @@
 namespace Fortran::common {
 
 // AddOverflow and MulOverflow are copied from
-// llvm/include/llvm/Support/MathExtras.h. It is ok to include llvm headers in
-// flang, but this flang header is also used in flang-rt, which cannot use llvm
-// headers.
+// llvm/include/llvm/Support/MathExtras.h and specialised to int64_t.
 
 /// Add two signed integers, computing the two's complement truncated result,
 /// returning true if overflow occurred.
-template <typename T>
-std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) {
+bool AddOverflow(int64_t X, int64_t Y, int64_t &Result) {
 #if __has_builtin(__builtin_add_overflow)
   return __builtin_add_overflow(X, Y, &Result);
 #else
   // Perform the unsigned addition.
-  using U = std::make_unsigned_t<T>;
-  const U UX = static_cast<U>(X);
-  const U UY = static_cast<U>(Y);
-  const U UResult = UX + UY;
+  const uint64_t UX = static_cast<uint64_t>(X);
+  const uint64_t UY = static_cast<uint64_t>(Y);
+  const uint64_t UResult = UX + UY;
 
   // Convert to signed.
-  Result = static_cast<T>(UResult);
+  Result = static_cast<int64_t>(UResult);
 
   // Adding two positive numbers should result in a positive number.
   if (X > 0 && Y > 0)
@@ -62,16 +58,14 @@ std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) {
 
 /// Multiply two signed integers, computing the two's complement truncated
 /// result, returning true if an overflow occurred.
-template <typename T>
-std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
+int64_t MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
 #if __has_builtin(__builtin_mul_overflow)
   return __builtin_mul_overflow(X, Y, &Result);
 #else
   // Perform the unsigned multiplication on absolute values.
-  using U = std::make_unsigned_t<T>;
-  const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
-  const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
-  const U UResult = UX * UY;
+  const uint64_t UX = X < 0 ? (0 - static_cast<uint64_t>(X)) : static_cast<uint64_t>(X);
+  const uint64_t UY = Y < 0 ? (0 - static_cast<uint64_t>(Y)) : static_cast<uint64_t>(Y);
+  const uint64_t UResult = UX * UY;
 
   // Convert to signed.
   const bool IsNegative = (X < 0) ^ (Y < 0);
@@ -85,9 +79,9 @@ std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
   // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
   // positive) divided by an argument compares to the other.
   if (IsNegative)
-    return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
+    return UX > (static_cast<uint64_t>(std::numeric_limits<T>::max()) + uint64_t(1)) / UY;
   else
-    return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
+    return UX > (static_cast<uint64_t>(std::numeric_limits<T>::max())) / UY;
 #endif
 }
 

>From 89009a28fadef67310f0310cc90086ef05afb859 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Fri, 11 Jul 2025 09:23:29 +0000
Subject: [PATCH 3/6] format

---
 flang/include/flang/Common/format.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h
index 3c70aea1ec721..a853a636bdbe0 100644
--- a/flang/include/flang/Common/format.h
+++ b/flang/include/flang/Common/format.h
@@ -63,8 +63,10 @@ int64_t MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
   return __builtin_mul_overflow(X, Y, &Result);
 #else
   // Perform the unsigned multiplication on absolute values.
-  const uint64_t UX = X < 0 ? (0 - static_cast<uint64_t>(X)) : static_cast<uint64_t>(X);
-  const uint64_t UY = Y < 0 ? (0 - static_cast<uint64_t>(Y)) : static_cast<uint64_t>(Y);
+  const uint64_t UX =
+      X < 0 ? (0 - static_cast<uint64_t>(X)) : static_cast<uint64_t>(X);
+  const uint64_t UY =
+      Y < 0 ? (0 - static_cast<uint64_t>(Y)) : static_cast<uint64_t>(Y);
   const uint64_t UResult = UX * UY;
 
   // Convert to signed.
@@ -79,7 +81,9 @@ int64_t MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
   // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
   // positive) divided by an argument compares to the other.
   if (IsNegative)
-    return UX > (static_cast<uint64_t>(std::numeric_limits<T>::max()) + uint64_t(1)) / UY;
+    return UX >
+        (static_cast<uint64_t>(std::numeric_limits<T>::max()) + uint64_t(1)) /
+        UY;
   else
     return UX > (static_cast<uint64_t>(std::numeric_limits<T>::max())) / UY;
 #endif

>From b7fc8a5fdbe9a66a775519f25425f9297741042a Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Fri, 11 Jul 2025 12:09:01 +0000
Subject: [PATCH 4/6] fix limit template

---
 flang/include/flang/Common/format.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h
index a853a636bdbe0..91d1db2484557 100644
--- a/flang/include/flang/Common/format.h
+++ b/flang/include/flang/Common/format.h
@@ -82,10 +82,10 @@ int64_t MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
   // positive) divided by an argument compares to the other.
   if (IsNegative)
     return UX >
-        (static_cast<uint64_t>(std::numeric_limits<T>::max()) + uint64_t(1)) /
+        (static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + uint64_t(1)) /
         UY;
   else
-    return UX > (static_cast<uint64_t>(std::numeric_limits<T>::max())) / UY;
+    return UX > (static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) / UY;
 #endif
 }
 

>From 4616aed4c157063dea6a561397a28388ea567804 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Fri, 11 Jul 2025 13:01:52 +0000
Subject: [PATCH 5/6] Make static

---
 flang/include/flang/Common/format.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h
index 91d1db2484557..ed12e3a5f6273 100644
--- a/flang/include/flang/Common/format.h
+++ b/flang/include/flang/Common/format.h
@@ -34,7 +34,7 @@ namespace Fortran::common {
 
 /// Add two signed integers, computing the two's complement truncated result,
 /// returning true if overflow occurred.
-bool AddOverflow(int64_t X, int64_t Y, int64_t &Result) {
+static inline bool AddOverflow(int64_t X, int64_t Y, int64_t &Result) {
 #if __has_builtin(__builtin_add_overflow)
   return __builtin_add_overflow(X, Y, &Result);
 #else
@@ -58,7 +58,7 @@ bool AddOverflow(int64_t X, int64_t Y, int64_t &Result) {
 
 /// Multiply two signed integers, computing the two's complement truncated
 /// result, returning true if an overflow occurred.
-int64_t MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
+static inline bool MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
 #if __has_builtin(__builtin_mul_overflow)
   return __builtin_mul_overflow(X, Y, &Result);
 #else

>From 3baaad3723767f29b3b434b71e6d2b080b2e38a5 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Fri, 11 Jul 2025 13:08:19 +0000
Subject: [PATCH 6/6] format

---
 flang/include/flang/Common/format.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h
index ed12e3a5f6273..8b4ffe63c4459 100644
--- a/flang/include/flang/Common/format.h
+++ b/flang/include/flang/Common/format.h
@@ -81,11 +81,12 @@ static inline bool MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
   // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
   // positive) divided by an argument compares to the other.
   if (IsNegative)
-    return UX >
-        (static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + uint64_t(1)) /
+    return UX > (static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) +
+                    uint64_t(1)) /
         UY;
   else
-    return UX > (static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) / UY;
+    return UX >
+        (static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) / UY;
 #endif
 }