[clang] [llvm] [clang] Use separator for large numeric values in overflow diagnostic (PR #80939)

Tue Feb 6 21:44:49 PST 2024

https://github.com/Atousa created https://github.com/llvm/llvm-project/pull/80939

Add functionality to APInt::toString() that allows it to insert
separators between groups of digits, using the C++ litteral
separator ' between groups.
    
Fixes issue #58228


>From 6dcb144b92ad2cd4198a53aae40f77d3eba3dbca Mon Sep 17 00:00:00 2001
From: Atousa Duprat <atousa.p at gmail.com>
Date: Tue, 6 Feb 2024 21:02:05 -0800
Subject: [PATCH 1/2] [clang] Use separator for large numeric values in
 overflow diagnostic

Add functionality to APInt::toString() that allows it to insert
separators between groups of digits, using the C++ litteral
separator ' between groups.

Fixes issue #58228
---
 clang/lib/AST/ExprConstant.cpp       |  3 ++-
 llvm/include/llvm/ADT/APInt.h        |  3 ++-
 llvm/include/llvm/ADT/StringExtras.h |  6 +++--
 llvm/lib/Support/APInt.cpp           | 24 ++++++++++++++++++-
 llvm/unittests/ADT/APIntTest.cpp     | 35 ++++++++++++++++++++++++++++
 5 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 089bc2094567f7..a46887d6cf1be0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2774,7 +2774,8 @@ static bool CheckedIntArithmetic(EvalInfo &Info, const Expr *E,
     if (Info.checkingForUndefinedBehavior())
       Info.Ctx.getDiagnostics().Report(E->getExprLoc(),
                                        diag::warn_integer_constant_overflow)
-          << toString(Result, 10) << E->getType() << E->getSourceRange();
+          << toString(Result, 10, Result.isSigned(), false, true, true)
+          << E->getType() << E->getSourceRange();
     return HandleOverflow(Info, E, Value, E->getType());
   }
   return true;
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 6f2f25548cc84b..0d696e7a6f981b 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1626,7 +1626,8 @@ class [[nodiscard]] APInt {
   /// SmallString. If Radix > 10, UpperCase determine the case of letter
   /// digits.
   void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
-                bool formatAsCLiteral = false, bool UpperCase = true) const;
+                bool formatAsCLiteral = false, bool UpperCase = true, 
+                bool insertSeparators = false) const;
 
   /// Considers the APInt to be unsigned and converts it into a string in the
   /// radix given. The radix can be 2, 8, 10 16, or 36.
diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index a24368924bc90a..f96b2032f048c4 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -329,9 +329,11 @@ inline std::string itostr(int64_t X) {
 }
 
 inline std::string toString(const APInt &I, unsigned Radix, bool Signed,
-                            bool formatAsCLiteral = false) {
+                            bool formatAsCLiteral = false,
+                            bool upperCase = true,
+                            bool addSeparators = false) {
   SmallString<40> S;
-  I.toString(S, Radix, Signed, formatAsCLiteral);
+  I.toString(S, Radix, Signed, formatAsCLiteral, upperCase, addSeparators);
   return std::string(S);
 }
 
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 05b1526da95ff7..e7ca82a6c126d5 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -2161,7 +2161,8 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
 }
 
 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
-                     bool formatAsCLiteral, bool UpperCase) const {
+                     bool formatAsCLiteral, bool UpperCase, 
+                     bool insertSeparators) const {
   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
           Radix == 36) &&
          "Radix should be 2, 8, 10, 16, or 36!");
@@ -2187,6 +2188,12 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
     }
   }
 
+  // Number of digits in a group between separators
+  int Grouping = 4;
+  if (Radix == 8 || Radix == 10) {
+    Grouping = 3;
+  }
+
   // First, check for a zero value and just short circuit the logic below.
   if (isZero()) {
     while (*Prefix) {
@@ -2223,9 +2230,14 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
       ++Prefix;
     };
 
+    int Pos = 0;
     while (N) {
+      if (insertSeparators && Pos%Grouping==0 && Pos > 0) {
+        *--BufPtr = '\'';
+      }
       *--BufPtr = Digits[N % Radix];
       N /= Radix;
+      Pos++;
     }
     Str.append(BufPtr, std::end(Buffer));
     return;
@@ -2257,17 +2269,27 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
     unsigned MaskAmt = Radix - 1;
 
+    int Pos = 0;
     while (Tmp.getBoolValue()) {
       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
+      if (insertSeparators && Pos%Grouping==0 && Pos>0) {
+        Str.push_back('\'');
+      }
       Str.push_back(Digits[Digit]);
       Tmp.lshrInPlace(ShiftAmt);
+      Pos++;
     }
   } else {
+    int Pos = 0;
     while (Tmp.getBoolValue()) {
       uint64_t Digit;
       udivrem(Tmp, Radix, Tmp, Digit);
       assert(Digit < Radix && "divide failed");
+      if (insertSeparators && Pos%Grouping==0 && Pos>0) {
+        Str.push_back('\'');
+      }
       Str.push_back(Digits[Digit]);
+      Pos++;
     }
   }
 
diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp
index 3b909f8f7d14e2..843a7c447c413f 100644
--- a/llvm/unittests/ADT/APIntTest.cpp
+++ b/llvm/unittests/ADT/APIntTest.cpp
@@ -1379,6 +1379,23 @@ TEST(APIntTest, toString) {
   EXPECT_EQ(std::string(S), "0");
   S.clear();
 
+  // with separators
+  APInt(64, 140).toString(S, 2, false, true, false, true);
+  EXPECT_EQ(std::string(S), "0b1000'1100");
+  S.clear();
+  APInt(64, 1024).toString(S, 8, false, true, false, true);
+  EXPECT_EQ(std::string(S), "02'000");
+  S.clear();
+  APInt(64, 1000000).toString(S, 10, false, true, false, true);
+  EXPECT_EQ(std::string(S), "1'000'000");
+  S.clear();
+  APInt(64, 1000000).toString(S, 16, false, true, true, true);
+  EXPECT_EQ(std::string(S), "0xF'4240");
+  S.clear();
+  APInt(64, 1'000'000'000).toString(S, 36, false, false, false, true);
+  EXPECT_EQ(std::string(S), "gj'dgxs");
+  S.clear();
+
   isSigned = false;
   APInt(8, 255, isSigned).toString(S, 2, isSigned, true);
   EXPECT_EQ(std::string(S), "0b11111111");
@@ -1415,6 +1432,24 @@ TEST(APIntTest, toString) {
   APInt(8, 255, isSigned).toString(S, 36, isSigned, false);
   EXPECT_EQ(std::string(S), "-1");
   S.clear();
+
+  // negative with separators
+  APInt(64, -140, isSigned).toString(S, 2, isSigned, true, false, true);
+  EXPECT_EQ(std::string(S), "-0b1000'1100");
+  S.clear();
+  APInt(64, -1024, isSigned).toString(S, 8, isSigned, true, false, true);
+  EXPECT_EQ(std::string(S), "-02'000");
+  S.clear();
+  APInt(64, -1000000, isSigned).toString(S, 10, isSigned, true, false, true);
+  EXPECT_EQ(std::string(S), "-1'000'000");
+  S.clear();
+  APInt(64, -1000000, isSigned).toString(S, 16, isSigned, true, true, true);
+  EXPECT_EQ(std::string(S), "-0xF'4240");
+  S.clear();
+  APInt(64, -1'000'000'000, isSigned).toString(S, 36, isSigned, false, false, true);
+  EXPECT_EQ(std::string(S), "-gj'dgxs");
+  S.clear();
+
 }
 
 TEST(APIntTest, Log2) {

>From c3efdb00f58ef91e587d37b08d06982a81ccefc4 Mon Sep 17 00:00:00 2001
From: Atousa Duprat <atousa.p at gmail.com>
Date: Tue, 6 Feb 2024 21:20:18 -0800
Subject: [PATCH 2/2] [clang] Clarify integer overflow diagnostic

Compiling this simple program:

int main()
{
  uint32_t x2 =  10 * 1'000'000'000;
}

Gives the following misleading warning:
warning: overflow in expression; result is 1410065408 with type 'int'
   uint32_t x2 =  10 * 1'000'000'000;

With the fix applied, we get the more clear:
warning: overflow in expression; result is 10'000'000'000 with type 'int'
   uint32_t x2 =  10 * 1'000'000'000;

The wrapped value is in the range of an int, whereas the unwrapped value
is more clearly overflowed.
---
 clang/lib/AST/ExprConstant.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a46887d6cf1be0..b6c5d13c330b82 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2774,7 +2774,7 @@ static bool CheckedIntArithmetic(EvalInfo &Info, const Expr *E,
     if (Info.checkingForUndefinedBehavior())
       Info.Ctx.getDiagnostics().Report(E->getExprLoc(),
                                        diag::warn_integer_constant_overflow)
-          << toString(Result, 10, Result.isSigned(), false, true, true)
+          << toString(Value, 10, Value.isSigned(), false, true, true)
           << E->getType() << E->getSourceRange();
     return HandleOverflow(Info, E, Value, E->getType());
   }