[llvm] 8cba721 - Implement literal suffixes for _BitInt

Aaron Ballman via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 14 06:24:27 PDT 2022


Author: Aaron Ballman
Date: 2022-03-14T09:24:19-04:00
New Revision: 8cba72177dcd8de5d37177dbaf2347e5c1f0f1e8

URL: https://github.com/llvm/llvm-project/commit/8cba72177dcd8de5d37177dbaf2347e5c1f0f1e8
DIFF: https://github.com/llvm/llvm-project/commit/8cba72177dcd8de5d37177dbaf2347e5c1f0f1e8.diff

LOG: Implement literal suffixes for _BitInt

WG14 adopted N2775 (http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf)
at our Feb 2022 meeting. This paper adds a literal suffix for
bit-precise types that automatically sizes the bit-precise type to be
the smallest possible legal _BitInt type that can represent the literal
value. The suffix chosen is wb (for a signed bit-precise type) which
can be combined with the u suffix (for an unsigned bit-precise type).

The preprocessor continues to operate as-if all integer types were
intmax_t/uintmax_t, including bit-precise integer types. It is a
constraint violation if the bit-precise literal is too large to fit
within that type in the context of the preprocessor (when still using
a pp-number preprocessing token), but it is not a constraint violation
in other circumstances. This allows you to make bit-precise integer
literals that are wider than what the preprocessor currently supports
in order to initialize variables, etc.

Added: 
    clang/test/AST/bitint-suffix.c
    clang/test/Lexer/bitint-constants-compat.c
    clang/test/Lexer/bitint-constants.c

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/DiagnosticCommonKinds.td
    clang/include/clang/Lex/LiteralSupport.h
    clang/lib/AST/StmtPrinter.cpp
    clang/lib/Lex/LiteralSupport.cpp
    clang/lib/Lex/PPExpressions.cpp
    clang/lib/Sema/SemaExpr.cpp
    llvm/include/llvm/ADT/APInt.h
    llvm/lib/Support/APInt.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 53d07c03af0f9..2422c8f2cba7a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -111,6 +111,8 @@ C2x Feature Support
 
 - Implemented `WG14 N2674 The noreturn attribute <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2764.pdf>`_.
 - Implemented `WG14 N2935 Make false and true first-class language features <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2935.pdf>`_.
+- Implemented `WG14 N2763 Adding a fundamental type for N-bit integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2763.pdf>`_.
+- Implemented `WG14 N2775 Literal suffixes for bit-precise integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf>`_.
 
 C++ Language Changes in Clang
 -----------------------------

diff  --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 421527827a4bd..66defc1d8ca5f 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -207,6 +207,12 @@ def err_cxx2b_size_t_suffix: Error<
 def err_size_t_literal_too_large: Error<
   "%select{signed |}0'size_t' literal is out of range of possible "
   "%select{signed |}0'size_t' values">;
+def ext_c2x_bitint_suffix : ExtWarn<
+  "'_BitInt' suffix for literals is a C2x extension">,
+  InGroup<C2x>;
+def warn_c2x_compat_bitint_suffix : Warning<
+  "'_BitInt' suffix for literals is incompatible with C standards before C2x">,
+  InGroup<CPre2xCompat>, DefaultIgnore;
 def err_integer_literal_too_large : Error<
   "integer literal is too large to be represented in any %select{signed |}0"
   "integer type">;

diff  --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 32471969f5967..977963dcbbba0 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -69,10 +69,11 @@ class NumericLiteralParser {
   bool isImaginary : 1;     // 1.0i
   bool isFloat16 : 1;       // 1.0f16
   bool isFloat128 : 1;      // 1.0q
-  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
-
   bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
   bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
+  bool isBitInt : 1;        // 1wb, 1uwb (C2x)
+  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
+
 
   bool isFixedPointLiteral() const {
     return (saw_period || saw_exponent) && saw_fixed_point_suffix;
@@ -120,6 +121,13 @@ class NumericLiteralParser {
   /// calculating the digit sequence of the exponent.
   bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
 
+  /// Get the digits that comprise the literal. This excludes any prefix or
+  /// suffix associated with the literal.
+  StringRef getLiteralDigits() const {
+    assert(!hadError && "cannot reliably get the literal digits with an error");
+    return StringRef(DigitsBegin, SuffixBegin - DigitsBegin);
+  }
+
 private:
 
   void ParseNumberStartingWithZero(SourceLocation TokLoc);

diff  --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 1cee3a1b4650c..254273a5af218 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1153,6 +1153,11 @@ void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) {
   bool isSigned = Node->getType()->isSignedIntegerType();
   OS << toString(Node->getValue(), 10, isSigned);
 
+  if (isa<BitIntType>(Node->getType())) {
+    OS << (isSigned ? "wb" : "uwb");
+    return;
+  }
+
   // Emit suffixes.  Integer literals are always a builtin integer type.
   switch (Node->getType()->castAs<BuiltinType>()->getKind()) {
   default: llvm_unreachable("Unexpected type for integer literal!");

diff  --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 76c8b324671d7..6bab51250adb1 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -711,6 +711,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   isFract = false;
   isAccum = false;
   hadError = false;
+  isBitInt = false;
 
   // This routine assumes that the range begin/end matches the regex for integer
   // and FP constants (specifically, the 'pp-number' regex), and assumes that
@@ -895,6 +896,24 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       if (isImaginary) break;   // Cannot be repeated.
       isImaginary = true;
       continue;  // Success.
+    case 'w':
+    case 'W':
+      if (isFPConstant)
+        break; // Invalid for floats.
+      if (HasSize)
+        break; // Invalid if we already have a size for the literal.
+
+      // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
+      // explicitly do not support the suffix in C++ as an extension because a
+      // library-based UDL that resolves to a library type may be more
+      // appropriate there.
+      if (!LangOpts.CPlusPlus && (s[0] == 'w' && s[1] == 'b') ||
+          (s[0] == 'W' && s[1] == 'B')) {
+        isBitInt = true;
+        HasSize = true;
+        ++s; // Skip both characters (2nd char skipped on continue).
+        continue; // Success.
+      }
     }
     // If we reached here, there was an error or a ud-suffix.
     break;
@@ -916,6 +935,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
         isFloat16 = false;
         isHalf = false;
         isImaginary = false;
+        isBitInt = false;
         MicrosoftInteger = 0;
         saw_fixed_point_suffix = false;
         isFract = false;
@@ -1145,8 +1165,14 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
   // floating point constant, the radix will change to 10. Octal floating
   // point constants are not permitted (only decimal and hexadecimal).
   radix = 8;
-  DigitsBegin = s;
+  const char *PossibleNewDigitStart = s;
   s = SkipOctalDigits(s);
+  // When the value is 0 followed by a suffix (like 0wb), we want to leave 0
+  // as the start of the digits. So if skipping octal digits does not skip
+  // anything, we leave the digit start where it was.
+  if (s != PossibleNewDigitStart)
+    DigitsBegin = PossibleNewDigitStart;
+
   if (s == ThisTokEnd)
     return; // Done, simple octal number like 01234
 

diff  --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 424cccfdb9eef..3c33369ed5f2c 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -331,6 +331,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
                                  : diag::ext_cxx2b_size_t_suffix
                            : diag::err_cxx2b_size_t_suffix);
 
+    // 'wb/uwb' literals are a C2x feature. We explicitly do not support the
+    // suffix in C++ as an extension because a library-based UDL that resolves
+    // to a library type may be more appropriate there.
+    if (Literal.isBitInt)
+      PP.Diag(PeekTok, PP.getLangOpts().C2x
+                           ? diag::warn_c2x_compat_bitint_suffix
+                           : diag::ext_c2x_bitint_suffix);
+
     // Parse the integer literal into Result.
     if (Literal.GetIntegerValue(Result.Val)) {
       // Overflow parsing integer literal.

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 96471f6c5fb1f..8f831c378a4c8 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3922,9 +3922,27 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
                                         : diag::ext_cxx2b_size_t_suffix
                                   : diag::err_cxx2b_size_t_suffix);
 
-    // Get the value in the widest-possible width.
-    unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth();
-    llvm::APInt ResultVal(MaxWidth, 0);
+    // 'wb/uwb' literals are a C2x feature. We support _BitInt as a type in C++,
+    // but we do not currently support the suffix in C++ mode because it's not
+    // entirely clear whether WG21 will prefer this suffix to return a library
+    // type such as std::bit_int instead of returning a _BitInt.
+    if (Literal.isBitInt && !getLangOpts().CPlusPlus)
+      PP.Diag(Tok.getLocation(), getLangOpts().C2x
+                                     ? diag::warn_c2x_compat_bitint_suffix
+                                     : diag::ext_c2x_bitint_suffix);
+
+    // Get the value in the widest-possible width. What is "widest" depends on
+    // whether the literal is a bit-precise integer or not. For a bit-precise
+    // integer type, try to scan the source to determine how many bits are
+    // needed to represent the value. This may seem a bit expensive, but trying
+    // to get the integer value from an overly-wide APInt is *extremely*
+    // expensive, so the naive approach of assuming
+    // llvm::IntegerType::MAX_INT_BITS is a big performance hit.
+    unsigned BitsNeeded =
+        Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded(
+                               Literal.getLiteralDigits(), Literal.getRadix())
+                         : Context.getTargetInfo().getIntMaxTWidth();
+    llvm::APInt ResultVal(BitsNeeded, 0);
 
     if (Literal.GetIntegerValue(ResultVal)) {
       // If this value didn't fit into uintmax_t, error and force to ull.
@@ -3956,6 +3974,32 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
         }
       }
 
+      // Bit-precise integer literals are automagically-sized based on the
+      // width required by the literal.
+      if (Literal.isBitInt) {
+        // The signed version has one more bit for the sign value. There are no
+        // zero-width bit-precise integers, even if the literal value is 0.
+        Width = Literal.isUnsigned ? std::max(ResultVal.getActiveBits(), 1u)
+                                   : std::max(ResultVal.getMinSignedBits(), 2u);
+
+        // Diagnose if the width of the constant is larger than BITINT_MAXWIDTH,
+        // and reset the type to the largest supported width.
+        unsigned int MaxBitIntWidth =
+            Context.getTargetInfo().getMaxBitIntWidth();
+        if (Width > MaxBitIntWidth) {
+          Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
+              << Literal.isUnsigned;
+          Width = MaxBitIntWidth;
+        }
+
+        // Reset the result value to the smaller APInt and select the correct
+        // type to be used. Note, we zext even for signed values because the
+        // literal itself is always an unsigned value (a preceeding - is a
+        // unary operator, not part of the literal).
+        ResultVal = ResultVal.zextOrTrunc(Width);
+        Ty = Context.getBitIntType(Literal.isUnsigned, Width);
+      }
+
       // Check C++2b size_t literals.
       if (Literal.isSizeT) {
         assert(!Literal.MicrosoftInteger &&

diff  --git a/clang/test/AST/bitint-suffix.c b/clang/test/AST/bitint-suffix.c
new file mode 100644
index 0000000000000..f72d3a6dab507
--- /dev/null
+++ b/clang/test/AST/bitint-suffix.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c2x -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
+
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void (void)'
+void func(void) {
+  // Ensure that we calculate the correct type from the literal suffix.
+
+  // Note: 0wb should create an _BitInt(2) because a signed bit-precise
+  // integer requires one bit for the sign and one bit for the value,
+  // at a minimum.
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 zero_wb 'typeof (0wb)':'_BitInt(2)'
+  typedef __typeof__(0wb) zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
+  typedef __typeof__(-0wb) neg_zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 one_wb 'typeof (1wb)':'_BitInt(2)'
+  typedef __typeof__(1wb) one_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
+  typedef __typeof__(-1wb) neg_one_wb;
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(0uwb) zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(-0uwb) neg_zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(1uwb) one_uwb;
+
+  // Try a value that is too large to fit in [u]intmax_t.
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:47> col:47 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
+  typedef __typeof__(18446744073709551616uwb) huge_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:46> col:46 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
+  typedef __typeof__(18446744073709551616wb) huge_wb;
+}
+
+// Test the examples from the paper.
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 from_paper 'void (void)'
+void from_paper(void) {
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_three_wb 'typeof (-3wb)':'_BitInt(3)'
+  typedef __typeof__(-3wb) neg_three_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_three_hex_wb 'typeof (-3wb)':'_BitInt(3)'
+  typedef __typeof__(-0x3wb) neg_three_hex_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 three_wb 'typeof (3wb)':'_BitInt(3)'
+  typedef __typeof__(3wb) three_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 three_uwb 'typeof (3uwb)':'unsigned _BitInt(2)'
+  typedef __typeof__(3uwb) three_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_three_uwb 'typeof (-3uwb)':'unsigned _BitInt(2)'
+  typedef __typeof__(-3uwb) neg_three_uwb;
+}

diff  --git a/clang/test/Lexer/bitint-constants-compat.c b/clang/test/Lexer/bitint-constants-compat.c
new file mode 100644
index 0000000000000..6429c3c173912
--- /dev/null
+++ b/clang/test/Lexer/bitint-constants-compat.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
+// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
+// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
+
+#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
+                               compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
+                               cpp-error {{invalid suffix 'uwb' on integer constant}}
+#endif
+
+void func(void) {
+  18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
+                             compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
+                             cpp-error {{invalid suffix 'wb' on integer constant}}
+}

diff  --git a/clang/test/Lexer/bitint-constants.c b/clang/test/Lexer/bitint-constants.c
new file mode 100644
index 0000000000000..243f8c0377c27
--- /dev/null
+++ b/clang/test/Lexer/bitint-constants.c
@@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify -Wno-unused %s
+
+// Test that the preprocessor behavior makes sense.
+#if 1wb != 1
+#error "wb suffix must be recognized by preprocessor"
+#endif
+#if 1uwb != 1
+#error "uwb suffix must be recognized by preprocessor"
+#endif
+#if !(-1wb < 0)
+#error "wb suffix must be interpreted as signed"
+#endif
+#if !(-1uwb > 0)
+#error "uwb suffix must be interpreted as unsigned"
+#endif
+
+#if 18446744073709551615uwb != 18446744073709551615ULL
+#error "expected the max value for uintmax_t to compare equal"
+#endif
+
+// Test that the preprocessor gives appropriate diagnostics when the
+// literal value is larger than what can be stored in a [u]intmax_t.
+#if 18446744073709551616wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+#if 18446744073709551616uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+
+// Despite using a bit-precise integer, this is expected to overflow
+// because all preprocessor arithmetic is done in [u]intmax_t, so this
+// should result in the value 0.
+#if 18446744073709551615uwb + 1 != 0ULL
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Because this bit-precise integer is signed, it will also overflow,
+// but Clang handles that by converting to uintmax_t instead of
+// intmax_t.
+#if 18446744073709551615wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Test that just because the preprocessor can't figure out the bit
+// width doesn't mean we can't form the constant, it just means we
+// can't use the value in a preprocessor conditional.
+unsigned _BitInt(65) Val = 18446744073709551616uwb;
+
+void ValidSuffix(void) {
+  // Decimal literals.
+  1wb;
+  1WB;
+  -1wb;
+  _Static_assert((int)1wb == 1, "not 1?");
+  _Static_assert((int)-1wb == -1, "not -1?");
+
+  1uwb;
+  1uWB;
+  1Uwb;
+  1UWB;
+  _Static_assert((unsigned int)1uwb == 1u, "not 1?");
+
+  1'2wb;
+  1'2uwb;
+  _Static_assert((int)1'2wb == 12, "not 12?");
+  _Static_assert((unsigned int)1'2uwb == 12u, "not 12?");
+
+  // Hexadecimal literals.
+  0x1wb;
+  0x1uwb;
+  0x0'1'2'3wb;
+  0xA'B'c'duwb;
+  _Static_assert((int)0x0'1'2'3wb == 0x0123, "not 0x0123");
+  _Static_assert((unsigned int)0xA'B'c'duwb == 0xABCDu, "not 0xABCD");
+
+  // Binary literals.
+  0b1wb;
+  0b1uwb;
+  0b1'0'1'0'0'1wb;
+  0b0'1'0'1'1'0uwb;
+  _Static_assert((int)0b1wb == 1, "not 1?");
+  _Static_assert((unsigned int)0b1uwb == 1u, "not 1?");
+
+  // Octal literals.
+  01wb;
+  01uwb;
+  0'6'0wb;
+  0'0'1uwb;
+  0wbu;
+  0WBu;
+  0wbU;
+  0WBU;
+  0wb;
+  _Static_assert((int)0wb == 0, "not 0?");
+  _Static_assert((unsigned int)0wbu == 0u, "not 0?");
+
+  // Imaginary or Complex. These are allowed because _Complex can work with any
+  // integer type, and that includes _BitInt.
+  1iwb;
+  1wbj;
+}
+
+void InvalidSuffix(void) {
+  // Can't mix the case of wb or WB, and can't rearrange the letters.
+  0wB; // expected-error {{invalid suffix 'wB' on integer constant}}
+  0Wb; // expected-error {{invalid suffix 'Wb' on integer constant}}
+  0bw; // expected-error {{invalid digit 'b' in octal constant}}
+  0BW; // expected-error {{invalid digit 'B' in octal constant}}
+
+  // Trailing digit separators should still diagnose.
+  1'2'wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+  1'2'uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+
+  // Long.
+  1lwb; // expected-error {{invalid suffix}}
+  1wbl; // expected-error {{invalid suffix}}
+  1luwb; // expected-error {{invalid suffix}}
+  1ulwb;  // expected-error {{invalid suffix}}
+
+  // Long long.
+  1llwb; // expected-error {{invalid suffix}}
+  1uwbll; // expected-error {{invalid suffix}}
+
+  // Floating point.
+  0.1wb;   // expected-error {{invalid suffix}}
+  0.1fwb;   // expected-error {{invalid suffix}}
+
+  // Repetitive suffix.
+  1wbwb; // expected-error {{invalid suffix}}
+  1uwbuwb; // expected-error {{invalid suffix}}
+  1wbuwb; // expected-error {{invalid suffix}}
+  1uwbwb; // expected-error {{invalid suffix}}
+}
+
+void ValidSuffixInvalidValue(void) {
+  // This is a valid suffix, but the value is larger than one that fits within
+  // the width of BITINT_MAXWIDTH. When this value changes in the future, the
+  // test cases should pick a new value that can't be represented by a _BitInt,
+  // but also add a test case that a 129-bit literal still behaves as-expected.
+  _Static_assert(__BITINT_MAXWIDTH__ <= 128,
+	             "Need to pick a bigger constant for the test case below.");
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
+}

diff  --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index a475e27c797d2..e14fab47a8093 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1506,6 +1506,11 @@ class LLVM_NODISCARD APInt {
   /// equivalent of the string given by \p str.
   static unsigned getBitsNeeded(StringRef str, uint8_t radix);
 
+  /// Get the bits that are sufficient to represent the string value. This may
+  /// over estimate the amount of bits required, but it does not require
+  /// parsing the value in the string.
+  static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
+
   /// The APInt version of the countLeadingZeros functions in
   ///   MathExtras.h.
   ///

diff  --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index b536e9a9a6d02..401b4f7a76206 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -502,12 +502,51 @@ uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
   return retBits;
 }
 
+unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
+  assert(!Str.empty() && "Invalid string length");
+  size_t StrLen = Str.size();
+
+  // Each computation below needs to know if it's negative.
+  unsigned IsNegative = false;
+  if (Str[0] == '-' || Str[0] == '+') {
+    IsNegative = Str[0] == '-';
+    StrLen--;
+    assert(StrLen && "String is only a sign, needs a value.");
+  }
+
+  // For radixes of power-of-two values, the bits required is accurately and
+  // easily computed.
+  if (Radix == 2)
+    return StrLen + IsNegative;
+  if (Radix == 8)
+    return StrLen * 3 + IsNegative;
+  if (Radix == 16)
+    return StrLen * 4 + IsNegative;
+
+  // Compute a sufficient number of bits that is always large enough but might
+  // be too large. This avoids the assertion in the constructor. This
+  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+  // bits in that case.
+  if (Radix == 10)
+    return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative;
+
+  assert(Radix == 36);
+  return (StrLen == 1 ? 7 : StrLen * 16 / 3) + IsNegative;
+}
+
 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
-  assert(!str.empty() && "Invalid string length");
-  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
-          radix == 36) &&
-         "Radix should be 2, 8, 10, 16, or 36!");
+  // Compute a sufficient number of bits that is always large enough but might
+  // be too large.
+  unsigned sufficient = getSufficientBitsNeeded(str, radix);
+
+  // For bases 2, 8, and 16, the sufficient number of bits is exact and we can
+  // return the value directly. For bases 10 and 36, we need to do extra work.
+  if (radix == 2 || radix == 8 || radix == 16)
+    return sufficient;
 
+  // This is grossly inefficient but accurate. We could probably do something
+  // with a computation of roughly slen*64/20 and then adjust by the value of
+  // the first few digits. But, I'm not sure how accurate that could be.
   size_t slen = str.size();
 
   // Each computation below needs to know if it's negative.
@@ -519,28 +558,6 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
     assert(slen && "String is only a sign, needs a value.");
   }
 
-  // For radixes of power-of-two values, the bits required is accurately and
-  // easily computed
-  if (radix == 2)
-    return slen + isNegative;
-  if (radix == 8)
-    return slen * 3 + isNegative;
-  if (radix == 16)
-    return slen * 4 + isNegative;
-
-  // FIXME: base 36
-
-  // This is grossly inefficient but accurate. We could probably do something
-  // with a computation of roughly slen*64/20 and then adjust by the value of
-  // the first few digits. But, I'm not sure how accurate that could be.
-
-  // Compute a sufficient number of bits that is always large enough but might
-  // be too large. This avoids the assertion in the constructor. This
-  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
-  // bits in that case.
-  unsigned sufficient
-    = radix == 10? (slen == 1 ? 4 : slen * 64/18)
-                 : (slen == 1 ? 7 : slen * 16/3);
 
   // Convert to the actual binary value.
   APInt tmp(sufficient, StringRef(p, slen), radix);


        


More information about the llvm-commits mailing list