[llvm] 8cba721 - Implement literal suffixes for _BitInt
Aaron Ballman via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 14 06:24:27 PDT 2022
Author: Aaron Ballman
Date: 2022-03-14T09:24:19-04:00
New Revision: 8cba72177dcd8de5d37177dbaf2347e5c1f0f1e8
URL: https://github.com/llvm/llvm-project/commit/8cba72177dcd8de5d37177dbaf2347e5c1f0f1e8
DIFF: https://github.com/llvm/llvm-project/commit/8cba72177dcd8de5d37177dbaf2347e5c1f0f1e8.diff
LOG: Implement literal suffixes for _BitInt
WG14 adopted N2775 (http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf)
at our Feb 2022 meeting. This paper adds a literal suffix for
bit-precise types that automatically sizes the bit-precise type to be
the smallest possible legal _BitInt type that can represent the literal
value. The suffix chosen is wb (for a signed bit-precise type) which
can be combined with the u suffix (for an unsigned bit-precise type).
The preprocessor continues to operate as-if all integer types were
intmax_t/uintmax_t, including bit-precise integer types. It is a
constraint violation if the bit-precise literal is too large to fit
within that type in the context of the preprocessor (when still using
a pp-number preprocessing token), but it is not a constraint violation
in other circumstances. This allows you to make bit-precise integer
literals that are wider than what the preprocessor currently supports
in order to initialize variables, etc.
Added:
clang/test/AST/bitint-suffix.c
clang/test/Lexer/bitint-constants-compat.c
clang/test/Lexer/bitint-constants.c
Modified:
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/DiagnosticCommonKinds.td
clang/include/clang/Lex/LiteralSupport.h
clang/lib/AST/StmtPrinter.cpp
clang/lib/Lex/LiteralSupport.cpp
clang/lib/Lex/PPExpressions.cpp
clang/lib/Sema/SemaExpr.cpp
llvm/include/llvm/ADT/APInt.h
llvm/lib/Support/APInt.cpp
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 53d07c03af0f9..2422c8f2cba7a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -111,6 +111,8 @@ C2x Feature Support
- Implemented `WG14 N2674 The noreturn attribute <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2764.pdf>`_.
- Implemented `WG14 N2935 Make false and true first-class language features <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2935.pdf>`_.
+- Implemented `WG14 N2763 Adding a fundamental type for N-bit integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2763.pdf>`_.
+- Implemented `WG14 N2775 Literal suffixes for bit-precise integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf>`_.
C++ Language Changes in Clang
-----------------------------
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 421527827a4bd..66defc1d8ca5f 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -207,6 +207,12 @@ def err_cxx2b_size_t_suffix: Error<
def err_size_t_literal_too_large: Error<
"%select{signed |}0'size_t' literal is out of range of possible "
"%select{signed |}0'size_t' values">;
+def ext_c2x_bitint_suffix : ExtWarn<
+ "'_BitInt' suffix for literals is a C2x extension">,
+ InGroup<C2x>;
+def warn_c2x_compat_bitint_suffix : Warning<
+ "'_BitInt' suffix for literals is incompatible with C standards before C2x">,
+ InGroup<CPre2xCompat>, DefaultIgnore;
def err_integer_literal_too_large : Error<
"integer literal is too large to be represented in any %select{signed |}0"
"integer type">;
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 32471969f5967..977963dcbbba0 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -69,10 +69,11 @@ class NumericLiteralParser {
bool isImaginary : 1; // 1.0i
bool isFloat16 : 1; // 1.0f16
bool isFloat128 : 1; // 1.0q
- uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
-
bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr
bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk
+ bool isBitInt : 1; // 1wb, 1uwb (C2x)
+ uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
+
bool isFixedPointLiteral() const {
return (saw_period || saw_exponent) && saw_fixed_point_suffix;
@@ -120,6 +121,13 @@ class NumericLiteralParser {
/// calculating the digit sequence of the exponent.
bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
+ /// Returns only the digit characters of the literal: the spelling with any
+ /// prefix and any suffix stripped. Must not be called once hadError is set.
+ StringRef getLiteralDigits() const {
+   assert(!hadError && "cannot reliably get the literal digits with an error");
+   return {DigitsBegin, static_cast<size_t>(SuffixBegin - DigitsBegin)};
+ }
+
private:
void ParseNumberStartingWithZero(SourceLocation TokLoc);
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 1cee3a1b4650c..254273a5af218 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1153,6 +1153,11 @@ void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) {
bool isSigned = Node->getType()->isSignedIntegerType();
OS << toString(Node->getValue(), 10, isSigned);
+ if (isa<BitIntType>(Node->getType())) {
+ OS << (isSigned ? "wb" : "uwb");
+ return;
+ }
+
// Emit suffixes. Integer literals are always a builtin integer type.
switch (Node->getType()->castAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("Unexpected type for integer literal!");
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 76c8b324671d7..6bab51250adb1 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -711,6 +711,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isFract = false;
isAccum = false;
hadError = false;
+ isBitInt = false;
// This routine assumes that the range begin/end matches the regex for integer
// and FP constants (specifically, the 'pp-number' regex), and assumes that
@@ -895,6 +896,24 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
if (isImaginary) break; // Cannot be repeated.
isImaginary = true;
continue; // Success.
+ case 'w':
+ case 'W':
+   if (isFPConstant)
+     break; // Invalid for floats.
+   if (HasSize)
+     break; // Invalid if we already have a size for the literal.
+
+   // wb and WB are allowed, but a mixture of cases like Wb or wB is not. The
+   // suffix is deliberately not supported in C++ (a library-based UDL that
+   // resolves to a library type may be more appropriate there), so the
+   // language check must guard both spellings, not only the lowercase one.
+   if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
+                               (s[0] == 'W' && s[1] == 'B'))) {
+     isBitInt = true;
+     HasSize = true;
+     ++s; // Skip both characters (2nd char skipped on continue).
+     continue; // Success.
+   }
}
// If we reached here, there was an error or a ud-suffix.
break;
@@ -916,6 +935,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isFloat16 = false;
isHalf = false;
isImaginary = false;
+ isBitInt = false;
MicrosoftInteger = 0;
saw_fixed_point_suffix = false;
isFract = false;
@@ -1145,8 +1165,14 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
// floating point constant, the radix will change to 10. Octal floating
// point constants are not permitted (only decimal and hexadecimal).
radix = 8;
- DigitsBegin = s;
+ const char *PossibleNewDigitStart = s;
s = SkipOctalDigits(s);
+ // When the value is 0 followed by a suffix (like 0wb), we want to leave 0
+ // as the start of the digits. So if skipping octal digits does not skip
+ // anything, we leave the digit start where it was.
+ if (s != PossibleNewDigitStart)
+ DigitsBegin = PossibleNewDigitStart;
+
if (s == ThisTokEnd)
return; // Done, simple octal number like 01234
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 424cccfdb9eef..3c33369ed5f2c 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -331,6 +331,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
: diag::ext_cxx2b_size_t_suffix
: diag::err_cxx2b_size_t_suffix);
+ // 'wb/uwb' literals are a C2x feature. We explicitly do not support the
+ // suffix in C++ as an extension because a library-based UDL that resolves
+ // to a library type may be more appropriate there.
+ if (Literal.isBitInt)
+ PP.Diag(PeekTok, PP.getLangOpts().C2x
+ ? diag::warn_c2x_compat_bitint_suffix
+ : diag::ext_c2x_bitint_suffix);
+
// Parse the integer literal into Result.
if (Literal.GetIntegerValue(Result.Val)) {
// Overflow parsing integer literal.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 96471f6c5fb1f..8f831c378a4c8 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3922,9 +3922,27 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
: diag::ext_cxx2b_size_t_suffix
: diag::err_cxx2b_size_t_suffix);
- // Get the value in the widest-possible width.
- unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth();
- llvm::APInt ResultVal(MaxWidth, 0);
+ // 'wb/uwb' literals are a C2x feature. We support _BitInt as a type in C++,
+ // but we do not currently support the suffix in C++ mode because it's not
+ // entirely clear whether WG21 will prefer this suffix to return a library
+ // type such as std::bit_int instead of returning a _BitInt.
+ if (Literal.isBitInt && !getLangOpts().CPlusPlus)
+ PP.Diag(Tok.getLocation(), getLangOpts().C2x
+ ? diag::warn_c2x_compat_bitint_suffix
+ : diag::ext_c2x_bitint_suffix);
+
+ // Get the value in the widest-possible width. What is "widest" depends on
+ // whether the literal is a bit-precise integer or not. For a bit-precise
+ // integer type, try to scan the source to determine how many bits are
+ // needed to represent the value. This may seem a bit expensive, but trying
+ // to get the integer value from an overly-wide APInt is *extremely*
+ // expensive, so the naive approach of assuming
+ // llvm::IntegerType::MAX_INT_BITS is a big performance hit.
+ unsigned BitsNeeded =
+ Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded(
+ Literal.getLiteralDigits(), Literal.getRadix())
+ : Context.getTargetInfo().getIntMaxTWidth();
+ llvm::APInt ResultVal(BitsNeeded, 0);
if (Literal.GetIntegerValue(ResultVal)) {
// If this value didn't fit into uintmax_t, error and force to ull.
@@ -3956,6 +3974,32 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
}
}
+ // Bit-precise integer literals are automagically-sized based on the
+ // width required by the literal.
+ if (Literal.isBitInt) {
+   // ResultVal is an unsigned magnitude: use its active bits (at least 1, as
+   // no _BitInt is zero-width) plus one sign bit for the signed version.
+   Width = std::max(ResultVal.getActiveBits(), 1u) +
+           (Literal.isUnsigned ? 0u : 1u);
+
+   // Diagnose if the width of the constant is larger than BITINT_MAXWIDTH,
+   // and reset the type to the largest supported width.
+   unsigned int MaxBitIntWidth =
+       Context.getTargetInfo().getMaxBitIntWidth();
+   if (Width > MaxBitIntWidth) {
+     Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
+         << Literal.isUnsigned;
+     Width = MaxBitIntWidth;
+   }
+
+   // Reset the result value to the smaller APInt and select the correct
+   // type to be used. Note, we zext even for signed values because the
+   // literal itself is always an unsigned value (a preceding - is a
+   // unary operator, not part of the literal).
+   ResultVal = ResultVal.zextOrTrunc(Width);
+   Ty = Context.getBitIntType(Literal.isUnsigned, Width);
+ }
+
// Check C++2b size_t literals.
if (Literal.isSizeT) {
assert(!Literal.MicrosoftInteger &&
diff --git a/clang/test/AST/bitint-suffix.c b/clang/test/AST/bitint-suffix.c
new file mode 100644
index 0000000000000..f72d3a6dab507
--- /dev/null
+++ b/clang/test/AST/bitint-suffix.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c2x -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
+
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void (void)'
+void func(void) {
+ // Ensure that we calculate the correct type from the literal suffix.
+
+ // Note: 0wb should create an _BitInt(2) because a signed bit-precise
+ // integer requires one bit for the sign and one bit for the value,
+ // at a minimum.
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 zero_wb 'typeof (0wb)':'_BitInt(2)'
+ typedef __typeof__(0wb) zero_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
+ typedef __typeof__(-0wb) neg_zero_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 one_wb 'typeof (1wb)':'_BitInt(2)'
+ typedef __typeof__(1wb) one_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
+ typedef __typeof__(-1wb) neg_one_wb;
+
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(0uwb) zero_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(-0uwb) neg_zero_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(1uwb) one_uwb;
+
+ // Try a value that is too large to fit in [u]intmax_t.
+
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:47> col:47 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
+ typedef __typeof__(18446744073709551616uwb) huge_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:46> col:46 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
+ typedef __typeof__(18446744073709551616wb) huge_wb;
+}
+
+// Test the examples from the paper.
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 from_paper 'void (void)'
+void from_paper(void) {
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_three_wb 'typeof (-3wb)':'_BitInt(3)'
+ typedef __typeof__(-3wb) neg_three_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_three_hex_wb 'typeof (-3wb)':'_BitInt(3)'
+ typedef __typeof__(-0x3wb) neg_three_hex_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 three_wb 'typeof (3wb)':'_BitInt(3)'
+ typedef __typeof__(3wb) three_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 three_uwb 'typeof (3uwb)':'unsigned _BitInt(2)'
+ typedef __typeof__(3uwb) three_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_three_uwb 'typeof (-3uwb)':'unsigned _BitInt(2)'
+ typedef __typeof__(-3uwb) neg_three_uwb;
+}
diff --git a/clang/test/Lexer/bitint-constants-compat.c b/clang/test/Lexer/bitint-constants-compat.c
new file mode 100644
index 0000000000000..6429c3c173912
--- /dev/null
+++ b/clang/test/Lexer/bitint-constants-compat.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
+// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
+// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
+
+#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
+ compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
+ cpp-error {{invalid suffix 'uwb' on integer constant}}
+#endif
+
+void func(void) {
+ 18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
+ compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
+ cpp-error {{invalid suffix 'wb' on integer constant}}
+}
diff --git a/clang/test/Lexer/bitint-constants.c b/clang/test/Lexer/bitint-constants.c
new file mode 100644
index 0000000000000..243f8c0377c27
--- /dev/null
+++ b/clang/test/Lexer/bitint-constants.c
@@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify -Wno-unused %s
+
+// Test that the preprocessor behavior makes sense.
+#if 1wb != 1
+#error "wb suffix must be recognized by preprocessor"
+#endif
+#if 1uwb != 1
+#error "uwb suffix must be recognized by preprocessor"
+#endif
+#if !(-1wb < 0)
+#error "wb suffix must be interpreted as signed"
+#endif
+#if !(-1uwb > 0)
+#error "uwb suffix must be interpreted as unsigned"
+#endif
+
+#if 18446744073709551615uwb != 18446744073709551615ULL
+#error "expected the max value for uintmax_t to compare equal"
+#endif
+
+// Test that the preprocessor gives appropriate diagnostics when the
+// literal value is larger than what can be stored in a [u]intmax_t.
+#if 18446744073709551616wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+#if 18446744073709551616uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+
+// Despite using a bit-precise integer, this is expected to overflow
+// because all preprocessor arithmetic is done in [u]intmax_t, so this
+// should result in the value 0.
+#if 18446744073709551615uwb + 1 != 0ULL
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Because this bit-precise integer is signed, it will also overflow,
+// but Clang handles that by converting to uintmax_t instead of
+// intmax_t.
+#if 18446744073709551615wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Test that just because the preprocessor can't figure out the bit
+// width doesn't mean we can't form the constant, it just means we
+// can't use the value in a preprocessor conditional.
+unsigned _BitInt(65) Val = 18446744073709551616uwb;
+
+void ValidSuffix(void) {
+ // Decimal literals.
+ 1wb;
+ 1WB;
+ -1wb;
+ _Static_assert((int)1wb == 1, "not 1?");
+ _Static_assert((int)-1wb == -1, "not -1?");
+
+ 1uwb;
+ 1uWB;
+ 1Uwb;
+ 1UWB;
+ _Static_assert((unsigned int)1uwb == 1u, "not 1?");
+
+ 1'2wb;
+ 1'2uwb;
+ _Static_assert((int)1'2wb == 12, "not 12?");
+ _Static_assert((unsigned int)1'2uwb == 12u, "not 12?");
+
+ // Hexadecimal literals.
+ 0x1wb;
+ 0x1uwb;
+ 0x0'1'2'3wb;
+ 0xA'B'c'duwb;
+ _Static_assert((int)0x0'1'2'3wb == 0x0123, "not 0x0123");
+ _Static_assert((unsigned int)0xA'B'c'duwb == 0xABCDu, "not 0xABCD");
+
+ // Binary literals.
+ 0b1wb;
+ 0b1uwb;
+ 0b1'0'1'0'0'1wb;
+ 0b0'1'0'1'1'0uwb;
+ _Static_assert((int)0b1wb == 1, "not 1?");
+ _Static_assert((unsigned int)0b1uwb == 1u, "not 1?");
+
+ // Octal literals.
+ 01wb;
+ 01uwb;
+ 0'6'0wb;
+ 0'0'1uwb;
+ 0wbu;
+ 0WBu;
+ 0wbU;
+ 0WBU;
+ 0wb;
+ _Static_assert((int)0wb == 0, "not 0?");
+ _Static_assert((unsigned int)0wbu == 0u, "not 0?");
+
+ // Imaginary or Complex. These are allowed because _Complex can work with any
+ // integer type, and that includes _BitInt.
+ 1iwb;
+ 1wbj;
+}
+
+void InvalidSuffix(void) {
+ // Can't mix the case of wb or WB, and can't rearrange the letters.
+ 0wB; // expected-error {{invalid suffix 'wB' on integer constant}}
+ 0Wb; // expected-error {{invalid suffix 'Wb' on integer constant}}
+ 0bw; // expected-error {{invalid digit 'b' in octal constant}}
+ 0BW; // expected-error {{invalid digit 'B' in octal constant}}
+
+ // Trailing digit separators should still diagnose.
+ 1'2'wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+ 1'2'uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+
+ // Long.
+ 1lwb; // expected-error {{invalid suffix}}
+ 1wbl; // expected-error {{invalid suffix}}
+ 1luwb; // expected-error {{invalid suffix}}
+ 1ulwb; // expected-error {{invalid suffix}}
+
+ // Long long.
+ 1llwb; // expected-error {{invalid suffix}}
+ 1uwbll; // expected-error {{invalid suffix}}
+
+ // Floating point.
+ 0.1wb; // expected-error {{invalid suffix}}
+ 0.1fwb; // expected-error {{invalid suffix}}
+
+ // Repetitive suffix.
+ 1wbwb; // expected-error {{invalid suffix}}
+ 1uwbuwb; // expected-error {{invalid suffix}}
+ 1wbuwb; // expected-error {{invalid suffix}}
+ 1uwbwb; // expected-error {{invalid suffix}}
+}
+
+void ValidSuffixInvalidValue(void) {
+ // This is a valid suffix, but the value is larger than one that fits within
+ // the width of BITINT_MAXWIDTH. When this value changes in the future, the
+ // test cases should pick a new value that can't be represented by a _BitInt,
+ // but also add a test case that a 129-bit literal still behaves as-expected.
+ _Static_assert(__BITINT_MAXWIDTH__ <= 128,
+ "Need to pick a bigger constant for the test case below.");
+ 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
+ 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
+}
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index a475e27c797d2..e14fab47a8093 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1506,6 +1506,11 @@ class LLVM_NODISCARD APInt {
/// equivalent of the string given by \p str.
static unsigned getBitsNeeded(StringRef str, uint8_t radix);
+ /// Get the bits that are sufficient to represent the string value. This may
+ /// over estimate the amount of bits required, but it does not require
+ /// parsing the value in the string.
+ static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
+
/// The APInt version of the countLeadingZeros functions in
/// MathExtras.h.
///
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index b536e9a9a6d02..401b4f7a76206 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -502,12 +502,51 @@ uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
return retBits;
}
+unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
+  assert(!Str.empty() && "Invalid string length");
+
+  // A leading '+' or '-' is not a digit; only '-' adds one bit to the result.
+  const char First = Str[0];
+  const bool HasSign = First == '-' || First == '+';
+  const unsigned SignBit = First == '-' ? 1 : 0;
+  const size_t NumDigits = Str.size() - (HasSign ? 1 : 0);
+  assert((!HasSign || NumDigits) && "String is only a sign, needs a value.");
+
+  switch (Radix) {
+  // For radixes of power-of-two values, each character maps to a fixed number
+  // of bits, so the answer follows directly from the character count.
+  case 2:
+    return NumDigits + SignBit;
+  case 8:
+    return NumDigits * 3 + SignBit;
+  case 16:
+    return NumDigits * 4 + SignBit;
+  // For radix 10 (and 36 below), use a per-character ratio that is always
+  // large enough but might be too large. The ratio does not work for a single
+  // character, so a fixed width (4 bits, or 7 for radix 36) is used instead.
+  case 10:
+    return (NumDigits == 1 ? 4 : NumDigits * 64 / 18) + SignBit;
+  default:
+    break;
+  }
+
+  assert(Radix == 36);
+  return (NumDigits == 1 ? 7 : NumDigits * 16 / 3) + SignBit;
+}
+
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
- assert(!str.empty() && "Invalid string length");
- assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
- radix == 36) &&
- "Radix should be 2, 8, 10, 16, or 36!");
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large.
+ unsigned sufficient = getSufficientBitsNeeded(str, radix);
+
+ // For bases 2, 8, and 16, the sufficient number of bits is exact and we can
+ // return the value directly. For bases 10 and 36, we need to do extra work.
+ if (radix == 2 || radix == 8 || radix == 16)
+ return sufficient;
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
size_t slen = str.size();
// Each computation below needs to know if it's negative.
@@ -519,28 +558,6 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(slen && "String is only a sign, needs a value.");
}
- // For radixes of power-of-two values, the bits required is accurately and
- // easily computed
- if (radix == 2)
- return slen + isNegative;
- if (radix == 8)
- return slen * 3 + isNegative;
- if (radix == 16)
- return slen * 4 + isNegative;
-
- // FIXME: base 36
-
- // This is grossly inefficient but accurate. We could probably do something
- // with a computation of roughly slen*64/20 and then adjust by the value of
- // the first few digits. But, I'm not sure how accurate that could be.
-
- // Compute a sufficient number of bits that is always large enough but might
- // be too large. This avoids the assertion in the constructor. This
- // calculation doesn't work appropriately for the numbers 0-9, so just use 4
- // bits in that case.
- unsigned sufficient
- = radix == 10? (slen == 1 ? 4 : slen * 64/18)
- : (slen == 1 ? 7 : slen * 16/3);
// Convert to the actual binary value.
APInt tmp(sufficient, StringRef(p, slen), radix);
More information about the llvm-commits
mailing list