[clang] [BitInt] Expose a _BitInt literal suffix in C++ (PR #86586)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 25 14:34:36 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (js324)
<details>
<summary>Changes</summary>
Hi,
This PR is an attempt to address #<!-- -->85223 by exposing `_BitInt` literal suffixes in C++ as an extension, spelled `__wb`. It adds a new Extension warning, and the tests are essentially the same as the existing `_BitInt` literal tests for C, with a few additional cases.
---
Full diff: https://github.com/llvm/llvm-project/pull/86586.diff
11 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+1)
- (modified) clang/include/clang/Basic/DiagnosticCommonKinds.td (+3)
- (modified) clang/include/clang/Basic/DiagnosticGroups.td (+2)
- (modified) clang/include/clang/Basic/DiagnosticParseKinds.td (+1-1)
- (modified) clang/include/clang/Lex/LiteralSupport.h (+1-1)
- (modified) clang/lib/Lex/LiteralSupport.cpp (+28-7)
- (modified) clang/lib/Lex/PPExpressions.cpp (+7-6)
- (modified) clang/lib/Sema/SemaExpr.cpp (+4-4)
- (added) clang/test/AST/bitint-suffix.cpp (+32)
- (modified) clang/test/Lexer/bitint-constants-compat.c (+10-1)
- (added) clang/test/Lexer/bitint-constants.cpp (+172)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7fbe2fec6ca065..15164ef5b68b5e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -88,6 +88,7 @@ sections with improvements to Clang's support for those languages.
C++ Language Changes
--------------------
+- Implemented _BitInt literal suffixes as ``__wb`` or ``__WB`` with unsigned modifiers also allowed. (#GH85223).
C++20 Feature Support
^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index a52bf62e24202c..5466e1472fbf5c 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -234,6 +234,9 @@ def err_cxx23_size_t_suffix: Error<
def err_size_t_literal_too_large: Error<
"%select{signed |}0'size_t' literal is out of range of possible "
"%select{signed |}0'size_t' values">;
+def ext_cpp_bitint_suffix : Extension<
+ "'_BitInt' suffix for literals is a Clang extension">,
+ InGroup<BitIntExtension>;
def ext_c23_bitint_suffix : ExtWarn<
"'_BitInt' suffix for literals is a C23 extension">,
InGroup<C23>;
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 44035e2fd16f2e..e38c8f009efe79 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1516,3 +1516,5 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon
// Warnings and notes InstallAPI verification.
def InstallAPIViolation : DiagGroup<"installapi-violation">;
+// Warnings related to _BitInt extension
+def BitIntExtension : DiagGroup<"bit-int-extension">;
\ No newline at end of file
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 46a44418a3153b..6759f923564adf 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1646,7 +1646,7 @@ def warn_ext_int_deprecated : Warning<
"'_ExtInt' is deprecated; use '_BitInt' instead">, InGroup<DeprecatedType>;
def ext_bit_int : Extension<
"'_BitInt' in %select{C17 and earlier|C++}0 is a Clang extension">,
- InGroup<DiagGroup<"bit-int-extension">>;
+ InGroup<BitIntExtension>;
} // end of Parse Issue category.
let CategoryName = "Modules Issue" in {
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 643ddbdad8c87d..4e7e6c77003049 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -80,7 +80,7 @@ class NumericLiteralParser {
bool isFloat128 : 1; // 1.0q
bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr
bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk
- bool isBitInt : 1; // 1wb, 1uwb (C23)
+ bool isBitInt : 1; // 1wb, 1uwb (C23) or 1__wb, 1__uwb (C++)
uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 438c6d772e6e04..4d605fd88e81db 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -974,6 +974,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
bool isFixedPointConstant = isFixedPointLiteral();
bool isFPConstant = isFloatingLiteral();
bool HasSize = false;
+ bool possibleBitInt = false;
// Loop over all of the characters of the suffix. If we see something bad,
// we break out of the loop.
@@ -1117,6 +1118,26 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
if (isImaginary) break; // Cannot be repeated.
isImaginary = true;
continue; // Success.
+ case '_':
+ if (isFPConstant)
+ break; // Invalid for floats
+ if (HasSize)
+ break;
+ if (possibleBitInt)
+ break; // Cannot be repeated.
+ if (LangOpts.CPlusPlus && s[1] == '_') {
+ // Scan ahead to find possible rest of BitInt suffix
+ for (const char *c = s; c != ThisTokEnd; ++c) {
+ if (*c == 'w' || *c == 'W') {
+ possibleBitInt = true;
+ ++s;
+ break;
+ }
+ }
+ if (possibleBitInt)
+ continue;
+ }
+ break;
case 'w':
case 'W':
if (isFPConstant)
@@ -1124,12 +1145,10 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
if (HasSize)
break; // Invalid if we already have a size for the literal.
- // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
- // explicitly do not support the suffix in C++ as an extension because a
- // library-based UDL that resolves to a library type may be more
- // appropriate there.
- if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
- (s[0] == 'W' && s[1] == 'B'))) {
+ // wb and WB are allowed, but a mixture of cases like Wb or wB is not.
+ // The same rules apply for __wb/__WB.
+ if ((!LangOpts.CPlusPlus || possibleBitInt) &&
+ ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {
isBitInt = true;
HasSize = true;
++s; // Skip both characters (2nd char skipped on continue).
@@ -1241,7 +1260,9 @@ bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
return false;
// By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
- if (Suffix[0] == '_')
+ // Suffixes starting with '__' (double underscore) are for use by
+ // implementation.
+ if (Suffix[0] == '_' && Suffix[1] != '_')
return true;
// In C++11, there are no library suffixes.
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 8f25c67ec9dfbe..5ec68228b2b801 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -333,13 +333,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
: diag::ext_cxx23_size_t_suffix
: diag::err_cxx23_size_t_suffix);
- // 'wb/uwb' literals are a C23 feature. We explicitly do not support the
- // suffix in C++ as an extension because a library-based UDL that resolves
- // to a library type may be more appropriate there.
+ // 'wb/uwb' literals are a C23 feature.
+ // '__wb/__uwb' are a C++ extension.
if (Literal.isBitInt)
- PP.Diag(PeekTok, PP.getLangOpts().C23
- ? diag::warn_c23_compat_bitint_suffix
- : diag::ext_c23_bitint_suffix);
+ PP.Diag(PeekTok, !PP.getLangOpts().CPlusPlus
+ ? PP.getLangOpts().C23
+ ? diag::warn_c23_compat_bitint_suffix
+ : diag::ext_c23_bitint_suffix
+ : diag::ext_cpp_bitint_suffix);
// Parse the integer literal into Result.
if (Literal.GetIntegerValue(Result.Val)) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5f03b981428251..8b350e04e7c2d2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4161,14 +4161,14 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
: diag::ext_cxx23_size_t_suffix
: diag::err_cxx23_size_t_suffix);
- // 'wb/uwb' literals are a C23 feature. We support _BitInt as a type in C++,
- // but we do not currently support the suffix in C++ mode because it's not
- // entirely clear whether WG21 will prefer this suffix to return a library
- // type such as std::bit_int instead of returning a _BitInt.
+ // 'wb/uwb' literals are a C23 feature.
+ // '__wb/__uwb' literals are a C++ extension.
if (Literal.isBitInt && !getLangOpts().CPlusPlus)
PP.Diag(Tok.getLocation(), getLangOpts().C23
? diag::warn_c23_compat_bitint_suffix
: diag::ext_c23_bitint_suffix);
+ else if (Literal.isBitInt)
+ PP.Diag(Tok.getLocation(), diag::ext_cpp_bitint_suffix);
// Get the value in the widest-possible width. What is "widest" depends on
// whether the literal is a bit-precise integer or not. For a bit-precise
diff --git a/clang/test/AST/bitint-suffix.cpp b/clang/test/AST/bitint-suffix.cpp
new file mode 100644
index 00000000000000..081bb67409abdf
--- /dev/null
+++ b/clang/test/AST/bitint-suffix.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
+
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void ()'
+void func() {
+ // Ensure that we calculate the correct type from the literal suffix.
+
+ // Note: 0__wb should create an _BitInt(2) because a signed bit-precise
+ // integer requires one bit for the sign and one bit for the value,
+ // at a minimum.
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 zero_wb 'typeof (0wb)':'_BitInt(2)'
+ typedef __typeof__(0__wb) zero_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
+ typedef __typeof__(-0__wb) neg_zero_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 one_wb 'typeof (1wb)':'_BitInt(2)'
+ typedef __typeof__(1__wb) one_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
+ typedef __typeof__(-1__wb) neg_one_wb;
+
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(0__uwb) zero_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:31> col:31 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(-0__uwb) neg_zero_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(1__uwb) one_uwb;
+
+ // Try a value that is too large to fit in [u]intmax_t.
+
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:49> col:49 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
+ typedef __typeof__(18446744073709551616__uwb) huge_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:48> col:48 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
+ typedef __typeof__(18446744073709551616__wb) huge_wb;
+}
\ No newline at end of file
diff --git a/clang/test/Lexer/bitint-constants-compat.c b/clang/test/Lexer/bitint-constants-compat.c
index 607ae88a6188bb..d8bff94ef88caa 100644
--- a/clang/test/Lexer/bitint-constants-compat.c
+++ b/clang/test/Lexer/bitint-constants-compat.c
@@ -1,14 +1,23 @@
// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
-// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wbit-int-extension -Wno-unused -x c++ %s
#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
cpp-error {{invalid suffix 'uwb' on integer constant}}
#endif
+#if 18446744073709551615__uwb // ext-error {{invalid suffix '__uwb' on integer constant}} \
+ compat-error {{invalid suffix '__uwb' on integer constant}} \
+ cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
+#endif
+
void func(void) {
18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
cpp-error {{invalid suffix 'wb' on integer constant}}
+
+ 18446744073709551615__wb; // ext-error {{invalid suffix '__wb' on integer constant}} \
+ compat-error {{invalid suffix '__wb' on integer constant}} \
+ cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
}
diff --git a/clang/test/Lexer/bitint-constants.cpp b/clang/test/Lexer/bitint-constants.cpp
new file mode 100644
index 00000000000000..da89c5d30a13eb
--- /dev/null
+++ b/clang/test/Lexer/bitint-constants.cpp
@@ -0,0 +1,172 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wno-unused %s
+
+// Test that the preprocessor behavior makes sense.
+#if 1__wb != 1
+#error "wb suffix must be recognized by preprocessor"
+#endif
+#if 1__uwb != 1
+#error "uwb suffix must be recognized by preprocessor"
+#endif
+#if !(-1__wb < 0)
+#error "wb suffix must be interpreted as signed"
+#endif
+#if !(-1__uwb > 0)
+#error "uwb suffix must be interpreted as unsigned"
+#endif
+
+#if 18446744073709551615__uwb != 18446744073709551615ULL
+#error "expected the max value for uintmax_t to compare equal"
+#endif
+
+// Test that the preprocessor gives appropriate diagnostics when the
+// literal value is larger than what can be stored in a [u]intmax_t.
+#if 18446744073709551616__wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+#if 18446744073709551616__uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+
+// Despite using a bit-precise integer, this is expected to overflow
+// because all preprocessor arithmetic is done in [u]intmax_t, so this
+// should result in the value 0.
+#if 18446744073709551615__uwb + 1 != 0ULL
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Because this bit-precise integer is signed, it will also overflow,
+// but Clang handles that by converting to uintmax_t instead of
+// intmax_t.
+#if 18446744073709551615__wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Test that just because the preprocessor can't figure out the bit
+// width doesn't mean we can't form the constant, it just means we
+// can't use the value in a preprocessor conditional.
+unsigned _BitInt(65) Val = 18446744073709551616__uwb;
+
+void ValidSuffix(void) {
+ // Decimal literals.
+ 1__wb;
+ 1__WB;
+ -1__wb;
+ _Static_assert((int)1__wb == 1, "not 1?");
+ _Static_assert((int)-1__wb == -1, "not -1?");
+
+ 1__uwb;
+ 1__uWB;
+ 1__Uwb;
+ 1__UWB;
+ 1u__wb;
+ 1__WBu;
+ 1U__WB;
+ _Static_assert((unsigned int)1__uwb == 1u, "not 1?");
+
+ 1'2__wb;
+ 1'2__uwb;
+ _Static_assert((int)1'2__wb == 12, "not 12?");
+ _Static_assert((unsigned int)1'2__uwb == 12u, "not 12?");
+
+ // Hexadecimal literals.
+ 0x1__wb;
+ 0x1__uwb;
+ 0x0'1'2'3__wb;
+ 0xA'B'c'd__uwb;
+ _Static_assert((int)0x0'1'2'3__wb == 0x0123, "not 0x0123");
+ _Static_assert((unsigned int)0xA'B'c'd__uwb == 0xABCDu, "not 0xABCD");
+
+ // Binary literals.
+ 0b1__wb;
+ 0b1__uwb;
+ 0b1'0'1'0'0'1__wb;
+ 0b0'1'0'1'1'0__uwb;
+ _Static_assert((int)0b1__wb == 1, "not 1?");
+ _Static_assert((unsigned int)0b1__uwb == 1u, "not 1?");
+
+ // Octal literals.
+ 01__wb;
+ 01__uwb;
+ 0'6'0__wb;
+ 0'0'1__uwb;
+ 0__wbu;
+ 0__WBu;
+ 0U__wb;
+ 0U__WB;
+ 0__wb;
+ _Static_assert((int)0__wb == 0, "not 0?");
+ _Static_assert((unsigned int)0__wbu == 0u, "not 0?");
+
+ // Imaginary or Complex. These are allowed because _Complex can work with any
+ // integer type, and that includes _BitInt.
+ 1__iwb;
+ 1i__wb;
+ 1__wbj;
+}
+
+void InvalidSuffix(void) {
+ // Can't mix the case of wb or WB, and can't rearrange the letters.
+ 0__wB; // expected-error {{invalid suffix '__wB' on integer constant}}
+ 0__Wb; // expected-error {{invalid suffix '__Wb' on integer constant}}
+ 0__bw; // expected-error {{invalid suffix '__bw' on integer constant}}
+ 0__BW; // expected-error {{invalid suffix '__BW' on integer constant}}
+
+ // Trailing digit separators should still diagnose.
+ 1'2'__wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+ 1'2'__uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+
+ // Long.
+ 1l__wb; // expected-error {{invalid suffix}}
+ 1__wbl; // expected-error {{invalid suffix}}
+ 1l__uwb; // expected-error {{invalid suffix}}
+ 1__l; // expected-error {{invalid suffix}}
+ 1ul__wb; // expected-error {{invalid suffix}}
+
+ // Long long.
+ 1ll__wb; // expected-error {{invalid suffix}}
+ 1__uwbll; // expected-error {{invalid suffix}}
+
+ // Floating point.
+ 0.1__wb; // expected-error {{invalid suffix}}
+ 0.1f__wb; // expected-error {{invalid suffix}}
+
+ // Repetitive suffix.
+ 1__wb__wb; // expected-error {{invalid suffix}}
+ 1__uwbuwb; // expected-error {{invalid suffix}}
+ 1__wbuwb; // expected-error {{invalid suffix}}
+ 1__uwbwb; // expected-error {{invalid suffix}}
+
+ // Missing or extra characters in suffix.
+ 1__; // expected-error {{invalid suffix}}
+ 1___; // expected-error {{invalid suffix}}
+ 1___WB; // expected-error {{invalid suffix}}
+ 1__wb__; // expected-error {{invalid suffix}}
+ 1__w; // expected-error {{invalid suffix}}
+ 1__b; // expected-error {{invalid suffix}}
+}
+
+void ValidSuffixInvalidValue(void) {
+ // This is a valid suffix, but the value is larger than one that fits within
+ // the width of BITINT_MAXWIDTH. When this value changes in the future, the
+ // test cases should pick a new value that can't be represented by a _BitInt,
+ // but also add a test case that a 129-bit literal still behaves as-expected.
+ _Static_assert(__BITINT_MAXWIDTH__ <= 128,
+ "Need to pick a bigger constant for the test case below.");
+ 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
+ 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
+}
+
+void TestTypes(void) {
+ // 2 value bits, one sign bit
+ _Static_assert(__is_same(decltype(3__wb), _BitInt(3)));
+ // 2 value bits, one sign bit
+ _Static_assert(__is_same(decltype(-3__wb), _BitInt(3)));
+ // 2 value bits, no sign bit
+ _Static_assert(__is_same(decltype(3__uwb), unsigned _BitInt(2)));
+ // 4 value bits, one sign bit
+ _Static_assert(__is_same(decltype(0xF__wb), _BitInt(5)));
+ // 4 value bits, one sign bit
+ _Static_assert(__is_same(decltype(-0xF__wb), _BitInt(5)));
+ // 4 value bits, no sign bit
+ _Static_assert(__is_same(decltype(0xF__uwb), unsigned _BitInt(4)));
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/86586
More information about the cfe-commits
mailing list