[clang] [BitInt] Expose a _BitInt literal suffix in C++ (PR #86586)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 16 12:58:59 PDT 2024
https://github.com/js324 updated https://github.com/llvm/llvm-project/pull/86586
>From c822eaf87526567825e9c4403ae9f01dd4ff58a3 Mon Sep 17 00:00:00 2001
From: Jin S <jins918 at gmail.com>
Date: Mon, 25 Mar 2024 17:19:41 -0400
Subject: [PATCH] [BitInt] Expose a _BitInt literal suffix in C++
---
clang/docs/ReleaseNotes.rst | 1 +
.../clang/Basic/DiagnosticCommonKinds.td | 3 +
clang/include/clang/Basic/DiagnosticGroups.td | 2 +
.../clang/Basic/DiagnosticParseKinds.td | 2 +-
clang/include/clang/Lex/LiteralSupport.h | 5 +-
clang/lib/Lex/LiteralSupport.cpp | 36 +++-
clang/lib/Lex/PPExpressions.cpp | 8 +-
clang/lib/Sema/SemaExpr.cpp | 12 +-
clang/test/AST/bitint-suffix.cpp | 32 ++++
clang/test/Lexer/bitint-constants-compat.c | 11 +-
clang/test/Lexer/bitint-constants.cpp | 178 ++++++++++++++++++
11 files changed, 274 insertions(+), 16 deletions(-)
create mode 100644 clang/test/AST/bitint-suffix.cpp
create mode 100644 clang/test/Lexer/bitint-constants.cpp
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7fbe2fec6ca065..d40c86a15ac2da 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -88,6 +88,7 @@ sections with improvements to Clang's support for those languages.
C++ Language Changes
--------------------
+- Implemented the ``_BitInt`` literal suffixes ``__wb`` and ``__WB`` as a Clang extension; the ``u``/``U`` unsigned modifier is also allowed (#GH85223).
C++20 Feature Support
^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index a52bf62e24202c..0738f43ca555c8 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -234,6 +234,9 @@ def err_cxx23_size_t_suffix: Error<
def err_size_t_literal_too_large: Error<
"%select{signed |}0'size_t' literal is out of range of possible "
"%select{signed |}0'size_t' values">;
+def ext_cxx_bitint_suffix : Extension<
+ "'_BitInt' suffix for literals is a Clang extension">,
+ InGroup<BitIntExtension>;
def ext_c23_bitint_suffix : ExtWarn<
"'_BitInt' suffix for literals is a C23 extension">,
InGroup<C23>;
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 44035e2fd16f2e..37f56ed6289d27 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1516,3 +1516,5 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon
// Warnings and notes InstallAPI verification.
def InstallAPIViolation : DiagGroup<"installapi-violation">;
+// Warnings related to _BitInt extension
+def BitIntExtension : DiagGroup<"bit-int-extension">;
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 46a44418a3153b..6759f923564adf 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1646,7 +1646,7 @@ def warn_ext_int_deprecated : Warning<
"'_ExtInt' is deprecated; use '_BitInt' instead">, InGroup<DeprecatedType>;
def ext_bit_int : Extension<
"'_BitInt' in %select{C17 and earlier|C++}0 is a Clang extension">,
- InGroup<DiagGroup<"bit-int-extension">>;
+ InGroup<BitIntExtension>;
} // end of Parse Issue category.
let CategoryName = "Modules Issue" in {
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 643ddbdad8c87d..e7c3aaae9fc35e 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -80,7 +80,10 @@ class NumericLiteralParser {
bool isFloat128 : 1; // 1.0q
bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr
bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk
- bool isBitInt : 1; // 1wb, 1uwb (C23)
+ // clang-format off
+ bool isBitInt : 1; // 1wb, 1uwb (C23) or 1__wb, 1__uwb (Clang extension in C++
+ // mode)
+ // clang-format on
uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 438c6d772e6e04..9c0cbea5052cb2 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -974,6 +974,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
bool isFixedPointConstant = isFixedPointLiteral();
bool isFPConstant = isFloatingLiteral();
bool HasSize = false;
+ bool DoubleUnderscore = false;
// Loop over all of the characters of the suffix. If we see something bad,
// we break out of the loop.
@@ -1117,6 +1118,31 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
if (isImaginary) break; // Cannot be repeated.
isImaginary = true;
continue; // Success.
+ case '_':
+ if (isFPConstant)
+ break; // Invalid for floats
+ if (HasSize)
+ break;
+ if (DoubleUnderscore)
+ break; // Cannot be repeated.
+ if (LangOpts.CPlusPlus && s + 2 < ThisTokEnd &&
+ s[1] == '_') { // s + 2 < ThisTokEnd to ensure some character exists
+ // after __
+ DoubleUnderscore = true;
+ s += 2; // Skip both '_'
+ if (s + 1 < ThisTokEnd &&
+ (*s == 'u' || *s == 'U')) { // Ensure some character after 'u'/'U'
+ isUnsigned = true;
+ ++s;
+ }
+ if (s + 1 < ThisTokEnd &&
+ ((*s == 'w' && *(++s) == 'b') || (*s == 'W' && *(++s) == 'B'))) {
+ isBitInt = true;
+ HasSize = true;
+ continue;
+ }
+ }
+ break;
case 'w':
case 'W':
if (isFPConstant)
@@ -1127,9 +1153,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
// wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
// explicitly do not support the suffix in C++ as an extension because a
// library-based UDL that resolves to a library type may be more
- // appropriate there.
- if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
- (s[0] == 'W' && s[1] == 'B'))) {
+ // appropriate there. C++ accepts only __wb/__WB, with the same case rule.
+ if ((!LangOpts.CPlusPlus || DoubleUnderscore) && s + 1 < ThisTokEnd &&
+ ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {
isBitInt = true;
HasSize = true;
++s; // Skip both characters (2nd char skipped on continue).
@@ -1241,7 +1267,9 @@ bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
return false;
// By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
- if (Suffix[0] == '_')
+ // Suffixes starting with '__' (double underscore) are for use by
+ // the implementation.
+ if (Suffix.starts_with("_") && !Suffix.starts_with("__"))
return true;
// In C++11, there are no library suffixes.
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 8f25c67ec9dfbe..f267efabd617fd 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -333,11 +333,11 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
: diag::ext_cxx23_size_t_suffix
: diag::err_cxx23_size_t_suffix);
- // 'wb/uwb' literals are a C23 feature. We explicitly do not support the
- // suffix in C++ as an extension because a library-based UDL that resolves
- // to a library type may be more appropriate there.
+ // 'wb/uwb' literals are a C23 feature.
+ // '__wb/__uwb' literals are a Clang extension in C++ mode.
if (Literal.isBitInt)
- PP.Diag(PeekTok, PP.getLangOpts().C23
+ PP.Diag(PeekTok, PP.getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
+ : PP.getLangOpts().C23
? diag::warn_c23_compat_bitint_suffix
: diag::ext_c23_bitint_suffix);
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5f03b981428251..ada4214b64ec37 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4164,11 +4164,13 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
// 'wb/uwb' literals are a C23 feature. We support _BitInt as a type in C++,
// but we do not currently support the suffix in C++ mode because it's not
// entirely clear whether WG21 will prefer this suffix to return a library
- // type such as std::bit_int instead of returning a _BitInt.
- if (Literal.isBitInt && !getLangOpts().CPlusPlus)
- PP.Diag(Tok.getLocation(), getLangOpts().C23
- ? diag::warn_c23_compat_bitint_suffix
- : diag::ext_c23_bitint_suffix);
+ // type such as std::bit_int instead of returning a _BitInt. '__wb/__uwb'
+ // literals are accepted in C++ mode as a Clang extension.
+ if (Literal.isBitInt)
+ PP.Diag(Tok.getLocation(),
+ getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
+ : getLangOpts().C23 ? diag::warn_c23_compat_bitint_suffix
+ : diag::ext_c23_bitint_suffix);
// Get the value in the widest-possible width. What is "widest" depends on
// whether the literal is a bit-precise integer or not. For a bit-precise
diff --git a/clang/test/AST/bitint-suffix.cpp b/clang/test/AST/bitint-suffix.cpp
new file mode 100644
index 00000000000000..dab2b16c74235d
--- /dev/null
+++ b/clang/test/AST/bitint-suffix.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
+
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void ()'
+void func() {
+ // Ensure that we calculate the correct type from the literal suffix.
+
+ // Note: 0__wb should create an _BitInt(2) because a signed bit-precise
+ // integer requires one bit for the sign and one bit for the value,
+ // at a minimum.
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 zero_wb 'typeof (0wb)':'_BitInt(2)'
+ typedef __typeof__(0__wb) zero_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
+ typedef __typeof__(-0__wb) neg_zero_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 one_wb 'typeof (1wb)':'_BitInt(2)'
+ typedef __typeof__(1__wb) one_wb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
+ typedef __typeof__(-1__wb) neg_one_wb;
+
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(0__uwb) zero_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:31> col:31 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(-0__uwb) neg_zero_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
+ typedef __typeof__(1__uwb) one_uwb;
+
+ // Try a value that is too large to fit in [u]intmax_t.
+
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:49> col:49 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
+ typedef __typeof__(18446744073709551616__uwb) huge_uwb;
+ // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:48> col:48 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
+ typedef __typeof__(18446744073709551616__wb) huge_wb;
+}
diff --git a/clang/test/Lexer/bitint-constants-compat.c b/clang/test/Lexer/bitint-constants-compat.c
index 607ae88a6188bb..d8bff94ef88caa 100644
--- a/clang/test/Lexer/bitint-constants-compat.c
+++ b/clang/test/Lexer/bitint-constants-compat.c
@@ -1,14 +1,23 @@
// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
-// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wbit-int-extension -Wno-unused -x c++ %s
#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
cpp-error {{invalid suffix 'uwb' on integer constant}}
#endif
+#if 18446744073709551615__uwb // ext-error {{invalid suffix '__uwb' on integer constant}} \
+ compat-error {{invalid suffix '__uwb' on integer constant}} \
+ cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
+#endif
+
void func(void) {
18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
cpp-error {{invalid suffix 'wb' on integer constant}}
+
+ 18446744073709551615__wb; // ext-error {{invalid suffix '__wb' on integer constant}} \
+ compat-error {{invalid suffix '__wb' on integer constant}} \
+ cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
}
diff --git a/clang/test/Lexer/bitint-constants.cpp b/clang/test/Lexer/bitint-constants.cpp
new file mode 100644
index 00000000000000..fb6ac35467cd67
--- /dev/null
+++ b/clang/test/Lexer/bitint-constants.cpp
@@ -0,0 +1,178 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-unknown -fsyntax-only -verify -Wno-unused %s
+
+// Test that the preprocessor behavior makes sense.
+#if 1__wb != 1
+#error "wb suffix must be recognized by preprocessor"
+#endif
+#if 1__uwb != 1
+#error "uwb suffix must be recognized by preprocessor"
+#endif
+#if !(-1__wb < 0)
+#error "wb suffix must be interpreted as signed"
+#endif
+#if !(-1__uwb > 0)
+#error "uwb suffix must be interpreted as unsigned"
+#endif
+
+#if 18446744073709551615__uwb != 18446744073709551615ULL
+#error "expected the max value for uintmax_t to compare equal"
+#endif
+
+// Test that the preprocessor gives appropriate diagnostics when the
+// literal value is larger than what can be stored in a [u]intmax_t.
+#if 18446744073709551616__wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+#if 18446744073709551616__uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+
+// Despite using a bit-precise integer, this is expected to overflow
+// because all preprocessor arithmetic is done in [u]intmax_t, so this
+// should result in the value 0.
+#if 18446744073709551615__uwb + 1 != 0ULL
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Because this bit-precise integer is signed, it will also overflow,
+// but Clang handles that by converting to uintmax_t instead of
+// intmax_t.
+#if 18446744073709551615__wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Test that just because the preprocessor can't figure out the bit
+// width doesn't mean we can't form the constant, it just means we
+// can't use the value in a preprocessor conditional.
+unsigned _BitInt(65) Val = 18446744073709551616__uwb;
+// UDL test to make sure underscore parsing is correct
+unsigned operator ""_(const char *);
+
+void ValidSuffix(void) {
+ // Decimal literals.
+ 1__wb;
+ 1__WB;
+ -1__wb;
+ _Static_assert((int)1__wb == 1, "not 1?");
+ _Static_assert((int)-1__wb == -1, "not -1?");
+
+ 1__uwb;
+ 1__uWB;
+ 1__Uwb;
+ 1__UWB;
+ 1u__wb;
+ 1__WBu;
+ 1U__WB;
+ _Static_assert((unsigned int)1__uwb == 1u, "not 1?");
+
+ 1'2__wb;
+ 1'2__uwb;
+ _Static_assert((int)1'2__wb == 12, "not 12?");
+ _Static_assert((unsigned int)1'2__uwb == 12u, "not 12?");
+
+ // Hexadecimal literals.
+ 0x1__wb;
+ 0x1__uwb;
+ 0x0'1'2'3__wb;
+ 0xA'B'c'd__uwb;
+ _Static_assert((int)0x0'1'2'3__wb == 0x0123, "not 0x0123");
+ _Static_assert((unsigned int)0xA'B'c'd__uwb == 0xABCDu, "not 0xABCD");
+
+ // Binary literals.
+ 0b1__wb;
+ 0b1__uwb;
+ 0b1'0'1'0'0'1__wb;
+ 0b0'1'0'1'1'0__uwb;
+ _Static_assert((int)0b1__wb == 1, "not 1?");
+ _Static_assert((unsigned int)0b1__uwb == 1u, "not 1?");
+
+ // Octal literals.
+ 01__wb;
+ 01__uwb;
+ 0'6'0__wb;
+ 0'0'1__uwb;
+ 0__wbu;
+ 0__WBu;
+ 0U__wb;
+ 0U__WB;
+ 0__wb;
+ _Static_assert((int)0__wb == 0, "not 0?");
+ _Static_assert((unsigned int)0__wbu == 0u, "not 0?");
+
+ // Imaginary or Complex. These are allowed because _Complex can work with any
+ // integer type, and that includes _BitInt.
+ 1__wbi;
+ 1i__wb;
+ 1__wbj;
+
+ // A suffix made of a single underscore must still be parsed as a UDL.
+ unsigned i = 1.0_;
+}
+
+void InvalidSuffix(void) {
+ // Can't mix the case of wb or WB, and can't rearrange the letters.
+ 0__wB; // expected-error {{invalid suffix '__wB' on integer constant}}
+ 0__Wb; // expected-error {{invalid suffix '__Wb' on integer constant}}
+ 0__bw; // expected-error {{invalid suffix '__bw' on integer constant}}
+ 0__BW; // expected-error {{invalid suffix '__BW' on integer constant}}
+
+ // Trailing digit separators should still diagnose.
+ 1'2'__wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+ 1'2'__uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+
+ // Long.
+ 1l__wb; // expected-error {{invalid suffix}}
+ 1__wbl; // expected-error {{invalid suffix}}
+ 1l__uwb; // expected-error {{invalid suffix}}
+ 1__l; // expected-error {{invalid suffix}}
+ 1ul__wb; // expected-error {{invalid suffix}}
+
+ // Long long.
+ 1ll__wb; // expected-error {{invalid suffix}}
+ 1__uwbll; // expected-error {{invalid suffix}}
+
+ // Floating point.
+ 0.1__wb; // expected-error {{invalid suffix}}
+ 0.1f__wb; // expected-error {{invalid suffix}}
+
+ // Repetitive suffix.
+ 1__wb__wb; // expected-error {{invalid suffix}}
+ 1__uwbuwb; // expected-error {{invalid suffix}}
+ 1__wbuwb; // expected-error {{invalid suffix}}
+ 1__uwbwb; // expected-error {{invalid suffix}}
+
+ // Missing or extra characters in suffix.
+ 1__; // expected-error {{invalid suffix}}
+ 1__u; // expected-error {{invalid suffix}}
+ 1___; // expected-error {{invalid suffix}}
+ 1___WB; // expected-error {{invalid suffix}}
+ 1__wb__; // expected-error {{invalid suffix}}
+ 1__w; // expected-error {{invalid suffix}}
+ 1__b; // expected-error {{invalid suffix}}
+}
+
+void ValidSuffixInvalidValue(void) {
+ // This is a valid suffix, but the value is larger than one that fits within
+ // the width of BITINT_MAXWIDTH. When this value changes in the future, the
+ // test cases should pick a new value that can't be represented by a _BitInt,
+ // but also add a test case that a 129-bit literal still behaves as-expected.
+ _Static_assert(__BITINT_MAXWIDTH__ <= 128,
+ "Need to pick a bigger constant for the test case below.");
+ 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
+ 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
+}
+
+void TestTypes(void) {
+ // 2 value bits, one sign bit
+ _Static_assert(__is_same(decltype(3__wb), _BitInt(3)));
+ // 2 value bits, one sign bit
+ _Static_assert(__is_same(decltype(-3__wb), _BitInt(3)));
+ // 2 value bits, no sign bit
+ _Static_assert(__is_same(decltype(3__uwb), unsigned _BitInt(2)));
+ // 4 value bits, one sign bit
+ _Static_assert(__is_same(decltype(0xF__wb), _BitInt(5)));
+ // 4 value bits, one sign bit
+ _Static_assert(__is_same(decltype(-0xF__wb), _BitInt(5)));
+ // 4 value bits, no sign bit
+ _Static_assert(__is_same(decltype(0xF__uwb), unsigned _BitInt(4)));
+}
More information about the cfe-commits
mailing list