[clang] [BitInt] Expose a _BitInt literal suffix in C++ (PR #86586)

via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 25 14:34:36 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (js324)

<details>
<summary>Changes</summary>

Hi,

This PR attempts to address #<!-- -->85223 by exposing `_BitInt` literal suffixes in C++ as an extension, spelled `__wb` (or `__uwb` for the unsigned form). It adds a new Extension warning, and the tests are essentially the same as the existing `_BitInt` literal tests for C, with a few additional cases.


---
Full diff: https://github.com/llvm/llvm-project/pull/86586.diff


11 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+1) 
- (modified) clang/include/clang/Basic/DiagnosticCommonKinds.td (+3) 
- (modified) clang/include/clang/Basic/DiagnosticGroups.td (+2) 
- (modified) clang/include/clang/Basic/DiagnosticParseKinds.td (+1-1) 
- (modified) clang/include/clang/Lex/LiteralSupport.h (+1-1) 
- (modified) clang/lib/Lex/LiteralSupport.cpp (+28-7) 
- (modified) clang/lib/Lex/PPExpressions.cpp (+7-6) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+4-4) 
- (added) clang/test/AST/bitint-suffix.cpp (+32) 
- (modified) clang/test/Lexer/bitint-constants-compat.c (+10-1) 
- (added) clang/test/Lexer/bitint-constants.cpp (+172) 


``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7fbe2fec6ca065..15164ef5b68b5e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -88,6 +88,7 @@ sections with improvements to Clang's support for those languages.
 
 C++ Language Changes
 --------------------
+- Implemented _BitInt literal suffixes as ``__wb`` or ``__WB`` with unsigned modifiers also allowed. (#GH85223).
 
 C++20 Feature Support
 ^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index a52bf62e24202c..5466e1472fbf5c 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -234,6 +234,9 @@ def err_cxx23_size_t_suffix: Error<
 def err_size_t_literal_too_large: Error<
   "%select{signed |}0'size_t' literal is out of range of possible "
   "%select{signed |}0'size_t' values">;
+def ext_cpp_bitint_suffix : Extension<
+  "'_BitInt' suffix for literals is a Clang extension">,
+  InGroup<BitIntExtension>;
 def ext_c23_bitint_suffix : ExtWarn<
   "'_BitInt' suffix for literals is a C23 extension">,
   InGroup<C23>;
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 44035e2fd16f2e..e38c8f009efe79 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1516,3 +1516,5 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon
 // Warnings and notes InstallAPI verification.
 def InstallAPIViolation : DiagGroup<"installapi-violation">;
 
+// Warnings related to _BitInt extension 
+def BitIntExtension : DiagGroup<"bit-int-extension">;
\ No newline at end of file
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 46a44418a3153b..6759f923564adf 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1646,7 +1646,7 @@ def warn_ext_int_deprecated : Warning<
   "'_ExtInt' is deprecated; use '_BitInt' instead">, InGroup<DeprecatedType>;
 def ext_bit_int : Extension<
   "'_BitInt' in %select{C17 and earlier|C++}0 is a Clang extension">,
-  InGroup<DiagGroup<"bit-int-extension">>;
+  InGroup<BitIntExtension>;
 } // end of Parse Issue category.
 
 let CategoryName = "Modules Issue" in {
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 643ddbdad8c87d..4e7e6c77003049 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -80,7 +80,7 @@ class NumericLiteralParser {
   bool isFloat128 : 1;      // 1.0q
   bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
   bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
-  bool isBitInt : 1;        // 1wb, 1uwb (C23)
+  bool isBitInt : 1;        // 1wb, 1uwb (C23) or 1__wb, 1__uwb (C++)
   uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
 
 
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 438c6d772e6e04..4d605fd88e81db 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -974,6 +974,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   bool isFixedPointConstant = isFixedPointLiteral();
   bool isFPConstant = isFloatingLiteral();
   bool HasSize = false;
+  bool possibleBitInt = false;
 
   // Loop over all of the characters of the suffix.  If we see something bad,
   // we break out of the loop.
@@ -1117,6 +1118,26 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       if (isImaginary) break;   // Cannot be repeated.
       isImaginary = true;
       continue;  // Success.
+    case '_':
+      if (isFPConstant)
+        break; // Invalid for floats
+      if (HasSize)
+        break;
+      if (possibleBitInt)
+        break; // Cannot be repeated.
+      if (LangOpts.CPlusPlus && s[1] == '_') {
+        // Scan ahead to find possible rest of BitInt suffix
+        for (const char *c = s; c != ThisTokEnd; ++c) {
+          if (*c == 'w' || *c == 'W') {
+            possibleBitInt = true;
+            ++s;
+            break;
+          }
+        }
+        if (possibleBitInt)
+          continue;
+      }
+      break;
     case 'w':
     case 'W':
       if (isFPConstant)
@@ -1124,12 +1145,10 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       if (HasSize)
         break; // Invalid if we already have a size for the literal.
 
-      // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
-      // explicitly do not support the suffix in C++ as an extension because a
-      // library-based UDL that resolves to a library type may be more
-      // appropriate there.
-      if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
-          (s[0] == 'W' && s[1] == 'B'))) {
+      // wb and WB are allowed, but a mixture of cases like Wb or wB is not.
+      // The same rules apply for __wb/__WB.
+      if ((!LangOpts.CPlusPlus || possibleBitInt) &&
+          ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {
         isBitInt = true;
         HasSize = true;
         ++s; // Skip both characters (2nd char skipped on continue).
@@ -1241,7 +1260,9 @@ bool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
     return false;
 
   // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
-  if (Suffix[0] == '_')
+  // Suffixes starting with '__' (double underscore) are for use by
+  // implementation.
+  if (Suffix[0] == '_' && Suffix[1] != '_')
     return true;
 
   // In C++11, there are no library suffixes.
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 8f25c67ec9dfbe..5ec68228b2b801 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -333,13 +333,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
                                  : diag::ext_cxx23_size_t_suffix
                            : diag::err_cxx23_size_t_suffix);
 
-    // 'wb/uwb' literals are a C23 feature. We explicitly do not support the
-    // suffix in C++ as an extension because a library-based UDL that resolves
-    // to a library type may be more appropriate there.
+    // 'wb/uwb' literals are a C23 feature.
+    // '__wb/__uwb' are a C++ extension.
     if (Literal.isBitInt)
-      PP.Diag(PeekTok, PP.getLangOpts().C23
-                           ? diag::warn_c23_compat_bitint_suffix
-                           : diag::ext_c23_bitint_suffix);
+      PP.Diag(PeekTok, !PP.getLangOpts().CPlusPlus
+                           ? PP.getLangOpts().C23
+                                 ? diag::warn_c23_compat_bitint_suffix
+                                 : diag::ext_c23_bitint_suffix
+                           : diag::ext_cpp_bitint_suffix);
 
     // Parse the integer literal into Result.
     if (Literal.GetIntegerValue(Result.Val)) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5f03b981428251..8b350e04e7c2d2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4161,14 +4161,14 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
                                         : diag::ext_cxx23_size_t_suffix
                                   : diag::err_cxx23_size_t_suffix);
 
-    // 'wb/uwb' literals are a C23 feature. We support _BitInt as a type in C++,
-    // but we do not currently support the suffix in C++ mode because it's not
-    // entirely clear whether WG21 will prefer this suffix to return a library
-    // type such as std::bit_int instead of returning a _BitInt.
+    // 'wb/uwb' literals are a C23 feature.
+    // '__wb/__uwb' literals are a C++ extension.
     if (Literal.isBitInt && !getLangOpts().CPlusPlus)
       PP.Diag(Tok.getLocation(), getLangOpts().C23
                                      ? diag::warn_c23_compat_bitint_suffix
                                      : diag::ext_c23_bitint_suffix);
+    else if (Literal.isBitInt)
+      PP.Diag(Tok.getLocation(), diag::ext_cpp_bitint_suffix);
 
     // Get the value in the widest-possible width. What is "widest" depends on
     // whether the literal is a bit-precise integer or not. For a bit-precise
diff --git a/clang/test/AST/bitint-suffix.cpp b/clang/test/AST/bitint-suffix.cpp
new file mode 100644
index 00000000000000..081bb67409abdf
--- /dev/null
+++ b/clang/test/AST/bitint-suffix.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
+
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void ()'
+void func() {
+  // Ensure that we calculate the correct type from the literal suffix.
+
+  // Note: 0__wb should create an _BitInt(2) because a signed bit-precise
+  // integer requires one bit for the sign and one bit for the value,
+  // at a minimum.
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 zero_wb 'typeof (0wb)':'_BitInt(2)'
+  typedef __typeof__(0__wb) zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
+  typedef __typeof__(-0__wb) neg_zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 one_wb 'typeof (1wb)':'_BitInt(2)'
+  typedef __typeof__(1__wb) one_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
+  typedef __typeof__(-1__wb) neg_one_wb;
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(0__uwb) zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:31> col:31 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(-0__uwb) neg_zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(1__uwb) one_uwb;
+
+  // Try a value that is too large to fit in [u]intmax_t.
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:49> col:49 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
+  typedef __typeof__(18446744073709551616__uwb) huge_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:48> col:48 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
+  typedef __typeof__(18446744073709551616__wb) huge_wb;
+}
\ No newline at end of file
diff --git a/clang/test/Lexer/bitint-constants-compat.c b/clang/test/Lexer/bitint-constants-compat.c
index 607ae88a6188bb..d8bff94ef88caa 100644
--- a/clang/test/Lexer/bitint-constants-compat.c
+++ b/clang/test/Lexer/bitint-constants-compat.c
@@ -1,14 +1,23 @@
 // RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
 // RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
-// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wbit-int-extension -Wno-unused -x c++ %s
 
 #if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
                                compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
                                cpp-error {{invalid suffix 'uwb' on integer constant}}
 #endif
 
+#if 18446744073709551615__uwb // ext-error {{invalid suffix '__uwb' on integer constant}} \
+                               compat-error {{invalid suffix '__uwb' on integer constant}} \
+                               cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
+#endif
+
 void func(void) {
   18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C23 extension}} \
                              compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C23}} \
                              cpp-error {{invalid suffix 'wb' on integer constant}}
+
+  18446744073709551615__wb; // ext-error {{invalid suffix '__wb' on integer constant}} \
+                             compat-error {{invalid suffix '__wb' on integer constant}} \
+                             cpp-warning {{'_BitInt' suffix for literals is a Clang extension}}
 }
diff --git a/clang/test/Lexer/bitint-constants.cpp b/clang/test/Lexer/bitint-constants.cpp
new file mode 100644
index 00000000000000..da89c5d30a13eb
--- /dev/null
+++ b/clang/test/Lexer/bitint-constants.cpp
@@ -0,0 +1,172 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wno-unused %s
+
+// Test that the preprocessor behavior makes sense.
+#if 1__wb != 1
+#error "wb suffix must be recognized by preprocessor"
+#endif
+#if 1__uwb != 1
+#error "uwb suffix must be recognized by preprocessor"
+#endif
+#if !(-1__wb < 0)
+#error "wb suffix must be interpreted as signed"
+#endif
+#if !(-1__uwb > 0)
+#error "uwb suffix must be interpreted as unsigned"
+#endif
+
+#if 18446744073709551615__uwb != 18446744073709551615ULL
+#error "expected the max value for uintmax_t to compare equal"
+#endif
+
+// Test that the preprocessor gives appropriate diagnostics when the
+// literal value is larger than what can be stored in a [u]intmax_t.
+#if 18446744073709551616__wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+#if 18446744073709551616__uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+
+// Despite using a bit-precise integer, this is expected to overflow
+// because all preprocessor arithmetic is done in [u]intmax_t, so this
+// should result in the value 0.
+#if 18446744073709551615__uwb + 1 != 0ULL
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Because this bit-precise integer is signed, it will also overflow,
+// but Clang handles that by converting to uintmax_t instead of
+// intmax_t.
+#if 18446744073709551615__wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Test that just because the preprocessor can't figure out the bit
+// width doesn't mean we can't form the constant, it just means we
+// can't use the value in a preprocessor conditional.
+unsigned _BitInt(65) Val = 18446744073709551616__uwb;
+
+void ValidSuffix(void) {
+  // Decimal literals.
+  1__wb;
+  1__WB;
+  -1__wb;
+  _Static_assert((int)1__wb == 1, "not 1?");
+  _Static_assert((int)-1__wb == -1, "not -1?");
+
+  1__uwb;
+  1__uWB;
+  1__Uwb;
+  1__UWB;
+  1u__wb;
+  1__WBu;
+  1U__WB;
+  _Static_assert((unsigned int)1__uwb == 1u, "not 1?");
+
+  1'2__wb;
+  1'2__uwb;
+  _Static_assert((int)1'2__wb == 12, "not 12?");
+  _Static_assert((unsigned int)1'2__uwb == 12u, "not 12?");
+
+  // Hexadecimal literals.
+  0x1__wb;
+  0x1__uwb;
+  0x0'1'2'3__wb;
+  0xA'B'c'd__uwb;
+  _Static_assert((int)0x0'1'2'3__wb == 0x0123, "not 0x0123");
+  _Static_assert((unsigned int)0xA'B'c'd__uwb == 0xABCDu, "not 0xABCD");
+
+  // Binary literals.
+  0b1__wb;
+  0b1__uwb;
+  0b1'0'1'0'0'1__wb;
+  0b0'1'0'1'1'0__uwb;
+  _Static_assert((int)0b1__wb == 1, "not 1?");
+  _Static_assert((unsigned int)0b1__uwb == 1u, "not 1?");
+
+  // Octal literals.
+  01__wb;
+  01__uwb;
+  0'6'0__wb;
+  0'0'1__uwb;
+  0__wbu;
+  0__WBu;
+  0U__wb;
+  0U__WB;
+  0__wb;
+  _Static_assert((int)0__wb == 0, "not 0?");
+  _Static_assert((unsigned int)0__wbu == 0u, "not 0?");
+
+  // Imaginary or Complex. These are allowed because _Complex can work with any
+  // integer type, and that includes _BitInt.
+  1__iwb;
+  1i__wb;
+  1__wbj;
+}
+
+void InvalidSuffix(void) {
+  // Can't mix the case of wb or WB, and can't rearrange the letters.
+  0__wB; // expected-error {{invalid suffix '__wB' on integer constant}}
+  0__Wb; // expected-error {{invalid suffix '__Wb' on integer constant}}
+  0__bw; // expected-error {{invalid suffix '__bw' on integer constant}}
+  0__BW; // expected-error {{invalid suffix '__BW' on integer constant}}
+
+  // Trailing digit separators should still diagnose.
+  1'2'__wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+  1'2'__uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+
+  // Long.
+  1l__wb; // expected-error {{invalid suffix}}
+  1__wbl; // expected-error {{invalid suffix}}
+  1l__uwb; // expected-error {{invalid suffix}}
+  1__l; // expected-error {{invalid suffix}}
+  1ul__wb;  // expected-error {{invalid suffix}}
+
+  // Long long.
+  1ll__wb; // expected-error {{invalid suffix}}
+  1__uwbll; // expected-error {{invalid suffix}}
+
+  // Floating point.
+  0.1__wb;   // expected-error {{invalid suffix}}
+  0.1f__wb;   // expected-error {{invalid suffix}}
+
+  // Repetitive suffix.
+  1__wb__wb; // expected-error {{invalid suffix}}
+  1__uwbuwb; // expected-error {{invalid suffix}}
+  1__wbuwb; // expected-error {{invalid suffix}}
+  1__uwbwb; // expected-error {{invalid suffix}}
+
+  // Missing or extra characters in suffix.
+  1__; // expected-error {{invalid suffix}}
+  1___; // expected-error {{invalid suffix}}
+  1___WB; // expected-error {{invalid suffix}}
+  1__wb__; // expected-error {{invalid suffix}}
+  1__w; // expected-error {{invalid suffix}}
+  1__b; // expected-error {{invalid suffix}}
+}
+
+void ValidSuffixInvalidValue(void) {
+  // This is a valid suffix, but the value is larger than one that fits within
+  // the width of BITINT_MAXWIDTH. When this value changes in the future, the
+  // test cases should pick a new value that can't be represented by a _BitInt,
+  // but also add a test case that a 129-bit literal still behaves as-expected.
+  _Static_assert(__BITINT_MAXWIDTH__ <= 128,
+	             "Need to pick a bigger constant for the test case below.");
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1__uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
+}
+
+void TestTypes(void) {
+  // 2 value bits, one sign bit
+  _Static_assert(__is_same(decltype(3__wb), _BitInt(3)));
+  // 2 value bits, one sign bit
+  _Static_assert(__is_same(decltype(-3__wb), _BitInt(3)));
+  // 2 value bits, no sign bit
+  _Static_assert(__is_same(decltype(3__uwb), unsigned _BitInt(2)));
+  // 4 value bits, one sign bit
+  _Static_assert(__is_same(decltype(0xF__wb), _BitInt(5)));
+  // 4 value bits, one sign bit
+  _Static_assert(__is_same(decltype(-0xF__wb), _BitInt(5)));
+  // 4 value bits, no sign bit
+  _Static_assert(__is_same(decltype(0xF__uwb), unsigned _BitInt(4)));
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/86586


More information about the cfe-commits mailing list