[clang] 88501dc - [Sema] -Wformat: support C23 format specifier %b %B

Fangrui Song via cfe-commits cfe-commits at lists.llvm.org
Thu Aug 4 10:26:36 PDT 2022


Author: Fangrui Song
Date: 2022-08-04T10:26:31-07:00
New Revision: 88501dc74911b00186298fe1fffe8dfb1f09b1c1

URL: https://github.com/llvm/llvm-project/commit/88501dc74911b00186298fe1fffe8dfb1f09b1c1
DIFF: https://github.com/llvm/llvm-project/commit/88501dc74911b00186298fe1fffe8dfb1f09b1c1.diff

LOG: [Sema] -Wformat: support C23 format specifier %b %B

Close #56885: WG14 N2630 added %b to fprintf/fscanf and recommended %B for
fprintf. This patch teaches -Wformat %b for the printf/scanf family of functions
and %B for the printf family of functions.

glibc 2.35 and latest Android bionic added %b/%B printf support. From
https://www.openwall.com/lists/libc-coord/2022/07/ no scanf support is available
yet.

Like GCC, we don't test library support.

GCC 12 -Wformat -pedantic emits a warning:

> warning: ISO C17 does not support the ‘%b’ gnu_printf format [-Wformat=]

The behavior is not ported.

Note: `freebsd_kernel_printf` uses %b differently.

Reviewed By: aaron.ballman, dim, enh

Differential Revision: https://reviews.llvm.org/D131057

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/AST/FormatString.h
    clang/lib/AST/FormatString.cpp
    clang/lib/AST/PrintfFormatString.cpp
    clang/lib/AST/ScanfFormatString.cpp
    clang/test/Sema/format-strings-fixit.c
    clang/test/Sema/format-strings-scanf.c
    clang/test/Sema/format-strings.c
    clang/test/SemaObjC/format-strings-objc.m

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 85d360eb9cdac..b9729a6a047f0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -69,6 +69,9 @@ Improvements to Clang's diagnostics
   `Issue 50055: <https://github.com/llvm/llvm-project/issues/50055>`_.
 - Clang will now check compile-time determinable string literals as format strings.
   This fixes `Issue 55805: <https://github.com/llvm/llvm-project/issues/55805>`_.
+- ``-Wformat`` now recognizes ``%b`` for the ``printf``/``scanf`` family of
+  functions and ``%B`` for the ``printf`` family of functions. Fixes
+  `Issue 56885: <https://github.com/llvm/llvm-project/issues/56885>`_.
 
 Non-comprehensive list of changes in this release
 -------------------------------------------------

diff  --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index d7933382f13d6..0f1f6b1340ff1 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -127,8 +127,12 @@ class ConversionSpecifier {
     dArg,
     DArg, // Apple extension
     iArg,
+    // C23 conversion specifiers.
+    bArg,
+    BArg,
+
     IntArgBeg = dArg,
-    IntArgEnd = iArg,
+    IntArgEnd = BArg,
 
     oArg,
     OArg, // Apple extension

diff  --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index 1a1dddcbacfd9..1a763434c7561 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -624,6 +624,8 @@ analyze_format_string::LengthModifier::toString() const {
 
 const char *ConversionSpecifier::toString() const {
   switch (kind) {
+  case bArg: return "b";
+  case BArg: return "B";
   case dArg: return "d";
   case DArg: return "D";
   case iArg: return "i";
@@ -753,6 +755,8 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target,
     case LengthModifier::AsSizeT:
     case LengthModifier::AsPtrDiff:
       switch (CS.getKind()) {
+        case ConversionSpecifier::bArg:
+        case ConversionSpecifier::BArg:
         case ConversionSpecifier::dArg:
         case ConversionSpecifier::DArg:
         case ConversionSpecifier::iArg:
@@ -908,6 +912,8 @@ bool FormatSpecifier::hasStandardLengthModifier() const {
 bool FormatSpecifier::hasStandardConversionSpecifier(
     const LangOptions &LangOpt) const {
   switch (CS.getKind()) {
+    case ConversionSpecifier::bArg:
+    case ConversionSpecifier::BArg:
     case ConversionSpecifier::cArg:
     case ConversionSpecifier::dArg:
     case ConversionSpecifier::iArg:

diff  --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index d972c120ebac5..4f5fbdc9da25e 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -326,6 +326,14 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
     case 's': k = ConversionSpecifier::sArg; break;
     case 'u': k = ConversionSpecifier::uArg; break;
     case 'x': k = ConversionSpecifier::xArg; break;
+    // C23.
+    case 'b':
+      if (isFreeBSDKPrintf)
+        k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
+      else
+        k = ConversionSpecifier::bArg;
+      break;
+    case 'B': k = ConversionSpecifier::BArg; break;
     // POSIX specific.
     case 'C': k = ConversionSpecifier::CArg; break;
     case 'S': k = ConversionSpecifier::SArg; break;
@@ -337,11 +345,6 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
     case '@': k = ConversionSpecifier::ObjCObjArg; break;
     // Glibc specific.
     case 'm': k = ConversionSpecifier::PrintErrno; break;
-    // FreeBSD kernel specific.
-    case 'b':
-      if (isFreeBSDKPrintf)
-        k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
-      break;
     case 'r':
       if (isFreeBSDKPrintf)
         k = ConversionSpecifier::FreeBSDrArg; // int
@@ -961,8 +964,10 @@ bool PrintfSpecifier::hasValidAlternativeForm() const {
   if (!HasAlternativeForm)
     return true;
 
-  // Alternate form flag only valid with the oxXaAeEfFgG conversions
+  // Alternate form flag only valid with the bBoxXaAeEfFgG conversions
   switch (CS.getKind()) {
+  case ConversionSpecifier::bArg:
+  case ConversionSpecifier::BArg:
   case ConversionSpecifier::oArg:
   case ConversionSpecifier::OArg:
   case ConversionSpecifier::xArg:
@@ -988,8 +993,10 @@ bool PrintfSpecifier::hasValidLeadingZeros() const {
   if (!HasLeadingZeroes)
     return true;
 
-  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
+  // Leading zeroes flag only valid with the bBdiouxXaAeEfFgG conversions
   switch (CS.getKind()) {
+  case ConversionSpecifier::bArg:
+  case ConversionSpecifier::BArg:
   case ConversionSpecifier::dArg:
   case ConversionSpecifier::DArg:
   case ConversionSpecifier::iArg:
@@ -1080,8 +1087,10 @@ bool PrintfSpecifier::hasValidPrecision() const {
   if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
     return true;
 
-  // Precision is only valid with the diouxXaAeEfFgGsP conversions
+  // Precision is only valid with the bBdiouxXaAeEfFgGsP conversions
   switch (CS.getKind()) {
+  case ConversionSpecifier::bArg:
+  case ConversionSpecifier::BArg:
   case ConversionSpecifier::dArg:
   case ConversionSpecifier::DArg:
   case ConversionSpecifier::iArg:

diff  --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index 7679f02e04abd..73454a2c554e6 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -161,6 +161,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
     default:
       break;
     case '%': k = ConversionSpecifier::PercentArg;   break;
+    case 'b': k = ConversionSpecifier::bArg; break;
     case 'A': k = ConversionSpecifier::AArg; break;
     case 'E': k = ConversionSpecifier::EArg; break;
     case 'F': k = ConversionSpecifier::FArg; break;
@@ -267,6 +268,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
       llvm_unreachable("Unsupported LengthModifier Type");
 
     // Unsigned int.
+    case ConversionSpecifier::bArg:
     case ConversionSpecifier::oArg:
     case ConversionSpecifier::OArg:
     case ConversionSpecifier::uArg:

diff  --git a/clang/test/Sema/format-strings-fixit.c b/clang/test/Sema/format-strings-fixit.c
index e6d638b75a71d..64614e1c53fcd 100644
--- a/clang/test/Sema/format-strings-fixit.c
+++ b/clang/test/Sema/format-strings-fixit.c
@@ -21,6 +21,7 @@ void test(void) {
   printf("%s", (int) 123);
   printf("abc%0f", "testing testing 123");
   printf("%u", (long) -12);
+  printf("%b", (long) -13);
   printf("%p", 123);
   printf("%c\n", "x");
   printf("%c\n", 1.23);
@@ -160,6 +161,7 @@ void test2(int intSAParm[static 2]) {
   scanf("%f", (my_int_type*)&intVar);
 
   // Preserve the original formatting.
+  scanf("%b", &longVar);
   scanf("%o", &longVar);
   scanf("%u", &longVar);
   scanf("%x", &longVar);
@@ -179,6 +181,7 @@ void test2(int intSAParm[static 2]) {
 // CHECK: printf("%d", (int) 123);
 // CHECK: printf("abc%s", "testing testing 123");
 // CHECK: printf("%ld", (long) -12);
+// CHECK: printf("%ld", (long) -13);
 // CHECK: printf("%d", 123);
 // CHECK: printf("%s\n", "x");
 // CHECK: printf("%f\n", 1.23);
@@ -246,6 +249,7 @@ void test2(int intSAParm[static 2]) {
 // CHECK: scanf("%ju", (my_uintmax_type*)&uIntmaxVar);
 // CHECK: scanf("%td", (my_ptr
diff _type*)&ptr
diff Var);
 // CHECK: scanf("%d", (my_int_type*)&intVar);
+// CHECK: scanf("%ld", &longVar);
 // CHECK: scanf("%lo", &longVar);
 // CHECK: scanf("%lu", &longVar);
 // CHECK: scanf("%lx", &longVar);

diff  --git a/clang/test/Sema/format-strings-scanf.c b/clang/test/Sema/format-strings-scanf.c
index 7b864bfdd237e..d6e403148a5c4 100644
--- a/clang/test/Sema/format-strings-scanf.c
+++ b/clang/test/Sema/format-strings-scanf.c
@@ -40,9 +40,11 @@ void test(const char *s, int *i) {
   scanf("%0d", i); // expected-warning{{zero field width in scanf format string is unused}}
   scanf("%00d", i); // expected-warning{{zero field width in scanf format string is unused}}
   scanf("%d%[asdfasdfd", i, s); // expected-warning{{no closing ']' for '%[' in scanf format string}}
+  scanf("%B", i); // expected-warning{{invalid conversion specifier 'B'}}
 
   unsigned short s_x;
   scanf ("%" "hu" "\n", &s_x); // no-warning
+  scanf("%hb", &s_x);
   scanf("%y", i); // expected-warning{{invalid conversion specifier 'y'}}
   scanf("%%"); // no-warning
   scanf("%%%1$d", i); // no-warning
@@ -57,6 +59,7 @@ void test(const char *s, int *i) {
   scanf("%s", (signed char*)0); // no-warning
   scanf("%s", (unsigned char*)0); // no-warning
   scanf("%hhu", (signed char*)0); // no-warning
+  scanf("%hhb", (signed char*)0); // no-warning
 }
 
 void bad_length_modifiers(char *s, void *p, wchar_t *ws, long double *ld) {
@@ -193,6 +196,7 @@ void test_qualifiers(const int *cip, volatile int* vip,
 void test_size_types(void) {
   size_t s = 0;
   scanf("%zu", &s); // No warning.
+  scanf("%zb", &s);
 
   double d1 = 0.;
   scanf("%zu", &d1); // expected-warning-re{{format specifies type 'size_t *' (aka '{{.+}}') but the argument has type 'double *'}}
@@ -213,6 +217,7 @@ void test_size_types(void) {
 void test_ptr
diff _t_types(void) {
   __UNSIGNED_PTRDIFF_TYPE__ p1 = 0;
   scanf("%tu", &p1); // No warning.
+  scanf("%tb", &p1);
 
   double d1 = 0.;
   scanf("%tu", &d1); // expected-warning-re{{format specifies type 'unsigned ptr
diff _t *' (aka '{{.+}}') but the argument has type 'double *'}}

diff  --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index 85052d57fc47c..919bd74de392f 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -199,7 +199,7 @@ void check_writeback_specifier(void)
 
 void check_invalid_specifier(FILE* fp, char *buf)
 {
-  printf("%s%lb%d","unix",10,20); // expected-warning {{invalid conversion specifier 'b'}} expected-warning {{data argument not used by format string}}
+  printf("%s%lv%d","unix",10,20); // expected-warning {{invalid conversion specifier 'v'}} expected-warning {{data argument not used by format string}}
   fprintf(fp,"%%%l"); // expected-warning {{incomplete format specifier}}
   sprintf(buf,"%%%%%ld%d%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
   snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} expected-warning {{invalid conversion specifier ';'}} expected-warning {{data argument not used by format string}}
@@ -304,6 +304,7 @@ void test10(int x, float f, int i, long long lli) {
   printf("%qp", (void *)0); // expected-warning{{length modifier 'q' results in undefined behavior or no effect with 'p' conversion specifier}}
   printf("hhX %hhX", (unsigned char)10); // no-warning
   printf("llX %llX", (long long) 10); // no-warning
+  printf("%llb %llB", (long long) 10, (long long) 10); // no-warning
   // This is fine, because there is an implicit conversion to an int.
   printf("%d", (unsigned char) 10); // no-warning
   printf("%d", (long long) 10); // expected-warning{{format specifies type 'int' but the argument has type 'long long'}}
@@ -429,6 +430,7 @@ void bug7377_bad_length_mod_usage(void) {
   // Bad flag usage
   printf("%#p", (void *) 0); // expected-warning{{flag '#' results in undefined behavior with 'p' conversion specifier}}
   printf("%0d", -1); // no-warning
+  printf("%0b%0B", -1u, -1u); // no-warning
   printf("%-p", (void *) 0); // no-warning
 #if !defined(__ANDROID__) && !defined(__Fuchsia__)
   printf("%#n", (int *) 0); // expected-warning{{flag '#' results in undefined behavior with 'n' conversion specifier}}
@@ -499,6 +501,7 @@ void rdar8332221(va_list ap, int *x, long *y) {
 void pr8641(void) {
   printf("%#x\n", 10);
   printf("%#X\n", 10);
+  printf("%#b %#15.8B\n", 10, 10u);
 }
 
 void posix_extensions(void) {
@@ -508,6 +511,8 @@ void posix_extensions(void) {
   printf("%'i\n", 123456789); // no-warning
   printf("%'f\n", (float) 1.0); // no-warning
   printf("%'p\n", (void*) 0); // expected-warning{{results in undefined behavior with 'p' conversion specifier}}
+  printf("%'b\n", 123456789); // expected-warning{{results in undefined behavior with 'b' conversion specifier}}
+  printf("%'B\n", 123456789); // expected-warning{{results in undefined behavior with 'B' conversion specifier}}
 }
 
 // PR8486
@@ -540,6 +545,7 @@ void check_char(unsigned char x, signed char y) {
   printf("%hhi", x); // no-warning
   printf("%c", x); // no-warning
   printf("%hhu", y); // no-warning
+  printf("%hhb %hhB", x, x); // no-warning
 }
 
 // Test suppression of individual warnings.

diff  --git a/clang/test/SemaObjC/format-strings-objc.m b/clang/test/SemaObjC/format-strings-objc.m
index 71f22c07e2323..51638f9fa4a08 100644
--- a/clang/test/SemaObjC/format-strings-objc.m
+++ b/clang/test/SemaObjC/format-strings-objc.m
@@ -56,7 +56,7 @@ +(instancetype)stringWithFormat:(NSAttributedString *)fmt, ...
 
 void check_nslog(unsigned k) {
   NSLog(@"%d%%", k); // no-warning
-  NSLog(@"%s%lb%d", "unix", 10, 20); // expected-warning {{invalid conversion specifier 'b'}} expected-warning {{data argument not used by format string}}
+  NSLog(@"%s%lv%d", "unix", 10, 20); // expected-warning {{invalid conversion specifier 'v'}} expected-warning {{data argument not used by format string}}
 }
 
 // Check type validation


        


More information about the cfe-commits mailing list