[libc] [llvm] [clang-tools-extra] [libcxx] [compiler-rt] [lld] [libunwind] [mlir] [clang] [lldb] [flang] Fix clang to recognize new C23 modifiers %w and %wf when printing and scanning (PR #71771)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 4 23:49:26 PST 2023


https://github.com/ZijunZhaoCCK updated https://github.com/llvm/llvm-project/pull/71771

>From 06c4cf02dfb4b20c8349c5f3c7209276f6d56edf Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Thu, 9 Nov 2023 02:21:46 +0000
Subject: [PATCH 1/5] Fix clang to recognize new C23 modifiers %w and %wf when
 printing

---
 clang/include/clang/AST/FormatString.h | 16 +++++++-
 clang/lib/AST/FormatString.cpp         | 52 +++++++++++++++++++++++++-
 clang/lib/AST/PrintfFormatString.cpp   | 19 ++++++++++
 clang/test/Sema/format-strings-ms.c    | 28 ++++++++++++++
 4 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 5c4ad9baaef60..6a886854650f1 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -81,8 +81,10 @@ class LengthModifier {
     AsLongDouble, // 'L'
     AsAllocate,   // for '%as', GNU extension to C90 scanf
     AsMAllocate,  // for '%ms', GNU extension to scanf
-    AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z
-    AsWideChar = AsLong // for '%ls', only makes sense for printf
+    AsWide,       // 'w': (1) MSVCRT, like 'l' but only for c, C, s, S, or Z on Windows;
+                  // (2) for b, d, i, o, u, x, or X when followed by a size (8, 16, 32, or 64)
+    AsWideFast,   // 'wf' (for b, d, i, o, u, x, or X)
+    AsWideChar = AsLong, // for '%ls', only makes sense for printf
   };
 
   LengthModifier()
@@ -417,6 +419,7 @@ class FormatSpecifier {
   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
   bool UsesPositionalArg;
   unsigned argIndex;
+  unsigned size;
 public:
   FormatSpecifier(bool isPrintf)
     : CS(isPrintf), VectorNumElts(false),
@@ -460,6 +463,15 @@ class FormatSpecifier {
     FieldWidth = Amt;
   }
 
+  void setSize(unsigned s) {
+    size = s;
+  }
+
+  unsigned getSize() const {
+    return size;
+  }
+
+
   bool usesPositionalArg() const { return UsesPositionalArg; }
 
   bool hasValidLengthModifier(const TargetInfo &Target,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index e0c9e18cfe3a2..ebc136e780717 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -286,7 +286,33 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
       lmKind = LengthModifier::AsInt3264;
       break;
     case 'w':
-      lmKind = LengthModifier::AsWide; ++I; break;
+      ++I;
+      if (I == E) return false;
+      if (*I == 'f') {
+        lmKind = LengthModifier::AsWideFast;
+        ++I;
+      } else {
+        lmKind = LengthModifier::AsWide;
+      }
+
+      if (I == E) return false;
+      int s = 0;
+      while (unsigned(*I - '0') <= 9) {
+        s = 10 * s + unsigned(*I - '0');
+        ++I;
+      }
+
+      // s == 0 is MSVCRT case, like l but only for c, C, s, S, or Z on windows
+      // s != 0 for b, d, i, o, u, x, or X when a size followed(like 8, 16, 32 or 64)
+      if (s != 0) {
+        std::set<int> supported_list {8, 16, 32, 64};
+        if (supported_list.count(s) == 0) {
+          return false;
+        }
+        FS.setSize(s);
+      }
+
+      break;
   }
   LengthModifier lm(lmPosition, lmKind);
   FS.setLengthModifier(lm);
@@ -703,6 +729,8 @@ analyze_format_string::LengthModifier::toString() const {
     return "m";
   case AsWide:
     return "w";
+  case AsWideFast:
+    return "wf";
   case None:
     return "";
   }
@@ -970,6 +998,27 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target,
         case ConversionSpecifier::SArg:
         case ConversionSpecifier::ZArg:
           return Target.getTriple().isOSMSVCRT();
+        case ConversionSpecifier::bArg:
+        case ConversionSpecifier::dArg:
+        case ConversionSpecifier::iArg:
+        case ConversionSpecifier::oArg:
+        case ConversionSpecifier::uArg:
+        case ConversionSpecifier::xArg:
+        case ConversionSpecifier::XArg:
+          return true;
+        default:
+          return false;
+      }
+    case LengthModifier::AsWideFast:
+      switch (CS.getKind()) {
+        case ConversionSpecifier::bArg:
+        case ConversionSpecifier::dArg:
+        case ConversionSpecifier::iArg:
+        case ConversionSpecifier::oArg:
+        case ConversionSpecifier::uArg:
+        case ConversionSpecifier::xArg:
+        case ConversionSpecifier::XArg:
+          return true;
         default:
           return false;
       }
@@ -996,6 +1045,7 @@ bool FormatSpecifier::hasStandardLengthModifier() const {
     case LengthModifier::AsInt3264:
     case LengthModifier::AsInt64:
     case LengthModifier::AsWide:
+    case LengthModifier::AsWideFast:
     case LengthModifier::AsShortLong: // ???
       return false;
   }
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index f0b9d0ecaf234..4b9111e8bcf50 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -537,7 +537,16 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
             ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
+        return ArgType::Invalid();
       case LengthModifier::AsWide:
+      case LengthModifier::AsWideFast:
+        int s = getSize();
+        bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+        ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.LongLongTy : Ctx.IntTy;
+        if (s == 8) return Ctx.CharTy;
+        if (s == 16) return fast? fastType : Ctx.ShortTy;
+        if (s == 32) return fast? fastType : Ctx.IntTy;
+        if (s == 64) return Ctx.LongLongTy;
         return ArgType::Invalid();
     }
 
@@ -572,7 +581,16 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
             ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
+        return ArgType::Invalid();
       case LengthModifier::AsWide:
+      case LengthModifier::AsWideFast:
+        int s = getSize();
+        bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+        ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.UnsignedLongLongTy : Ctx.UnsignedIntTy;
+        if (s == 8) return Ctx.UnsignedCharTy;
+        if (s == 16) return fast? fastType : Ctx.UnsignedShortTy;
+        if (s == 32) return fast? fastType : Ctx.UnsignedIntTy;
+        if (s == 64) return Ctx.UnsignedLongLongTy;
         return ArgType::Invalid();
     }
 
@@ -621,6 +639,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
       case LengthModifier::AsInt3264:
       case LengthModifier::AsInt64:
       case LengthModifier::AsWide:
+      case LengthModifier::AsWideFast:
         return ArgType::Invalid();
       case LengthModifier::AsShortLong:
         llvm_unreachable("only used for OpenCL which doesn not handle nArg");
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index 697032673d4e7..59ae930bb734e 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 -Wformat-non-iso -DNON_ISO_WARNING %s
 
+#include <stdint.h>
 int printf(const char *format, ...) __attribute__((format(printf, 1, 2)));
 int scanf(const char * restrict, ...) ;
 typedef unsigned short wchar_t;
@@ -85,4 +86,31 @@ void z_test(void *p) {
   scanf("%Z", p); // expected-warning{{invalid conversion specifier 'Z'}}
 }
 
+void w_int_test(void) {
+  int8_t a = 0b101;
+  int16_t b = 2;
+  uint32_t c = 123;
+  int64_t d = 0x3b;
+
+  // for %w
+  printf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int8_t' (aka 'signed char')}}
+  printf("%w16i", b);
+  printf("%w32u", c);
+  printf("%w64x", d);
+
+}
+
+void wf_test(void) {
+  int_fast8_t a = 0b101;
+  uint_fast16_t b = 2;
+  int_fast32_t c = 021;
+  int_fast64_t d = 0x3a;
+
+  // for %wf
+  printf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int_fast8_t' (aka 'signed char')}}
+  printf("%wf16u", b);
+  printf("%wf32o", c);
+  printf("%wf64X", d);
+}
+
 #endif

>From 2ec84a88a9ba6e4576a855b419dd6bafa9f3d721 Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Tue, 14 Nov 2023 00:38:41 +0000
Subject: [PATCH 2/5] Update code and tests. %w and %wf are available in
 printf() and scanf()

---
 clang/include/clang/AST/FormatString.h |  4 ++-
 clang/lib/AST/PrintfFormatString.cpp   | 34 +++++++++++++++++---------
 clang/lib/AST/ScanfFormatString.cpp    | 14 +++++++++--
 clang/test/Sema/format-strings-ms.c    | 25 ++++++++++++-------
 4 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 6a886854650f1..2e48a8ddfde4d 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -471,7 +471,6 @@ class FormatSpecifier {
     return size;
   }
 
-
   bool usesPositionalArg() const { return UsesPositionalArg; }
 
   bool hasValidLengthModifier(const TargetInfo &Target,
@@ -792,6 +791,9 @@ bool parseFormatStringHasFormattingSpecifiers(const char *Begin,
                                               const LangOptions &LO,
                                               const TargetInfo &Target);
 
+ArgType wToArgType(int size, bool fast, ASTContext &C);
+ArgType wToArgTypeUnsigned(int size, bool fast, ASTContext &C);
+
 } // end analyze_format_string namespace
 } // end clang namespace
 #endif
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 4b9111e8bcf50..e6e47403198af 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -484,6 +484,26 @@ bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
   return false;
 }
 
+ArgType clang::analyze_format_string::wToArgType(
+    int size, bool fast, ASTContext &C) {
+  ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
+  if (size == 8) return C.CharTy;
+  if (size == 16) return fast? fastType : C.ShortTy;
+  if (size == 32) return fast? fastType : C.IntTy;
+  if (size == 64) return C.LongLongTy;
+  return ArgType::Invalid();
+}
+
+ArgType clang::analyze_format_string::wToArgTypeUnsigned(
+    int size, bool fast, ASTContext &C) {
+  ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
+  if (size == 8) return C.UnsignedCharTy;
+  if (size == 16) return fast? fastType : C.UnsignedShortTy;
+  if (size == 32) return fast? fastType : C.UnsignedIntTy;
+  if (size == 64) return C.UnsignedLongLongTy;
+  return ArgType::Invalid();
+}
+
 //===----------------------------------------------------------------------===//
 // Methods on PrintfSpecifier.
 //===----------------------------------------------------------------------===//
@@ -542,12 +562,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
       case LengthModifier::AsWideFast:
         int s = getSize();
         bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
-        ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.LongLongTy : Ctx.IntTy;
-        if (s == 8) return Ctx.CharTy;
-        if (s == 16) return fast? fastType : Ctx.ShortTy;
-        if (s == 32) return fast? fastType : Ctx.IntTy;
-        if (s == 64) return Ctx.LongLongTy;
-        return ArgType::Invalid();
+        return clang::analyze_format_string::wToArgType(s, fast, Ctx);
     }
 
   if (CS.isUIntArg())
@@ -586,12 +601,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
       case LengthModifier::AsWideFast:
         int s = getSize();
         bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
-        ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.UnsignedLongLongTy : Ctx.UnsignedIntTy;
-        if (s == 8) return Ctx.UnsignedCharTy;
-        if (s == 16) return fast? fastType : Ctx.UnsignedShortTy;
-        if (s == 32) return fast? fastType : Ctx.UnsignedIntTy;
-        if (s == 64) return Ctx.UnsignedLongLongTy;
-        return ArgType::Invalid();
+        return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
     }
 
   if (CS.isDoubleArg()) {
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index 64c430e623b57..e640bc2de4d25 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -261,9 +261,13 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
         case LengthModifier::AsMAllocate:
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
-        case LengthModifier::AsWide:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
+        case LengthModifier::AsWide:
+        case LengthModifier::AsWideFast:
+          int s = getSize();
+          bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+          return clang::analyze_format_string::wToArgType(s, fast, Ctx);
       }
       llvm_unreachable("Unsupported LengthModifier Type");
 
@@ -303,9 +307,15 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
         case LengthModifier::AsMAllocate:
         case LengthModifier::AsInt32:
         case LengthModifier::AsInt3264:
-        case LengthModifier::AsWide:
         case LengthModifier::AsShortLong:
           return ArgType::Invalid();
+        case LengthModifier::AsWide:
+        case LengthModifier::AsWideFast:
+          int s = getSize();
+          bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+          if (CS.getKind() == ConversionSpecifier::uArg or CS.getKind() == ConversionSpecifier::UArg)
+            return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
+          return clang::analyze_format_string::wToArgType(s, fast, Ctx);
       }
       llvm_unreachable("Unsupported LengthModifier Type");
 
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index 59ae930bb734e..bca824533d128 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -87,30 +87,37 @@ void z_test(void *p) {
 }
 
 void w_int_test(void) {
-  int8_t a = 0b101;
-  int16_t b = 2;
-  uint32_t c = 123;
-  int64_t d = 0x3b;
+  int8_t a;
+  int16_t b;
+  uint32_t c;
+  int64_t d;
 
   // for %w
   printf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int8_t' (aka 'signed char')}}
   printf("%w16i", b);
   printf("%w32u", c);
   printf("%w64x", d);
-
+  scanf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+  scanf("%w16i", b); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
+  scanf("%w32u", c);
+  scanf("%w64x", d);
 }
 
 void wf_test(void) {
-  int_fast8_t a = 0b101;
-  uint_fast16_t b = 2;
-  int_fast32_t c = 021;
-  int_fast64_t d = 0x3a;
+  int_fast8_t a;
+  uint_fast16_t b;
+  int_fast32_t c;
+  int_fast64_t d;
 
   // for %wf
   printf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int_fast8_t' (aka 'signed char')}}
   printf("%wf16u", b);
   printf("%wf32o", c);
   printf("%wf64X", d);
+  scanf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+  scanf("%wf16u", b);
+  scanf("%wf32o", c);
+  scanf("%wf64X", d);
 }
 
 #endif

>From 1431133f1e48bb58fa407e4a6d985fca24e9410b Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Tue, 14 Nov 2023 00:38:41 +0000
Subject: [PATCH 3/5] Update code and tests. %w and %wf are available in
 printf() and scanf()

---
 clang/include/clang/AST/FormatString.h | 14 ++++++-------
 clang/lib/AST/FormatString.cpp         |  2 +-
 clang/lib/AST/PrintfFormatString.cpp   | 28 +++++++++++++-------------
 clang/lib/AST/ScanfFormatString.cpp    |  4 ++--
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 2e48a8ddfde4d..dba973ffcaa4e 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -419,7 +419,7 @@ class FormatSpecifier {
   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
   bool UsesPositionalArg;
   unsigned argIndex;
-  unsigned size;
+  unsigned ExplicitlyFixedSize;
 public:
   FormatSpecifier(bool isPrintf)
     : CS(isPrintf), VectorNumElts(false),
@@ -463,12 +463,12 @@ class FormatSpecifier {
     FieldWidth = Amt;
   }
 
-  void setSize(unsigned s) {
-    size = s;
+  void setExplicitlyFixedSize(unsigned s) {
+    ExplicitlyFixedSize = s;
   }
 
-  unsigned getSize() const {
-    return size;
+  unsigned getExplicitlyFixedSize() const {
+    return ExplicitlyFixedSize;
   }
 
   bool usesPositionalArg() const { return UsesPositionalArg; }
@@ -791,8 +791,8 @@ bool parseFormatStringHasFormattingSpecifiers(const char *Begin,
                                               const LangOptions &LO,
                                               const TargetInfo &Target);
 
-ArgType wToArgType(int size, bool fast, ASTContext &C);
-ArgType wToArgTypeUnsigned(int size, bool fast, ASTContext &C);
+ArgType wToArgType(int Size, bool Fast, ASTContext &C);
+ArgType wToArgTypeUnsigned(int Size, bool Fast, ASTContext &C);
 
 } // end analyze_format_string namespace
 } // end clang namespace
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index ebc136e780717..88d0f1c98ed0d 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -309,7 +309,7 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
         if (supported_list.count(s) == 0) {
           return false;
         }
-        FS.setSize(s);
+        FS.setExplicitlyFixedSize(s);
       }
 
       break;
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index e6e47403198af..ad92fb3aab97d 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -485,22 +485,22 @@ bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
 }
 
 ArgType clang::analyze_format_string::wToArgType(
-    int size, bool fast, ASTContext &C) {
-  ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
-  if (size == 8) return C.CharTy;
-  if (size == 16) return fast? fastType : C.ShortTy;
-  if (size == 32) return fast? fastType : C.IntTy;
-  if (size == 64) return C.LongLongTy;
+    int Size, bool Fast, ASTContext &C) {
+  ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
+  if (Size == 8) return C.CharTy;
+  if (Size == 16) return Fast? FastType : C.ShortTy;
+  if (Size == 32) return Fast? FastType : C.IntTy;
+  if (Size == 64) return C.LongLongTy;
   return ArgType::Invalid();
 }
 
 ArgType clang::analyze_format_string::wToArgTypeUnsigned(
-    int size, bool fast, ASTContext &C) {
-  ArgType fastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
-  if (size == 8) return C.UnsignedCharTy;
-  if (size == 16) return fast? fastType : C.UnsignedShortTy;
-  if (size == 32) return fast? fastType : C.UnsignedIntTy;
-  if (size == 64) return C.UnsignedLongLongTy;
+    int Size, bool Fast, ASTContext &C) {
+  ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
+  if (Size == 8) return C.UnsignedCharTy;
+  if (Size == 16) return Fast? FastType : C.UnsignedShortTy;
+  if (Size == 32) return Fast? FastType : C.UnsignedIntTy;
+  if (Size == 64) return C.UnsignedLongLongTy;
   return ArgType::Invalid();
 }
 
@@ -560,7 +560,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
         return ArgType::Invalid();
       case LengthModifier::AsWide:
       case LengthModifier::AsWideFast:
-        int s = getSize();
+        int s = getExplicitlyFixedSize();
         bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
         return clang::analyze_format_string::wToArgType(s, fast, Ctx);
     }
@@ -599,7 +599,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
         return ArgType::Invalid();
       case LengthModifier::AsWide:
       case LengthModifier::AsWideFast:
-        int s = getSize();
+        int s = getExplicitlyFixedSize();
         bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
         return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
     }
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index e640bc2de4d25..6f856f1b0bedb 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -265,7 +265,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
           return ArgType::Invalid();
         case LengthModifier::AsWide:
         case LengthModifier::AsWideFast:
-          int s = getSize();
+          int s = getExplicitlyFixedSize();
           bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
           return clang::analyze_format_string::wToArgType(s, fast, Ctx);
       }
@@ -311,7 +311,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
           return ArgType::Invalid();
         case LengthModifier::AsWide:
         case LengthModifier::AsWideFast:
-          int s = getSize();
+          int s = getExplicitlyFixedSize();
           bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
           if (CS.getKind() == ConversionSpecifier::uArg or CS.getKind() == ConversionSpecifier::UArg)
             return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);

>From b4ff31c11d64079456cda63bed1a734958754b10 Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Sat, 2 Dec 2023 01:35:09 +0000
Subject: [PATCH 4/5] 1. add one more warning about unsupported sizes 2.
 reformat

---
 clang/include/clang/AST/FormatString.h        | 16 +++++++++++---
 .../clang/Basic/DiagnosticSemaKinds.td        |  4 ++++
 clang/lib/AST/FormatString.cpp                | 13 ++++++-----
 clang/lib/AST/PrintfFormatString.cpp          |  6 ++---
 clang/lib/AST/ScanfFormatString.cpp           |  6 ++---
 clang/lib/Sema/SemaChecking.cpp               | 22 +++++++++++++++++++
 clang/test/Sema/format-strings-ms.c           |  8 +++++++
 7 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index dba973ffcaa4e..c3f761ed03d9f 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -420,10 +420,12 @@ class FormatSpecifier {
   bool UsesPositionalArg;
   unsigned argIndex;
   unsigned ExplicitlyFixedSize;
+  bool ExplicitlyFixedSizeValid;
+
 public:
   FormatSpecifier(bool isPrintf)
     : CS(isPrintf), VectorNumElts(false),
-      UsesPositionalArg(false), argIndex(0) {}
+      UsesPositionalArg(false), argIndex(0), ExplicitlyFixedSizeValid(true) {}
 
   void setLengthModifier(LengthModifier lm) {
     LM = lm;
@@ -463,8 +465,8 @@ class FormatSpecifier {
     FieldWidth = Amt;
   }
 
-  void setExplicitlyFixedSize(unsigned s) {
-    ExplicitlyFixedSize = s;
+  void setExplicitlyFixedSize(unsigned S) {
+    ExplicitlyFixedSize = S;
   }
 
   unsigned getExplicitlyFixedSize() const {
@@ -478,6 +480,14 @@ class FormatSpecifier {
 
   bool hasStandardLengthModifier() const;
 
+  void setExplicitlyFixedSizeValid(bool valid) {
+    ExplicitlyFixedSizeValid = valid;
+  }
+
+  bool isExplicitlyFixedSizeSupported() const {
+    return ExplicitlyFixedSizeValid;
+  }
+
   std::optional<LengthModifier> getCorrectedLengthModifier() const;
 
   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 6dfb2d7195203..5d1dcb66d2990 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9707,6 +9707,10 @@ def warn_missing_format_string : Warning<
 def warn_scanf_nonzero_width : Warning<
   "zero field width in scanf format string is unused">,
   InGroup<Format>;
+def warn_format_conversion_size_unsupported: Warning<
+  "format specifies %select{an exact-|a fastest-}0 width integer type with "
+  "invalid bit-width %1">,
+  InGroup<Format>;
 def warn_format_conversion_argument_type_mismatch : Warning<
   "format specifies type %0 but the argument has "
   "%select{type|underlying type}2 %1">,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index 88d0f1c98ed0d..39359c2b18fb4 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -297,18 +297,19 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
 
       if (I == E) return false;
       int s = 0;
+      bool MSVCRT = true;
       while (unsigned(*I - '0') <= 9) {
+        MSVCRT = false;
         s = 10 * s + unsigned(*I - '0');
         ++I;
       }
 
-      // s == 0 is MSVCRT case, like l but only for c, C, s, S, or Z on windows
-      // s != 0 for b, d, i, o, u, x, or X when a size followed(like 8, 16, 32 or 64)
-      if (s != 0) {
+      // MSVCRT == true: the MSVCRT case, like 'l' but only for c, C, s, S, or Z on Windows.
+      // MSVCRT == false: for b, d, i, o, u, x, or X when a size follows (e.g. 8, 16, 32, or 64).
+      if (!MSVCRT) {
         std::set<int> supported_list {8, 16, 32, 64};
-        if (supported_list.count(s) == 0) {
-          return false;
-        }
+        if (supported_list.count(s) == 0)
+          FS.setExplicitlyFixedSizeValid(false);
         FS.setExplicitlyFixedSize(s);
       }
 
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index ad92fb3aab97d..d4d9a0596620a 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -560,9 +560,9 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
         return ArgType::Invalid();
       case LengthModifier::AsWide:
       case LengthModifier::AsWideFast:
-        int s = getExplicitlyFixedSize();
-        bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
-        return clang::analyze_format_string::wToArgType(s, fast, Ctx);
+        int S = getExplicitlyFixedSize();
+        bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+        return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
     }
 
   if (CS.isUIntArg())
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index 6f856f1b0bedb..de261a303f322 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -265,9 +265,9 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
           return ArgType::Invalid();
         case LengthModifier::AsWide:
         case LengthModifier::AsWideFast:
-          int s = getExplicitlyFixedSize();
-          bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
-          return clang::analyze_format_string::wToArgType(s, fast, Ctx);
+          int S = getExplicitlyFixedSize();
+          bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+          return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
       }
       llvm_unreachable("Unsupported LengthModifier Type");
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 77c8334f3ca25..7e2f5dc6708c4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -11680,6 +11680,17 @@ bool CheckPrintfHandler::HandlePrintfSpecifier(
   if (!FS.hasStandardConversionSpecifier(S.getLangOpts()))
     HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
 
+  // Check that the explicitly fixed size is supported.
+  if (!FS.isExplicitlyFixedSizeSupported()){
+    EmitFormatDiagnostic(S.PDiag(
+                         diag::warn_format_conversion_size_unsupported) 
+                         << FS.getLengthModifier().toString()
+                         << FS.getExplicitlyFixedSize(),
+                         getLocationOfByte(startSpecifier),
+                         /*IsStringLocation*/true,
+                         getSpecifierRange(startSpecifier, specifierLen));
+  }
+
   // The remaining checks depend on the data arguments.
   if (ArgPassingKind == Sema::FAPK_VAList)
     return true;
@@ -12289,6 +12300,17 @@ bool CheckScanfHandler::HandleScanfSpecifier(
   else if (!FS.hasStandardLengthConversionCombination())
     HandleInvalidLengthModifier(FS, CS, startSpecifier, specifierLen,
                                 diag::warn_format_non_standard_conversion_spec);
+  
+  // Check that the explicitly fixed size is supported.
+  if (!FS.isExplicitlyFixedSizeSupported()){
+    EmitFormatDiagnostic(S.PDiag(
+                         diag::warn_format_conversion_size_unsupported) 
+                         << FS.getLengthModifier().toString()
+                         << FS.getExplicitlyFixedSize(),
+                         getLocationOfByte(startSpecifier),
+                         /*IsStringLocation*/true,
+                         getSpecifierRange(startSpecifier, specifierLen));
+  }
 
   if (!FS.hasStandardConversionSpecifier(S.getLangOpts()))
     HandleNonStandardConversionSpecifier(CS, startSpecifier, specifierLen);
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index bca824533d128..e6172ee436114 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -101,6 +101,10 @@ void w_int_test(void) {
   scanf("%w16i", b); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
   scanf("%w32u", c);
   scanf("%w64x", d);
+
+  // unsupported size
+  printf("%w92d", a); // expected-warning{{format specifies w width integer type with invalid bit-width 92}}
+  scanf("%w0i", b); // expected-warning{{format specifies w width integer type with invalid bit-width 0}}
 }
 
 void wf_test(void) {
@@ -118,6 +122,10 @@ void wf_test(void) {
   scanf("%wf16u", b);
   scanf("%wf32o", c);
   scanf("%wf64X", d);
+
+  // unsupported size
+  printf("%wf0d", a); // expected-warning{{format specifies wf width integer type with invalid bit-width 0}}
+  scanf("%wf35u", b); // expected-warning{{format specifies wf width integer type with invalid bit-width 35}}
 }
 
 #endif

>From a5b2d771fc51df5018765896207b76cfee88e39b Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Tue, 5 Dec 2023 07:48:50 +0000
Subject: [PATCH 5/5] Add getFastIntTypeByWidth() function

---
 clang/include/clang/AST/FormatString.h |  3 +-
 clang/include/clang/Basic/TargetInfo.h |  4 +++
 clang/lib/AST/PrintfFormatString.cpp   | 48 ++++++++++++++------------
 clang/lib/AST/ScanfFormatString.cpp    | 10 +++---
 clang/lib/Basic/TargetInfo.cpp         | 25 ++++++++++++++
 clang/test/Sema/format-strings-ms.c    |  8 ++---
 6 files changed, 65 insertions(+), 33 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index c3f761ed03d9f..7add829ee1cca 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -801,8 +801,7 @@ bool parseFormatStringHasFormattingSpecifiers(const char *Begin,
                                               const LangOptions &LO,
                                               const TargetInfo &Target);
 
-ArgType wToArgType(int Size, bool Fast, ASTContext &C);
-ArgType wToArgTypeUnsigned(int Size, bool Fast, ASTContext &C);
+ArgType wToArgType(int Size, bool IsSigned, bool Fast, ASTContext &C);
 
 } // end analyze_format_string namespace
 } // end clang namespace
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 41f3c2e403cbe..c1e79b5d759ef 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -442,6 +442,10 @@ class TargetInfo : public TransferrableTargetInfo,
   virtual IntType getLeastIntTypeByWidth(unsigned BitWidth,
                                          bool IsSigned) const;
 
+  /// Return the integer type of exactly BitWidth bits (fast variant if Fast).
+  virtual IntType getFastIntTypeByWidth(unsigned BitWidth,
+                                        bool IsSigned, bool Fast) const;
+
   /// Return floating point type with specified width. On PPC, there are
   /// three possible types for 128-bit floating point: "PPC double-double",
   /// IEEE 754R quad precision, and "long double" (which under the covers
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index d4d9a0596620a..f41fba78b5d05 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -485,23 +485,27 @@ bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
 }
 
 ArgType clang::analyze_format_string::wToArgType(
-    int Size, bool Fast, ASTContext &C) {
-  ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.LongLongTy : C.IntTy;
-  if (Size == 8) return C.CharTy;
-  if (Size == 16) return Fast? FastType : C.ShortTy;
-  if (Size == 32) return Fast? FastType : C.IntTy;
-  if (Size == 64) return C.LongLongTy;
-  return ArgType::Invalid();
-}
-
-ArgType clang::analyze_format_string::wToArgTypeUnsigned(
-    int Size, bool Fast, ASTContext &C) {
-  ArgType FastType = C.getTargetInfo().getTriple().isArch64Bit() ? C.UnsignedLongLongTy : C.UnsignedIntTy;
-  if (Size == 8) return C.UnsignedCharTy;
-  if (Size == 16) return Fast? FastType : C.UnsignedShortTy;
-  if (Size == 32) return Fast? FastType : C.UnsignedIntTy;
-  if (Size == 64) return C.UnsignedLongLongTy;
-  return ArgType::Invalid();
+    int Size, bool IsSigned, bool Fast, ASTContext &C) {
+  switch (C.getTargetInfo().getFastIntTypeByWidth(Size, IsSigned, Fast)) {
+    case TargetInfo::SignedChar:
+      return C.SignedCharTy;
+    case TargetInfo::UnsignedChar:
+      return C.UnsignedCharTy;
+    case TargetInfo::SignedShort:
+      return C.ShortTy;
+    case TargetInfo::UnsignedShort:
+      return C.UnsignedShortTy;
+    case TargetInfo::SignedInt:
+      return C.IntTy;
+    case TargetInfo::UnsignedInt:
+      return C.UnsignedIntTy;
+    case TargetInfo::SignedLongLong:
+      return C.LongLongTy;
+    case TargetInfo::UnsignedLongLong:
+      return C.UnsignedLongLongTy;
+    default:
+      return ArgType::Invalid();
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -561,8 +565,8 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
       case LengthModifier::AsWide:
       case LengthModifier::AsWideFast:
         int S = getExplicitlyFixedSize();
-        bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
-        return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
+        bool Fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+        return clang::analyze_format_string::wToArgType(S, true, Fast, Ctx);
     }
 
   if (CS.isUIntArg())
@@ -599,9 +603,9 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
         return ArgType::Invalid();
       case LengthModifier::AsWide:
       case LengthModifier::AsWideFast:
-        int s = getExplicitlyFixedSize();
-        bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
-        return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
+        int S = getExplicitlyFixedSize();
+        bool Fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+        return clang::analyze_format_string::wToArgType(S, false, Fast, Ctx);
     }
 
   if (CS.isDoubleArg()) {
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index de261a303f322..52bb1de117fe1 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -267,7 +267,7 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
         case LengthModifier::AsWideFast:
           int S = getExplicitlyFixedSize();
           bool FAST = LM.getKind() == LengthModifier::AsWideFast ? true : false;
-          return clang::analyze_format_string::wToArgType(S, FAST, Ctx);
+          return clang::analyze_format_string::wToArgType(S, true, FAST, Ctx);
       }
       llvm_unreachable("Unsupported LengthModifier Type");
 
@@ -311,11 +311,11 @@ ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
           return ArgType::Invalid();
         case LengthModifier::AsWide:
         case LengthModifier::AsWideFast:
-          int s = getExplicitlyFixedSize();
-          bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+          int S = getExplicitlyFixedSize();
+          bool Fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
           if (CS.getKind() == ConversionSpecifier::uArg or CS.getKind() == ConversionSpecifier::UArg)
-            return clang::analyze_format_string::wToArgTypeUnsigned(s, fast, Ctx);
-          return clang::analyze_format_string::wToArgType(s, fast, Ctx);
+            return clang::analyze_format_string::wToArgType(S, false, Fast, Ctx);
+          return clang::analyze_format_string::wToArgType(S, true, Fast, Ctx);
       }
       llvm_unreachable("Unsupported LengthModifier Type");
 
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 6cd5d618a4aca..fbc842d492ec3 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -312,6 +312,31 @@ TargetInfo::IntType TargetInfo::getLeastIntTypeByWidth(unsigned BitWidth,
   return NoInt;
 }
 
+TargetInfo::IntType TargetInfo::getFastIntTypeByWidth(unsigned BitWidth,
+                                                      bool IsSigned, bool Fast)
+                                                      const {
+  IntType SignedFastType = getTriple().isArch64Bit() ? SignedLongLong : SignedInt;
+  IntType UnSignedFastType = getTriple().isArch64Bit() ?
+                             UnsignedLongLong : UnsignedInt;
+  if (getCharWidth() == BitWidth)
+    return IsSigned ? SignedChar : UnsignedChar;
+  if (getShortWidth() == BitWidth) {
+    if (Fast)
+      return IsSigned ? SignedFastType : UnSignedFastType;
+    else
+      return IsSigned ? SignedShort : UnsignedShort;
+  }
+  if (getIntWidth() == BitWidth) {
+    if (Fast)
+      return IsSigned ? SignedFastType : UnSignedFastType;
+    else
+      return IsSigned ? SignedInt : UnsignedInt;
+  }
+  if (getLongLongWidth() == BitWidth)
+    return IsSigned ? SignedLongLong : UnsignedLongLong;
+  return NoInt;
+}
+
 FloatModeKind TargetInfo::getRealTypeByWidth(unsigned BitWidth,
                                              FloatModeKind ExplicitType) const {
   if (getHalfWidth() == BitWidth)
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index e6172ee436114..89f267a0d4fcd 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -93,11 +93,11 @@ void w_int_test(void) {
   int64_t d;
 
   // for %w
-  printf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int8_t' (aka 'signed char')}}
+  printf("%w8b", a);
   printf("%w16i", b);
   printf("%w32u", c);
   printf("%w64x", d);
-  scanf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+  scanf("%w8b", a); // expected-warning{{format specifies type 'signed char' but the argument has type 'int'}}
   scanf("%w16i", b); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
   scanf("%w32u", c);
   scanf("%w64x", d);
@@ -114,11 +114,11 @@ void wf_test(void) {
   int_fast64_t d;
 
   // for %wf
-  printf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int_fast8_t' (aka 'signed char')}}
+  printf("%wf8b", a);
   printf("%wf16u", b);
   printf("%wf32o", c);
   printf("%wf64X", d);
-  scanf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int'}}
+  scanf("%wf8b", a); // expected-warning{{format specifies type 'signed char' but the argument has type 'int'}}
   scanf("%wf16u", b);
   scanf("%wf32o", c);
   scanf("%wf64X", d);



More information about the llvm-commits mailing list