[clang] Fix clang to recognize new C23 modifiers %w and %wf when printing (PR #71771)

via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 8 21:18:25 PST 2023


https://github.com/ZijunZhaoCCK created https://github.com/llvm/llvm-project/pull/71771

None

>From 06c4cf02dfb4b20c8349c5f3c7209276f6d56edf Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao at google.com>
Date: Thu, 9 Nov 2023 02:21:46 +0000
Subject: [PATCH] Fix clang to recognize new C23 modifiers %w and %wf when
 printing

---
 clang/include/clang/AST/FormatString.h | 16 +++++++-
 clang/lib/AST/FormatString.cpp         | 52 +++++++++++++++++++++++++-
 clang/lib/AST/PrintfFormatString.cpp   | 19 ++++++++++
 clang/test/Sema/format-strings-ms.c    | 28 ++++++++++++++
 4 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 5c4ad9baaef608c..6a886854650f1d9 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -81,8 +81,10 @@ class LengthModifier {
     AsLongDouble, // 'L'
     AsAllocate,   // for '%as', GNU extension to C90 scanf
     AsMAllocate,  // for '%ms', GNU extension to scanf
-    AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z
-    AsWideChar = AsLong // for '%ls', only makes sense for printf
+    AsWide,       // 'w': (1) MSVCRT extension, like 'l' but only for c, C, s, S, or Z on Windows;
+                  // (2) C23 'wN' for b, d, i, o, u, x, or X when followed by a width N (8, 16, 32 or 64)
+    AsWideFast,   // 'wf' (for b, d, i, o, u, x, or X)
+    AsWideChar = AsLong, // for '%ls', only makes sense for printf
   };
 
   LengthModifier()
@@ -417,6 +419,7 @@ class FormatSpecifier {
   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
   bool UsesPositionalArg;
   unsigned argIndex;
+  unsigned size;
 public:
   FormatSpecifier(bool isPrintf)
     : CS(isPrintf), VectorNumElts(false),
@@ -460,6 +463,15 @@ class FormatSpecifier {
     FieldWidth = Amt;
   }
 
+  void setSize(unsigned s) {
+    size = s;
+  }
+
+  unsigned getSize() const {
+    return size;
+  }
+
+
   bool usesPositionalArg() const { return UsesPositionalArg; }
 
   bool hasValidLengthModifier(const TargetInfo &Target,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index e0c9e18cfe3a243..ebc136e780717e4 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -286,7 +286,33 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
       lmKind = LengthModifier::AsInt3264;
       break;
     case 'w':
-      lmKind = LengthModifier::AsWide; ++I; break;
+      ++I;
+      if (I == E) return false;
+      if (*I == 'f') {
+        lmKind = LengthModifier::AsWideFast;
+        ++I;
+      } else {
+        lmKind = LengthModifier::AsWide;
+      }
+
+      if (I == E) return false;
+      int s = 0;
+      while (unsigned(*I - '0') <= 9) {
+        s = 10 * s + unsigned(*I - '0');
+        ++I;
+      }
+
+      // s == 0 is the MSVCRT case: like 'l', but only for c, C, s, S, or Z on Windows.
+      // s != 0 is for b, d, i, o, u, x, or X when followed by a width (8, 16, 32 or 64).
+      if (s != 0) {
+        std::set<int> supported_list {8, 16, 32, 64};
+        if (supported_list.count(s) == 0) {
+          return false;
+        }
+        FS.setSize(s);
+      }
+
+      break;
   }
   LengthModifier lm(lmPosition, lmKind);
   FS.setLengthModifier(lm);
@@ -703,6 +729,8 @@ analyze_format_string::LengthModifier::toString() const {
     return "m";
   case AsWide:
     return "w";
+  case AsWideFast:
+    return "wf";
   case None:
     return "";
   }
@@ -970,6 +998,27 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target,
         case ConversionSpecifier::SArg:
         case ConversionSpecifier::ZArg:
           return Target.getTriple().isOSMSVCRT();
+        case ConversionSpecifier::bArg:
+        case ConversionSpecifier::dArg:
+        case ConversionSpecifier::iArg:
+        case ConversionSpecifier::oArg:
+        case ConversionSpecifier::uArg:
+        case ConversionSpecifier::xArg:
+        case ConversionSpecifier::XArg:
+          return true;
+        default:
+          return false;
+      }
+    case LengthModifier::AsWideFast:
+      switch (CS.getKind()) {
+        case ConversionSpecifier::bArg:
+        case ConversionSpecifier::dArg:
+        case ConversionSpecifier::iArg:
+        case ConversionSpecifier::oArg:
+        case ConversionSpecifier::uArg:
+        case ConversionSpecifier::xArg:
+        case ConversionSpecifier::XArg:
+          return true;
         default:
           return false;
       }
@@ -996,6 +1045,7 @@ bool FormatSpecifier::hasStandardLengthModifier() const {
     case LengthModifier::AsInt3264:
     case LengthModifier::AsInt64:
     case LengthModifier::AsWide:
+    case LengthModifier::AsWideFast:
     case LengthModifier::AsShortLong: // ???
       return false;
   }
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index f0b9d0ecaf23461..4b9111e8bcf509a 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -537,7 +537,16 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
             ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
+        return ArgType::Invalid();
       case LengthModifier::AsWide:
+      case LengthModifier::AsWideFast:
+        int s = getSize();
+        bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+        ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.LongLongTy : Ctx.IntTy;
+        if (s == 8) return Ctx.CharTy;
+        if (s == 16) return fast? fastType : Ctx.ShortTy;
+        if (s == 32) return fast? fastType : Ctx.IntTy;
+        if (s == 64) return Ctx.LongLongTy;
         return ArgType::Invalid();
     }
 
@@ -572,7 +581,16 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
             ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
       case LengthModifier::AsAllocate:
       case LengthModifier::AsMAllocate:
+        return ArgType::Invalid();
       case LengthModifier::AsWide:
+      case LengthModifier::AsWideFast:
+        int s = getSize();
+        bool fast = LM.getKind() == LengthModifier::AsWideFast ? true : false;
+        ArgType fastType = Ctx.getTargetInfo().getTriple().isArch64Bit() ? Ctx.UnsignedLongLongTy : Ctx.UnsignedIntTy;
+        if (s == 8) return Ctx.UnsignedCharTy;
+        if (s == 16) return fast? fastType : Ctx.UnsignedShortTy;
+        if (s == 32) return fast? fastType : Ctx.UnsignedIntTy;
+        if (s == 64) return Ctx.UnsignedLongLongTy;
         return ArgType::Invalid();
     }
 
@@ -621,6 +639,7 @@ ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
       case LengthModifier::AsInt3264:
       case LengthModifier::AsInt64:
       case LengthModifier::AsWide:
+      case LengthModifier::AsWideFast:
         return ArgType::Invalid();
       case LengthModifier::AsShortLong:
         llvm_unreachable("only used for OpenCL which doesn not handle nArg");
diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c
index 697032673d4e770..59ae930bb734eb6 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 -Wformat-non-iso -DNON_ISO_WARNING %s
 
+#include <stdint.h>
 int printf(const char *format, ...) __attribute__((format(printf, 1, 2)));
 int scanf(const char * restrict, ...) ;
 typedef unsigned short wchar_t;
@@ -85,4 +86,31 @@ void z_test(void *p) {
   scanf("%Z", p); // expected-warning{{invalid conversion specifier 'Z'}}
 }
 
+void w_int_test(void) {
+  int8_t a = 0b101;
+  int16_t b = 2;
+  uint32_t c = 123;
+  int64_t d = 0x3b;
+
+  // for %w
+  printf("%w8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int8_t' (aka 'signed char')}}
+  printf("%w16i", b);
+  printf("%w32u", c);
+  printf("%w64x", d);
+
+}
+
+void wf_test(void) {
+  int_fast8_t a = 0b101;
+  uint_fast16_t b = 2;
+  int_fast32_t c = 021;
+  int_fast64_t d = 0x3a;
+
+  // for %wf
+  printf("%wf8b", a); // expected-warning{{format specifies type 'char' but the argument has type 'int_fast8_t' (aka 'signed char')}}
+  printf("%wf16u", b);
+  printf("%wf32o", c);
+  printf("%wf64X", d);
+}
+
 #endif



More information about the cfe-commits mailing list