[clang] [llvm] [Sema] Implement support for -Wformat-signedness (PR #74440)

Karl-Johan Karlsson via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 02:18:19 PST 2023


https://github.com/karka228 updated https://github.com/llvm/llvm-project/pull/74440

>From a80bf9d03f19d48c0aca4af7758dc49516da8825 Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Tue, 5 Dec 2023 10:03:00 +0100
Subject: [PATCH 1/2] [Sema] Implement support for -Wformat-signedness

In gcc there exist a modifier option -Wformat-signedness that turns on
additional signedness warnings in the already existing -Wformat warning.

This patch implements that support in clang.
---
 clang/include/clang/AST/FormatString.h        |   2 +
 .../include/clang/Basic/DiagnosticOptions.def |   1 +
 clang/include/clang/Driver/Options.td         |   3 +
 clang/lib/AST/FormatString.cpp                |  29 +++--
 clang/lib/Driver/ToolChains/Clang.cpp         |   2 +
 clang/lib/Sema/SemaChecking.cpp               |  26 ++++-
 format-strings-signedness.c                   | 107 ++++++++++++++++++
 7 files changed, 158 insertions(+), 12 deletions(-)
 create mode 100644 format-strings-signedness.c

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 5c4ad9baaef60..c267a32be4d6f 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -264,6 +264,8 @@ class ArgType {
     /// The conversion specifier and the argument type are compatible. For
     /// instance, "%d" and int.
     Match = 1,
+    /// The conversion specifier and the argument type have different sign
+    MatchSignedness,
     /// The conversion specifier and the argument type are compatible because of
     /// default argument promotions. For instance, "%hhd" and int.
     MatchPromotion,
diff --git a/clang/include/clang/Basic/DiagnosticOptions.def b/clang/include/clang/Basic/DiagnosticOptions.def
index 6d0c1b14acc12..a9562e7d8dcb0 100644
--- a/clang/include/clang/Basic/DiagnosticOptions.def
+++ b/clang/include/clang/Basic/DiagnosticOptions.def
@@ -44,6 +44,7 @@ DIAGOPT(Name, Bits, Default)
 #endif
 
 SEMANTIC_DIAGOPT(IgnoreWarnings, 1, 0)   /// -w
+DIAGOPT(FormatSignedness, 1, 0) /// -Wformat-signedness
 DIAGOPT(NoRewriteMacros, 1, 0)  /// -Wno-rewrite-macros
 DIAGOPT(Pedantic, 1, 0)         /// -pedantic
 DIAGOPT(PedanticErrors, 1, 0)   /// -pedantic-errors
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1d04e4f6e7e6d..04fdf9a7eb92d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -918,6 +918,9 @@ def Wdeprecated : Flag<["-"], "Wdeprecated">, Group<W_Group>,
   HelpText<"Enable warnings for deprecated constructs and define __DEPRECATED">;
 def Wno_deprecated : Flag<["-"], "Wno-deprecated">, Group<W_Group>,
   Visibility<[ClangOption, CC1Option]>;
+def Wformat_signedness : Flag<["-"], "Wformat-signedness">,
+  Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>,
+  MarshallingInfoFlag<DiagnosticOpts<"FormatSignedness">>;
 def Wl_COMMA : CommaJoined<["-"], "Wl,">, Visibility<[ClangOption, FlangOption]>,
   Flags<[LinkerInput, RenderAsInput]>,
   HelpText<"Pass the comma separated arguments in <arg> to the linker">,
diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index e0c9e18cfe3a2..670cde017d3ac 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -409,7 +409,7 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
         return Match;
       if (const auto *BT = argTy->getAs<BuiltinType>()) {
         // Check if the only difference between them is signed vs unsigned
-        // if true, we consider they are compatible.
+        // if true, return match signedness.
         switch (BT->getKind()) {
           default:
             break;
@@ -419,44 +419,53 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
             [[fallthrough]];
           case BuiltinType::Char_S:
           case BuiltinType::SChar:
+            if (T == C.UnsignedShortTy || T == C.ShortTy)
+              return NoMatchTypeConfusion;
+            if (T == C.UnsignedCharTy)
+              return MatchSignedness;
+            if (T == C.SignedCharTy)
+              return Match;
+            break;
           case BuiltinType::Char_U:
           case BuiltinType::UChar:
             if (T == C.UnsignedShortTy || T == C.ShortTy)
               return NoMatchTypeConfusion;
-            if (T == C.UnsignedCharTy || T == C.SignedCharTy)
+            if (T == C.UnsignedCharTy)
               return Match;
+            if (T == C.SignedCharTy)
+              return MatchSignedness;
             break;
           case BuiltinType::Short:
             if (T == C.UnsignedShortTy)
-              return Match;
+              return MatchSignedness;
             break;
           case BuiltinType::UShort:
             if (T == C.ShortTy)
-              return Match;
+              return MatchSignedness;
             break;
           case BuiltinType::Int:
             if (T == C.UnsignedIntTy)
-              return Match;
+              return MatchSignedness;
             break;
           case BuiltinType::UInt:
             if (T == C.IntTy)
-              return Match;
+              return MatchSignedness;
             break;
           case BuiltinType::Long:
             if (T == C.UnsignedLongTy)
-              return Match;
+              return MatchSignedness;
             break;
           case BuiltinType::ULong:
             if (T == C.LongTy)
-              return Match;
+              return MatchSignedness;
             break;
           case BuiltinType::LongLong:
             if (T == C.UnsignedLongLongTy)
-              return Match;
+              return MatchSignedness;
             break;
           case BuiltinType::ULongLong:
             if (T == C.LongLongTy)
-              return Match;
+              return MatchSignedness;
             break;
           }
           // "Partially matched" because of promotions?
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index f02f7c841b91f..6a6a40293d7d3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6054,6 +6054,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fdeprecated-macro");
   }
 
+  Args.AddLastArg(CmdArgs, options::OPT_Wformat_signedness);
+
   // Translate GCC's misnamer '-fasm' arguments to '-fgnu-keywords'.
   if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) {
     if (Asm->getOption().matches(options::OPT_fasm))
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 07ced5ffc3407..ea8d5a5d6b913 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -11815,6 +11815,18 @@ isArithmeticArgumentPromotion(Sema &S, const ImplicitCastExpr *ICE) {
          S.Context.getFloatingTypeOrder(From, To) < 0;
 }
 
+static clang::analyze_format_string::ArgType::MatchKind
+handleFormatSignedness(clang::analyze_format_string::ArgType::MatchKind Match,
+                       DiagnosticOptions &DiagOpts) {
+  if (Match == clang::analyze_format_string::ArgType::MatchSignedness) {
+    if (DiagOpts.FormatSignedness)
+      Match = clang::analyze_format_string::ArgType::NoMatch;
+    else
+      Match = clang::analyze_format_string::ArgType::Match;
+  }
+  return Match;
+}
+
 bool
 CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
                                     const char *StartSpecifier,
@@ -11858,6 +11870,8 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
 
   ArgType::MatchKind ImplicitMatch = ArgType::NoMatch;
   ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy);
+  Match =
+      handleFormatSignedness(Match, S.getDiagnostics().getDiagnosticOptions());
   if (Match == ArgType::Match)
     return true;
 
@@ -11881,6 +11895,8 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
           ICE->getType() == S.Context.UnsignedIntTy) {
         // All further checking is done on the subexpression
         ImplicitMatch = AT.matchesType(S.Context, ExprTy);
+        ImplicitMatch = handleFormatSignedness(
+            ImplicitMatch, S.getDiagnostics().getDiagnosticOptions());
         if (ImplicitMatch == ArgType::Match)
           return true;
       }
@@ -12001,6 +12017,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
       switch (Match) {
       case ArgType::Match:
       case ArgType::MatchPromotion:
+      case ArgType::MatchSignedness:
       case ArgType::NoMatchPromotionTypeConfusion:
         llvm_unreachable("expected non-matching");
       case ArgType::NoMatchPedantic:
@@ -12037,8 +12054,10 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
       CastFix << (S.LangOpts.CPlusPlus ? ">" : ")");
 
       SmallVector<FixItHint,4> Hints;
-      if (AT.matchesType(S.Context, IntendedTy) != ArgType::Match ||
-          ShouldNotPrintDirectly)
+      ArgType::MatchKind IntendedMatch = AT.matchesType(S.Context, IntendedTy);
+      IntendedMatch = handleFormatSignedness(
+          IntendedMatch, S.getDiagnostics().getDiagnosticOptions());
+      if ((IntendedMatch != ArgType::Match) || ShouldNotPrintDirectly)
         Hints.push_back(FixItHint::CreateReplacement(SpecRange, os.str()));
 
       if (const CStyleCastExpr *CCast = dyn_cast<CStyleCastExpr>(E)) {
@@ -12106,6 +12125,7 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
       switch (Match) {
       case ArgType::Match:
       case ArgType::MatchPromotion:
+      case ArgType::MatchSignedness:
       case ArgType::NoMatchPromotionTypeConfusion:
         llvm_unreachable("expected non-matching");
       case ArgType::NoMatchPedantic:
@@ -12318,6 +12338,8 @@ bool CheckScanfHandler::HandleScanfSpecifier(
 
   analyze_format_string::ArgType::MatchKind Match =
       AT.matchesType(S.Context, Ex->getType());
+  Match =
+      handleFormatSignedness(Match, S.getDiagnostics().getDiagnosticOptions());
   bool Pedantic = Match == analyze_format_string::ArgType::NoMatchPedantic;
   if (Match == analyze_format_string::ArgType::Match)
     return true;
diff --git a/format-strings-signedness.c b/format-strings-signedness.c
new file mode 100644
index 0000000000000..02964def10eef
--- /dev/null
+++ b/format-strings-signedness.c
@@ -0,0 +1,107 @@
+// RUN: %clang_cc1 -std=c11 -fsyntax-only -verify -Wformat -Wformat-signedness %s
+
+int printf(const char *restrict format, ...);
+int scanf(const char * restrict, ...);
+
+void test_printf_bool(_Bool x)
+{
+    printf("%d", x); // no-warning
+    printf("%u", x); // expected-warning{{format specifies type 'unsigned int' but the argument has type '_Bool'}}
+    printf("%x", x); // expected-warning{{format specifies type 'unsigned int' but the argument has type '_Bool'}}
+}
+
+void test_printf_char(char x)
+{
+    printf("%c", x); // no-warning
+}
+
+void test_printf_unsigned_char(unsigned char x)
+{
+    printf("%c", x); // no-warning
+}
+
+void test_printf_int(int x)
+{
+    printf("%d", x); // no-warning
+    printf("%u", x); // expected-warning{{format specifies type 'unsigned int' but the argument has type 'int'}}
+    printf("%x", x); // expected-warning{{format specifies type 'unsigned int' but the argument has type 'int'}}
+}
+
+void test_printf_unsigned(unsigned x)
+{
+    printf("%d", x); // expected-warning{{format specifies type 'int' but the argument has type 'unsigned int'}}
+    printf("%u", x); // no-warning
+    printf("%x", x); // no-warning
+}
+
+void test_printf_long(long x)
+{
+    printf("%ld", x); // no-warning
+    printf("%lu", x); // expected-warning{{format specifies type 'unsigned long' but the argument has type 'long'}}
+    printf("%lx", x); // expected-warning{{format specifies type 'unsigned long' but the argument has type 'long'}}
+}
+
+void test_printf_unsigned_long(unsigned long x)
+{
+    printf("%ld", x); // expected-warning{{format specifies type 'long' but the argument has type 'unsigned long'}}
+    printf("%lu", x); // no-warning
+    printf("%lx", x); // no-warning
+}
+
+void test_printf_long_long(long long x)
+{
+    printf("%lld", x); // no-warning
+    printf("%llu", x); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'long long'}}
+    printf("%llx", x); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'long long'}}
+}
+
+void test_printf_unsigned_long_long(unsigned long long x)
+{
+    printf("%lld", x); // expected-warning{{format specifies type 'long long' but the argument has type 'unsigned long long'}}
+    printf("%llu", x); // no-warning
+    printf("%llx", x); // no-warning
+}
+
+void test_scanf_char(char *y) {
+  scanf("%c", y); // no-warning
+}
+
+void test_scanf_unsigned_char(unsigned char *y) {
+  scanf("%c", y); // no-warning
+}
+
+void test_scanf_int(int *x) {
+  scanf("%d", x); // no-warning
+  scanf("%u", x); // expected-warning{{format specifies type 'unsigned int *' but the argument has type 'int *'}}
+  scanf("%x", x); // expected-warning{{format specifies type 'unsigned int *' but the argument has type 'int *'}}
+}
+
+void test_scanf_unsigned(unsigned *x) {
+  scanf("%d", x); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned int *'}}
+  scanf("%u", x); // no-warning
+  scanf("%x", x); // no-warning
+}
+
+void test_scanf_long(long *x) {
+  scanf("%ld", x); // no-warning
+  scanf("%lu", x); // expected-warning{{format specifies type 'unsigned long *' but the argument has type 'long *'}}
+  scanf("%lx", x); // expected-warning{{format specifies type 'unsigned long *' but the argument has type 'long *'}}
+}
+
+void test_scanf_unsigned_long(unsigned long *x) {
+  scanf("%ld", x); // expected-warning{{format specifies type 'long *' but the argument has type 'unsigned long *'}}
+  scanf("%lu", x); // no-warning
+  scanf("%lx", x); // no-warning
+}
+
+void test_scanf_longlong(long long *x) {
+  scanf("%lld", x); // no-warning
+  scanf("%llu", x); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'long long *'}}
+  scanf("%llx", x); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'long long *'}}
+}
+
+void test_scanf_unsigned_longlong(unsigned long long *x) {
+  scanf("%lld", x); // expected-warning{{format specifies type 'long long *' but the argument has type 'unsigned long long *'}}
+  scanf("%llu", x); // no-warning
+  scanf("%llx", x); // no-warning
+}

>From 1f5fe21d5851e268dfedf909f5ec2832b741a514 Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Tue, 5 Dec 2023 11:17:08 +0100
Subject: [PATCH 2/2] [Sema] Implement support for -Wformat-signedness

Move the testcase to its correct dir.
---
 .../test/Sema/format-strings-signedness.c                         | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename format-strings-signedness.c => clang/test/Sema/format-strings-signedness.c (100%)

diff --git a/format-strings-signedness.c b/clang/test/Sema/format-strings-signedness.c
similarity index 100%
rename from format-strings-signedness.c
rename to clang/test/Sema/format-strings-signedness.c



More information about the llvm-commits mailing list