[clang] e3bd67e - [clang][Sema] check default argument promotions for printf

YingChi Long via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 31 19:11:05 PDT 2022


Author: YingChi Long
Date: 2022-09-01T10:10:10+08:00
New Revision: e3bd67eddf65b20956513e91715b1f997dae2690

URL: https://github.com/llvm/llvm-project/commit/e3bd67eddf65b20956513e91715b1f997dae2690
DIFF: https://github.com/llvm/llvm-project/commit/e3bd67eddf65b20956513e91715b1f997dae2690.diff

LOG: [clang][Sema] check default argument promotions for printf

The main focus of this patch is to make ArgType::matchesType check for
possible default parameter promotions when the argType is not a pointer.
If so, no warning will be given for `int`, `unsigned int` types as
corresponding arguments to %hhd and %hd. However, the usage of %hhd
corresponding to short is relatively rare, and it is more likely to be a
misuse. This patch keeps the original behavior of clang like this as
much as possible, while making it more convenient to consider the
default arguments promotion.

Fixes https://github.com/llvm/llvm-project/issues/57102

Reviewed By: aaron.ballman, nickdesaulniers, #clang-language-wg

Differential Revision: https://reviews.llvm.org/D132568

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/AST/FormatString.h
    clang/lib/AST/FormatString.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/Sema/format-strings-freebsd.c
    clang/test/Sema/format-strings-scanf.c
    clang/test/Sema/format-strings.c
    clang/www/c_status.html

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2251993a7c72b..bb7c508001db4 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -175,6 +175,10 @@ AIX Support
 C Language Changes in Clang
 ---------------------------
 
+- Adjusted ``-Wformat`` warnings according to `WG14 N2562 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf>`_.
+  Clang will now consider default argument promotions in printf, and remove unnecessary warnings.
+  Especially ``int`` argument with specifier ``%hhd`` and ``%hd``.
+
 C2x Feature Support
 -------------------
 

diff  --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 0f1f6b1340ff1..a2b8122890b92 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -261,8 +261,14 @@ class ArgType {
     /// instance, "%d" and float.
     NoMatch = 0,
     /// The conversion specifier and the argument type are compatible. For
-    /// instance, "%d" and _Bool.
+    /// instance, "%d" and int.
     Match = 1,
+    /// The conversion specifier and the argument type are compatible because of
+    /// default argument promotions. For instance, "%hhd" and int.
+    MatchPromotion,
+    /// The conversion specifier and the argument type are compatible but still
+    /// seems likely to be an error. For instanace, "%hhd" and short.
+    NoMatchPromotionTypeConfusion,
     /// The conversion specifier and the argument type are disallowed by the C
     /// standard, but are in practice harmless. For instance, "%p" and int*.
     NoMatchPedantic,

diff  --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index cf41228ba2f09..83846438a05f5 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -348,7 +348,7 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
       return Match;
 
     case AnyCharTy: {
-      if (const EnumType *ETy = argTy->getAs<EnumType>()) {
+      if (const auto *ETy = argTy->getAs<EnumType>()) {
         // If the enum is incomplete we know nothing about the underlying type.
         // Assume that it's 'int'.
         if (!ETy->getDecl()->isComplete())
@@ -356,17 +356,34 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
         argTy = ETy->getDecl()->getIntegerType();
       }
 
-      if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
+      if (const auto *BT = argTy->getAs<BuiltinType>()) {
+        // The types are perfectly matched?
         switch (BT->getKind()) {
+        default:
+          break;
+        case BuiltinType::Char_S:
+        case BuiltinType::SChar:
+        case BuiltinType::UChar:
+        case BuiltinType::Char_U:
+        case BuiltinType::Bool:
+          return Match;
+        }
+        // "Partially matched" because of promotions?
+        if (!Ptr) {
+          switch (BT->getKind()) {
           default:
             break;
-          case BuiltinType::Char_S:
-          case BuiltinType::SChar:
-          case BuiltinType::UChar:
-          case BuiltinType::Char_U:
-          case BuiltinType::Bool:
-            return Match;
+          case BuiltinType::Int:
+          case BuiltinType::UInt:
+            return MatchPromotion;
+          case BuiltinType::Short:
+          case BuiltinType::UShort:
+          case BuiltinType::WChar_S:
+          case BuiltinType::WChar_U:
+            return NoMatchPromotionTypeConfusion;
+          }
         }
+      }
       return NoMatch;
     }
 
@@ -383,8 +400,9 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
 
       if (T == argTy)
         return Match;
-      // Check for "compatible types".
-      if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
+      if (const auto *BT = argTy->getAs<BuiltinType>()) {
+        // Check if the only 
diff erence between them is signed vs unsigned
+        // if true, we consider they are compatible.
         switch (BT->getKind()) {
           default:
             break;
@@ -395,25 +413,66 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
           case BuiltinType::Bool:
             if (T == C.UnsignedShortTy || T == C.ShortTy)
               return NoMatchTypeConfusion;
-            return T == C.UnsignedCharTy || T == C.SignedCharTy ? Match
-                                                                : NoMatch;
+            if (T == C.UnsignedCharTy || T == C.SignedCharTy)
+              return Match;
+            break;
           case BuiltinType::Short:
-            return T == C.UnsignedShortTy ? Match : NoMatch;
+            if (T == C.UnsignedShortTy)
+              return Match;
+            break;
           case BuiltinType::UShort:
-            return T == C.ShortTy ? Match : NoMatch;
+            if (T == C.ShortTy)
+              return Match;
+            break;
           case BuiltinType::Int:
-            return T == C.UnsignedIntTy ? Match : NoMatch;
+            if (T == C.UnsignedIntTy)
+              return Match;
+            break;
           case BuiltinType::UInt:
-            return T == C.IntTy ? Match : NoMatch;
+            if (T == C.IntTy)
+              return Match;
+            break;
           case BuiltinType::Long:
-            return T == C.UnsignedLongTy ? Match : NoMatch;
+            if (T == C.UnsignedLongTy)
+              return Match;
+            break;
           case BuiltinType::ULong:
-            return T == C.LongTy ? Match : NoMatch;
+            if (T == C.LongTy)
+              return Match;
+            break;
           case BuiltinType::LongLong:
-            return T == C.UnsignedLongLongTy ? Match : NoMatch;
+            if (T == C.UnsignedLongLongTy)
+              return Match;
+            break;
           case BuiltinType::ULongLong:
-            return T == C.LongLongTy ? Match : NoMatch;
-        }
+            if (T == C.LongLongTy)
+              return Match;
+            break;
+          }
+          // "Partially matched" because of promotions?
+          if (!Ptr) {
+            switch (BT->getKind()) {
+            default:
+              break;
+            case BuiltinType::Int:
+            case BuiltinType::UInt:
+              if (T == C.SignedCharTy || T == C.UnsignedCharTy ||
+                  T == C.ShortTy || T == C.UnsignedShortTy || T == C.WCharTy ||
+                  T == C.WideCharTy)
+                return MatchPromotion;
+              break;
+            case BuiltinType::Short:
+            case BuiltinType::UShort:
+              if (T == C.SignedCharTy || T == C.UnsignedCharTy)
+                return NoMatchPromotionTypeConfusion;
+              break;
+            case BuiltinType::WChar_U:
+            case BuiltinType::WChar_S:
+              if (T != C.WCharTy && T != C.WideCharTy)
+                return NoMatchPromotionTypeConfusion;
+            }
+          }
+      }
       return NoMatch;
     }
 

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 8073223116c29..03b0c72fac9b6 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -10081,10 +10081,14 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
     return true;
   }
 
-  analyze_printf::ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy);
-  if (Match == analyze_printf::ArgType::Match)
+  ArgType::MatchKind ImplicitMatch = ArgType::NoMatch;
+  ArgType::MatchKind Match = AT.matchesType(S.Context, ExprTy);
+  if (Match == ArgType::Match)
     return true;
 
+  // NoMatchPromotionTypeConfusion should be only returned in ImplictCastExpr
+  assert(Match != ArgType::NoMatchPromotionTypeConfusion);
+
   // Look through argument promotions for our error message's reported type.
   // This includes the integral and floating promotions, but excludes array
   // and function pointer decay (seeing that an argument intended to be a
@@ -10101,13 +10105,9 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
       if (ICE->getType() == S.Context.IntTy ||
           ICE->getType() == S.Context.UnsignedIntTy) {
         // All further checking is done on the subexpression
-        const analyze_printf::ArgType::MatchKind ImplicitMatch =
-            AT.matchesType(S.Context, ExprTy);
-        if (ImplicitMatch == analyze_printf::ArgType::Match)
+        ImplicitMatch = AT.matchesType(S.Context, ExprTy);
+        if (ImplicitMatch == ArgType::Match)
           return true;
-        if (ImplicitMatch == ArgType::NoMatchPedantic ||
-            ImplicitMatch == ArgType::NoMatchTypeConfusion)
-          Match = ImplicitMatch;
       }
     }
   } else if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E)) {
@@ -10118,10 +10118,29 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
     // modifier is provided.
     if (ExprTy == S.Context.IntTy &&
         FS.getLengthModifier().getKind() != LengthModifier::AsChar)
-      if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue()))
+      if (llvm::isUIntN(S.Context.getCharWidth(), CL->getValue())) {
         ExprTy = S.Context.CharTy;
+        // To improve check results, we consider a character literal in C
+        // to be a 'char' rather than an 'int'. 'printf("%hd", 'a');' is
+        // more likely a type confusion situation, so we will suggest to
+        // use '%hhd' instead by discarding the MatchPromotion.
+        if (Match == ArgType::MatchPromotion)
+          Match = ArgType::NoMatch;
+      }
   }
-
+  if (Match == ArgType::MatchPromotion) {
+    // WG14 N2562 only clarified promotions in *printf
+    // For NSLog in ObjC, just preserve -Wformat behavior
+    if (!S.getLangOpts().ObjC &&
+        ImplicitMatch != ArgType::NoMatchPromotionTypeConfusion &&
+        ImplicitMatch != ArgType::NoMatchTypeConfusion)
+      return true;
+    Match = ArgType::NoMatch;
+  }
+  if (ImplicitMatch == ArgType::NoMatchPedantic ||
+      ImplicitMatch == ArgType::NoMatchTypeConfusion)
+    Match = ImplicitMatch;
+  assert(Match != ArgType::MatchPromotion);
   // Look through enums to their underlying type.
   bool IsEnum = false;
   if (auto EnumTy = ExprTy->getAs<EnumType>()) {
@@ -10194,7 +10213,10 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
     if (IntendedTy == ExprTy && !ShouldNotPrintDirectly) {
       unsigned Diag;
       switch (Match) {
-      case ArgType::Match: llvm_unreachable("expected non-matching");
+      case ArgType::Match:
+      case ArgType::MatchPromotion:
+      case ArgType::NoMatchPromotionTypeConfusion:
+        llvm_unreachable("expected non-matching");
       case ArgType::NoMatchPedantic:
         Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic;
         break;
@@ -10291,7 +10313,10 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
     case Sema::VAK_ValidInCXX11: {
       unsigned Diag;
       switch (Match) {
-      case ArgType::Match: llvm_unreachable("expected non-matching");
+      case ArgType::Match:
+      case ArgType::MatchPromotion:
+      case ArgType::NoMatchPromotionTypeConfusion:
+        llvm_unreachable("expected non-matching");
       case ArgType::NoMatchPedantic:
         Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic;
         break;

diff  --git a/clang/test/Sema/format-strings-freebsd.c b/clang/test/Sema/format-strings-freebsd.c
index 64b526eb2f076..3d3ed6b40f48d 100644
--- a/clang/test/Sema/format-strings-freebsd.c
+++ b/clang/test/Sema/format-strings-freebsd.c
@@ -35,9 +35,9 @@ void check_freebsd_kernel_extensions(int i, long l, char *s, short h)
   freebsd_kernel_printf("%lr", l); // no-warning
 
   // h modifier expects a short
-  freebsd_kernel_printf("%hr", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
+  freebsd_kernel_printf("%hr", i); // no-warning
   freebsd_kernel_printf("%hr", h); // no-warning
-  freebsd_kernel_printf("%hy", i); // expected-warning{{format specifies type 'short' but the argument has type 'int'}}
+  freebsd_kernel_printf("%hy", i); // no-warning
   freebsd_kernel_printf("%hy", h); // no-warning
 
   // %y expects an int

diff  --git a/clang/test/Sema/format-strings-scanf.c b/clang/test/Sema/format-strings-scanf.c
index d6e403148a5c4..054439ecaf5a7 100644
--- a/clang/test/Sema/format-strings-scanf.c
+++ b/clang/test/Sema/format-strings-scanf.c
@@ -246,3 +246,55 @@ void check_conditional_literal(char *s, int *i) {
   scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}}
   scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}}
 }
+
+void test_promotion(void) {
+  // No promotions for *scanf pointers clarified in N2562
+  // https://github.com/llvm/llvm-project/issues/57102
+  // N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf
+  int i;
+  signed char sc;
+  unsigned char uc;
+  short ss;
+  unsigned short us;
+
+  // pointers could not be "promoted"
+  scanf("%hhd", &i); // expected-warning{{format specifies type 'char *' but the argument has type 'int *'}}
+  scanf("%hd", &i); // expected-warning{{format specifies type 'short *' but the argument has type 'int *'}}
+  scanf("%d", &i); // no-warning
+  // char & uchar
+  scanf("%hhd", &sc); // no-warning
+  scanf("%hhd", &uc); // no-warning
+  scanf("%hd", &sc); // expected-warning{{format specifies type 'short *' but the argument has type 'signed char *'}}
+  scanf("%hd", &uc); // expected-warning{{format specifies type 'short *' but the argument has type 'unsigned char *'}}
+  scanf("%d", &sc); // expected-warning{{format specifies type 'int *' but the argument has type 'signed char *'}}
+  scanf("%d", &uc); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned char *'}}
+  // short & ushort
+  scanf("%hhd", &ss); // expected-warning{{format specifies type 'char *' but the argument has type 'short *'}}
+  scanf("%hhd", &us); // expected-warning{{format specifies type 'char *' but the argument has type 'unsigned short *'}}
+  scanf("%hd", &ss); // no-warning
+  scanf("%hd", &us); // no-warning
+  scanf("%d", &ss); // expected-warning{{format specifies type 'int *' but the argument has type 'short *'}}
+  scanf("%d", &us); // expected-warning{{format specifies type 'int *' but the argument has type 'unsigned short *'}}
+
+  // long types
+  scanf("%ld", &i); // expected-warning{{format specifies type 'long *' but the argument has type 'int *'}}
+  scanf("%lld", &i); // expected-warning{{format specifies type 'long long *' but the argument has type 'int *'}}
+  scanf("%ld", &sc); // expected-warning{{format specifies type 'long *' but the argument has type 'signed char *'}}
+  scanf("%lld", &sc); // expected-warning{{format specifies type 'long long *' but the argument has type 'signed char *'}}
+  scanf("%ld", &uc); // expected-warning{{format specifies type 'long *' but the argument has type 'unsigned char *'}}
+  scanf("%lld", &uc); // expected-warning{{format specifies type 'long long *' but the argument has type 'unsigned char *'}}
+  scanf("%llx", &i); // expected-warning{{format specifies type 'unsigned long long *' but the argument has type 'int *'}}
+
+  // ill-formed floats
+  scanf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}}
+  &sc);
+
+  // pointers in scanf
+  scanf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
+
+  // FIXME: does this match what the C committee allows or should it be pedantically warned on?
+  char c;
+  void *vp;
+  scanf("%hhd", &c); // Pedantic warning?
+  scanf("%hhd", vp); // expected-warning{{format specifies type 'char *' but the argument has type 'void *'}}
+}

diff  --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index 919bd74de392f..36dd88cea946a 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -830,3 +830,57 @@ void test_block(void) {
   printf_arg2("foo", "%s string %i\n", "aaa", 123);
   printf_arg2("%s string\n", "foo", "bar"); // expected-warning{{data argument not used by format string}}
 }
+
+void test_promotion(void) {
+  // Default argument promotions for *printf in N2562
+  // https://github.com/llvm/llvm-project/issues/57102
+  // N2562: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf
+  int i;
+  signed char sc;
+  unsigned char uc;
+  char c;
+  short ss;
+  unsigned short us;
+
+  printf("%hhd %hd %d %hhd %hd %d", i, i, i, sc, sc, sc); // no-warning
+  printf("%hhd %hd %d %hhd %hd %d", uc, uc, uc, c, c, c); // no-warning
+
+  // %ld %lld %llx
+  printf("%ld", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
+  printf("%lld", i); // expected-warning{{format specifies type 'long long' but the argument has type 'int'}}
+  printf("%ld", sc); // expected-warning{{format specifies type 'long' but the argument has type 'signed char'}}
+  printf("%lld", sc); // expected-warning{{format specifies type 'long long' but the argument has type 'signed char'}}
+  printf("%ld", uc); // expected-warning{{format specifies type 'long' but the argument has type 'unsigned char'}}
+  printf("%lld", uc); // expected-warning{{format specifies type 'long long' but the argument has type 'unsigned char'}}
+  printf("%llx", i); // expected-warning{{format specifies type 'unsigned long long' but the argument has type 'int'}}
+
+  // ill formed spec for floats
+  printf("%hf", // expected-warning{{length modifier 'h' results in undefined behavior or no effect with 'f' conversion specifier}}
+  sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}}
+
+  // for %hhd and `short` they are compatible by promotions but more likely misuse
+  printf("%hd", ss); // no-warning
+  printf("%hhd", ss); // expected-warning{{format specifies type 'char' but the argument has type 'short'}}
+  printf("%hu", us); // no-warning
+  printf("%hhu", ss); // expected-warning{{format specifies type 'unsigned char' but the argument has type 'short'}}
+
+  // floats & integers are not compatible
+  printf("%f", i); // expected-warning{{format specifies type 'double' but the argument has type 'int'}}
+  printf("%f", sc); // expected-warning{{format specifies type 'double' but the argument has type 'signed char'}}
+  printf("%f", uc); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned char'}}
+  printf("%f", c); // expected-warning{{format specifies type 'double' but the argument has type 'char'}}
+  printf("%f", ss); // expected-warning{{format specifies type 'double' but the argument has type 'short'}}
+  printf("%f", us); // expected-warning{{format specifies type 'double' but the argument has type 'unsigned short'}}
+
+  // character literals
+  // In C language engineering practice, printing a character literal with %hhd or %d is common, but %hd may be misuse.
+  printf("%hhu", 'a'); // no-warning
+  printf("%hhd", 'a'); // no-warning
+  printf("%hd", 'a'); // expected-warning{{format specifies type 'short' but the argument has type 'char'}}
+  printf("%hu", 'a'); // expected-warning{{format specifies type 'unsigned short' but the argument has type 'char'}}
+  printf("%d", 'a'); // no-warning
+  printf("%u", 'a'); // no-warning
+  
+  // pointers
+  printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
+}

diff  --git a/clang/www/c_status.html b/clang/www/c_status.html
index 5eefdda4cc45b..7d97a6bc1f13f 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -819,13 +819,7 @@ <h2 id="c2x">C2x implementation status</h2>
     <tr>
       <td>Unclear type relationship between a format specifier and its argument</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2562.pdf">N2562</a></td>
-      <td class="partial" align="center">
-        <details><summary>Partial</summary>
-          Clang supports diagnostics checking format specifier validity, but
-          does not yet account for all of the changes in this paper, especially
-          regarding length modifiers like <code>h</code> and <code>hh</code>.
-        </details>
-      </td>
+      <td class="unreleased" align="center">Clang 16</td>
     </tr>
     <!-- Apr 2021 Papers -->
     <tr>


        


More information about the cfe-commits mailing list