[clang] 04e6178 - [Sema] tolerate more promotion matches in format string checking

Fri Aug 25 10:14:24 PDT 2023

Author: Félix Cloutier
Date: 2023-08-25T10:14:01-07:00
New Revision: 04e6178ae932c9a1d939dcfe3ef1189f4bbb21aa

URL: https://github.com/llvm/llvm-project/commit/04e6178ae932c9a1d939dcfe3ef1189f4bbb21aa
DIFF: https://github.com/llvm/llvm-project/commit/04e6178ae932c9a1d939dcfe3ef1189f4bbb21aa.diff

LOG: [Sema] tolerate more promotion matches in format string checking

It's been reported that when using __attribute__((format)) on non-variadic
functions, certain values that normally get promoted when passed as variadic
arguments now unconditionally emit a diagnostic:

```c
void foo(const char *fmt, float f) __attribute__((format(printf, 1, 2)));
void bar(void) {
	foo("%g", 123.f);
	//   ^ format specifies type 'double' but the argument has type 'float'
}
```

This is normally not an issue because float values get promoted to doubles when
passed as variadic arguments, but needless to say, variadic argument promotion
does not apply to non-variadic arguments.

While this can be fixed by adjusting the prototype of `foo`, this is sometimes
undesirable in C (for instance, if `foo` is ABI). In C++, using variadic
templates, this might instead require call-site fixing, which is tedious and
arguably needless work:

```c++
template<typename... Args>
void foo(const char *fmt, Args &&...args) __attribute__((format(printf, 1, 2)));
void bar(void) {
	foo("%g", 123.f);
	//   ^ format specifies type 'double' but the argument has type 'float'
}
```

To address this issue, we teach FormatString about a few promotions that have
always been around but that have never been exercised in the direction that
FormatString checks for:

* `char`, `unsigned char` -> `int`, `unsigned`
* `half`, `float16`, `float` -> `double`

This addresses issue https://github.com/llvm/llvm-project/issues/59824.

Added: 
    clang/test/SemaCXX/format-strings-scanf.cpp

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/lib/AST/FormatString.cpp
    clang/test/Sema/attr-format.c
    clang/test/SemaCXX/attr-format.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f0e601fc2df88b..8580b2ccb20c24 100644

--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -128,6 +128,13 @@ Removed Compiler Flags
 Attribute Changes in Clang
 --------------------------
 
+- When a non-variadic function is decorated with the ``format`` attribute,
+  Clang now checks that the format string would match the function's parameters'
+  types after default argument promotion. As a result, it's no longer an
+  automatic diagnostic to use parameters of types that the format style
+  supports but that are never the result of default argument promotion, such as
+  ``float``. (`#59824: <https://github.com/llvm/llvm-project/issues/59824>`_)
+
 Improvements to Clang's diagnostics
 -----------------------------------
 - Clang constexpr evaluator now prints template arguments when displaying

diff  --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index ad5af9508983fe..f86278e4b5163d 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -458,6 +458,10 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
             switch (BT->getKind()) {
             default:
               break;
+            case BuiltinType::Bool:
+              if (T == C.IntTy || T == C.UnsignedIntTy)
+                return MatchPromotion;
+              break;
             case BuiltinType::Int:
             case BuiltinType::UInt:
               if (T == C.SignedCharTy || T == C.UnsignedCharTy ||
@@ -465,6 +469,24 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const {
                   T == C.WideCharTy)
                 return MatchPromotion;
               break;
+            case BuiltinType::Char_U:
+              if (T == C.UnsignedIntTy)
+                return MatchPromotion;
+              if (T == C.UnsignedShortTy)
+                return NoMatchPromotionTypeConfusion;
+              break;
+            case BuiltinType::Char_S:
+              if (T == C.IntTy)
+                return MatchPromotion;
+              if (T == C.ShortTy)
+                return NoMatchPromotionTypeConfusion;
+              break;
+            case BuiltinType::Half:
+            case BuiltinType::Float16:
+            case BuiltinType::Float:
+              if (T == C.DoubleTy)
+                return MatchPromotion;
+              break;
             case BuiltinType::Short:
             case BuiltinType::UShort:
               if (T == C.SignedCharTy || T == C.UnsignedCharTy)

diff  --git a/clang/test/Sema/attr-format.c b/clang/test/Sema/attr-format.c
index d891828a77a4c6..9cc6b5482144a7 100644
--- a/clang/test/Sema/attr-format.c
+++ b/clang/test/Sema/attr-format.c
@@ -94,13 +94,9 @@ void call_nonvariadic(void) {
   d3("%s", 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
 }
 
-__attribute__((format(printf, 1, 2))) void forward_fixed(const char *fmt, int i) { // expected-warning{{GCC requires a function with the 'format' attribute to be variadic}}
-  forward_fixed(fmt, i);
-  a(fmt, i);
-}
-
-__attribute__((format(printf, 1, 2))) void forward_fixed_2(const char *fmt, int i, int j) { // expected-warning{{GCC requires a function with the 'format' attribute to be variadic}}
-  forward_fixed_2(fmt, i, j);
-  a(fmt, i);
+__attribute__((format(printf, 1, 2)))
+void forward_fixed(const char *fmt, _Bool b, char i, short j, int k, float l, double m) { // expected-warning{{GCC requires a function with the 'format' attribute to be variadic}}
+  forward_fixed(fmt, b, i, j, k, l, m);
+  a(fmt, b, i, j, k, l, m);
 }
 

diff  --git a/clang/test/SemaCXX/attr-format.cpp b/clang/test/SemaCXX/attr-format.cpp
index db9e92be6ce959..adc05fc46776ca 100644
--- a/clang/test/SemaCXX/attr-format.cpp
+++ b/clang/test/SemaCXX/attr-format.cpp
@@ -72,12 +72,20 @@ void do_format() {
   int x = 123;
   int &y = x;
   const char *s = "world";
+  bool b = false;
   format("bare string");
   format("%s", 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
   format("%s %s %u %d %i %p\n", "hello", s, 10u, x, y, &do_format);
   format("%s %s %u %d %i %p\n", "hello", s, 10u, x, y, do_format);
   format("bad format %s"); // expected-warning{{more '%' conversions than data arguments}}
 
+  format("%c %c %hhd %hd %d\n", (char)'a', 'a', 'a', (short)123, (int)123);
+  format("%f %f %f\n", (__fp16)123.f, 123.f, 123.);
+  format("%Lf", (__fp16)123.f); // expected-warning{{format specifies type 'long double' but the argument has type '__fp16'}}
+  format("%Lf", 123.f); // expected-warning{{format specifies type 'long double' but the argument has type 'float'}}
+  format("%hhi %hhu %hi %hu %i %u", b, b, b, b, b, b);
+  format("%li", b); // expected-warning{{format specifies type 'long' but the argument has type 'bool'}}
+
   struct foo f;
   format_invalid_nonpod("hello %i", f); // expected-warning{{format specifies type 'int' but the argument has type 'struct foo'}}
 

diff  --git a/clang/test/SemaCXX/format-strings-scanf.cpp b/clang/test/SemaCXX/format-strings-scanf.cpp
new file mode 100644
index 00000000000000..f78d334d1685ed
--- /dev/null
+++ b/clang/test/SemaCXX/format-strings-scanf.cpp
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -fblocks -std=c++11 %s
+
+__attribute__((format(scanf, 1, 2)))
+int scanf(const char *, ...);
+
+template<typename... Args>
+__attribute__((format(scanf, 1, 2)))
+int scan(const char *fmt, Args &&...args) { // expected-warning{{GCC requires a function with the 'format' attribute to be variadic}}
+    return scanf(fmt, args...);
+}
+
+union bag {
+    bool b;
+    unsigned char uc;
+    signed char sc;
+    unsigned short us;
+    signed short ss;
+    unsigned int ui;
+    signed int si;
+    unsigned long ul;
+    signed long sl;
+    unsigned long long ull;
+    signed long long sll;
+    __fp16 f16;
+    float ff;
+    double fd;
+    long double fl;
+};
+
+void test(void) {
+    bag b;
+    scan("%hhi %hhu %hhi %hhu", &b.sc, &b.uc, &b.b, &b.b);
+    scan("%hi %hu", &b.ss, &b.us);
+    scan("%i %u", &b.si, &b.ui);
+    scan("%li %lu", &b.sl, &b.ul);
+    scan("%lli %llu", &b.sll, &b.ull);
+    scan("%f", &b.ff);
+    scan("%lf", &b.fd);
+    scan("%Lf", &b.fl);
+
+    // expected-warning at +4{{format specifies type 'short *' but the argument has type 'signed char *'}}
+    // expected-warning at +3{{format specifies type 'unsigned short *' but the argument has type 'unsigned char *'}}
+    // expected-warning at +2{{format specifies type 'short *' but the argument has type 'bool *'}}
+    // expected-warning at +1{{format specifies type 'unsigned short *' but the argument has type 'bool *'}}
+    scan("%hi %hu %hi %hu", &b.sc, &b.uc, &b.b, &b.b);
+
+    // expected-warning at +3{{format specifies type 'long *' but the argument has type 'short *'}}
+    // expected-warning at +2{{format specifies type 'char *' but the argument has type 'short *'}}
+    // expected-warning at +1{{format specifies type 'int *' but the argument has type 'short *'}}
+    scan("%hhi %i %li", &b.ss, &b.ss, &b.ss);
+
+    // expected-warning at +3{{format specifies type 'float *' but the argument has type '__fp16 *'}}
+    // expected-warning at +2{{format specifies type 'float *' but the argument has type 'double *'}}
+    // expected-warning at +1{{format specifies type 'float *' but the argument has type 'long double *'}}
+    scan("%f %f %f", &b.f16, &b.fd, &b.fl);
+
+    // expected-warning at +3{{format specifies type 'double *' but the argument has type '__fp16 *'}}
+    // expected-warning at +2{{format specifies type 'double *' but the argument has type 'float *'}}
+    // expected-warning at +1{{format specifies type 'double *' but the argument has type 'long double *'}}
+    scan("%lf %lf %lf", &b.f16, &b.ff, &b.fl);
+
+    // expected-warning at +3{{format specifies type 'long double *' but the argument has type '__fp16 *'}}
+    // expected-warning at +2{{format specifies type 'long double *' but the argument has type 'float *'}}
+    // expected-warning at +1{{format specifies type 'long double *' but the argument has type 'double *'}}
+    scan("%Lf %Lf %Lf", &b.f16, &b.ff, &b.fd);
+}