[clang] [clang] Catch missing format attributes (PR #105479)

via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 21 01:05:33 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Budimir Aranđelović (budimirarandjelovichtec)

<details>
<summary>Changes</summary>

Enable flag -Wmissing-format-attribute to catch missing attributes.

Fixes https://github.com/llvm/llvm-project/issues/60718

---

Patch is 40.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105479.diff


9 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+3) 
- (modified) clang/include/clang/Basic/DiagnosticGroups.td (-1) 
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+3) 
- (modified) clang/include/clang/Sema/Attr.h (+7) 
- (modified) clang/include/clang/Sema/Sema.h (+4) 
- (modified) clang/lib/Sema/SemaDecl.cpp (+2) 
- (modified) clang/lib/Sema/SemaDeclAttr.cpp (+214-2) 
- (added) clang/test/Sema/attr-format-missing.c (+393) 
- (added) clang/test/Sema/attr-format-missing.cpp (+174) 


``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 6adf57da42e656..f9fd4a20ea6bf6 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -707,6 +707,9 @@ Improvements to Clang's diagnostics
 
 - Clang now diagnoses integer constant expressions that are folded to a constant value as an extension in more circumstances. Fixes #GH59863
 
+- Clang now diagnoses missing format attributes for non-template functions and
+  class/struct/union members. Fixes #GH60718
+
 Improvements to Clang's time-trace
 ----------------------------------
 
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 2241f8481484e2..da6a3b2fe3571b 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -525,7 +525,6 @@ def MainReturnType : DiagGroup<"main-return-type">;
 def MaxUnsignedZero : DiagGroup<"max-unsigned-zero">;
 def MissingBraces : DiagGroup<"missing-braces">;
 def MissingDeclarations: DiagGroup<"missing-declarations">;
-def : DiagGroup<"missing-format-attribute">;
 def MissingIncludeDirs : DiagGroup<"missing-include-dirs">;
 def MissingNoreturn : DiagGroup<"missing-noreturn">;
 def MultiChar : DiagGroup<"multichar">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 940f9ac226ca87..75e51c5aa166d2 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1031,6 +1031,9 @@ def err_opencl_invalid_param : Error<
 def err_opencl_invalid_return : Error<
   "declaring function return value of type %0 is not allowed %select{; did you forget * ?|}1">;
 def warn_enum_value_overflow : Warning<"overflow in enumeration value">;
+def warn_missing_format_attribute : Warning<
+  "diagnostic behavior may be improved by adding the %0 format attribute to the declaration of %1">,
+  InGroup<DiagGroup<"missing-format-attribute">>, DefaultIgnore;
 def warn_pragma_options_align_reset_failed : Warning<
   "#pragma options align=reset failed: %0">,
   InGroup<IgnoredPragmas>;
diff --git a/clang/include/clang/Sema/Attr.h b/clang/include/clang/Sema/Attr.h
index 3f0b10212789a4..37c124ca7b454a 100644
--- a/clang/include/clang/Sema/Attr.h
+++ b/clang/include/clang/Sema/Attr.h
@@ -123,6 +123,13 @@ inline bool isInstanceMethod(const Decl *D) {
   return false;
 }
 
+inline bool checkIfMethodHasImplicitObjectParameter(const Decl *D) {
+  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(D))
+    return MethodDecl->isInstance() &&
+           !MethodDecl->hasCXXExplicitFunctionObjectParameter();
+  return false;
+}
+
 /// Diagnose mutually exclusive attributes when present on a given
 /// declaration. Returns true if diagnosed.
 template <typename AttrTy>
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 57994f4033922c..9022ac86edf817 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4602,6 +4602,10 @@ class Sema final : public SemaBase {
 
   enum class RetainOwnershipKind { NS, CF, OS };
 
+  void DiagnoseMissingFormatAttributes(Stmt *Body, const FunctionDecl *FDecl);
+  std::vector<FormatAttr *>
+  GetMissingFormatAttributes(Stmt *Body, const FunctionDecl *FDecl);
+
   UuidAttr *mergeUuidAttr(Decl *D, const AttributeCommonInfo &CI,
                           StringRef UuidAsWritten, MSGuidDecl *GuidDecl);
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 66eeaa8e6f7777..29c28d4b567513 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -15925,6 +15925,8 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
         }
       }
 
+      DiagnoseMissingFormatAttributes(Body, FD);
+
       // We might not have found a prototype because we didn't wish to warn on
       // the lack of a missing prototype. Try again without the checks for
       // whether we want to warn on the missing prototype.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index f2cd46d1e7c932..4634dd8b9cce80 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3508,7 +3508,7 @@ static void handleFormatAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
 
   // In C++ the implicit 'this' function parameter also counts, and they are
   // counted from one.
-  bool HasImplicitThisParam = isInstanceMethod(D);
+  bool HasImplicitThisParam = checkIfMethodHasImplicitObjectParameter(D);
   unsigned NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
 
   IdentifierInfo *II = AL.getArgAsIdent(0)->Ident;
@@ -3621,7 +3621,7 @@ static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  bool HasImplicitThisParam = isInstanceMethod(D);
+  bool HasImplicitThisParam = checkIfMethodHasImplicitObjectParameter(D);
   int32_t NumArgs = getFunctionOrMethodNumParams(D);
 
   FunctionDecl *FD = D->getAsFunction();
@@ -5320,6 +5320,218 @@ static void handlePreferredTypeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) PreferredTypeAttr(S.Context, AL, ParmTSI));
 }
 
+// This function is called only if function call is not inside template body.
+// TODO: Add call for function calls inside template body.
+// Emit warnings if parent function misses format attributes.
+void Sema::DiagnoseMissingFormatAttributes(Stmt *Body,
+                                           const FunctionDecl *FDecl) {
+  assert(FDecl);
+
+  // If there are no function body, exit.
+  if (!Body)
+    return;
+
+  // Get missing format attributes
+  std::vector<FormatAttr *> MissingFormatAttributes =
+      GetMissingFormatAttributes(Body, FDecl);
+  if (MissingFormatAttributes.empty())
+    return;
+
+  // Check if there are more than one format type found. In that case do not
+  // emit diagnostic.
+  const clang::IdentifierInfo *AttrType = MissingFormatAttributes[0]->getType();
+  if (llvm::any_of(MissingFormatAttributes, [&](const FormatAttr *Attr) {
+        return AttrType != Attr->getType();
+      }))
+    return;
+
+  for (const FormatAttr *FA : MissingFormatAttributes) {
+    // If format index and first-to-check argument index are negative, it means
+    // that this attribute is only saved for multiple format types checking.
+    if (FA->getFormatIdx() < 0 || FA->getFirstArg() < 0)
+      continue;
+
+    // Emit diagnostic
+    SourceLocation Loc = FDecl->getLocation();
+    Diag(Loc, diag::warn_missing_format_attribute)
+        << FA->getType() << FDecl
+        << FixItHint::CreateInsertion(Loc,
+                                      (llvm::Twine("__attribute__((format(") +
+                                       FA->getType()->getName() + ", " +
+                                       llvm::Twine(FA->getFormatIdx()) + ", " +
+                                       llvm::Twine(FA->getFirstArg()) + ")))")
+                                          .str());
+  }
+}
+
+// Returns vector of format attributes. There are no two attributes with same
+// arguments in returning vector. There can be attributes that effectivelly only
+// store information about format type.
+std::vector<FormatAttr *>
+Sema::GetMissingFormatAttributes(Stmt *Body, const FunctionDecl *FDecl) {
+  unsigned int FunctionFormatArgumentIndexOffset =
+      checkIfMethodHasImplicitObjectParameter(FDecl) ? 2 : 1;
+
+  std::vector<FormatAttr *> MissingAttributes;
+
+  // Iterate over body statements.
+  for (auto *Child : Body->children()) {
+    // If child statement is compound statement, recursively get missing
+    // attributes.
+    if (dyn_cast_or_null<CompoundStmt>(Child)) {
+      std::vector<FormatAttr *> CompoundStmtMissingAttributes =
+          GetMissingFormatAttributes(Child, FDecl);
+
+      // If there are already missing attributes with same arguments, do not add
+      // duplicates.
+      for (FormatAttr *FA : CompoundStmtMissingAttributes) {
+        if (!llvm::any_of(MissingAttributes, [&](const FormatAttr *Attr) {
+              return FA->getType() == Attr->getType() &&
+                     FA->getFormatIdx() == Attr->getFormatIdx() &&
+                     FA->getFirstArg() == Attr->getFirstArg();
+            }))
+          MissingAttributes.push_back(FA);
+      }
+
+      continue;
+    }
+
+    ValueStmt *VS = dyn_cast_or_null<ValueStmt>(Child);
+    if (!VS)
+      continue;
+    Expr *TheExpr = VS->getExprStmt();
+    if (!TheExpr)
+      continue;
+    CallExpr *TheCall = dyn_cast_or_null<CallExpr>(TheExpr);
+    if (!TheCall)
+      continue;
+    const FunctionDecl *ChildFunction =
+        dyn_cast_or_null<FunctionDecl>(TheCall->getCalleeDecl());
+    if (!ChildFunction)
+      continue;
+
+    Expr **Args = TheCall->getArgs();
+    unsigned int NumArgs = TheCall->getNumArgs();
+
+    // If child expression is function, check if it is format function.
+    // If it is, check if parent function misses format attributes.
+
+    // If child function is format function and format arguments are not
+    // relevant to emit diagnostic, save only information about format type
+    // (format index and first-to-check argument index are set to -1).
+    // Information about format type is later used to determine if there are
+    // more than one format type found.
+
+    unsigned int ChildFunctionFormatArgumentIndexOffset =
+        checkIfMethodHasImplicitObjectParameter(ChildFunction) ? 2 : 1;
+
+    // Check if function has format attribute with forwarded format string.
+    IdentifierInfo *AttrType;
+    const ParmVarDecl *FormatArg;
+    if (llvm::any_of(ChildFunction->specific_attrs<FormatAttr>(),
+                     [&](const FormatAttr *Attr) {
+                       AttrType = Attr->getType();
+
+                       int OffsetFormatIndex =
+                           Attr->getFormatIdx() -
+                           ChildFunctionFormatArgumentIndexOffset;
+                       if (OffsetFormatIndex < 0 ||
+                           (unsigned)OffsetFormatIndex >= NumArgs)
+                         return true;
+
+                       const auto *FormatArgExpr = dyn_cast<DeclRefExpr>(
+                           Args[OffsetFormatIndex]->IgnoreParenCasts());
+                       if (!FormatArgExpr)
+                         return true;
+
+                       FormatArg = dyn_cast_or_null<ParmVarDecl>(
+                           FormatArgExpr->getReferencedDeclOfCallee());
+                       if (!FormatArg)
+                         return true;
+
+                       return false;
+                     })) {
+      MissingAttributes.push_back(
+          FormatAttr::CreateImplicit(getASTContext(), AttrType, -1, -1));
+      continue;
+    }
+
+    // Do not add in a vector format attributes whose type is different than
+    // parent function attribute type.
+    if (llvm::any_of(FDecl->specific_attrs<FormatAttr>(),
+                     [&](const FormatAttr *FunctionAttr) {
+                       return AttrType != FunctionAttr->getType();
+                     }))
+      continue;
+
+    // Check if format string argument is parent function parameter.
+    int StringIndex = 0;
+    if (!llvm::any_of(FDecl->parameters(), [&](const ParmVarDecl *Param) {
+          if (Param != FormatArg)
+            return false;
+
+          StringIndex = Param->getFunctionScopeIndex() +
+                        FunctionFormatArgumentIndexOffset;
+
+          return true;
+        })) {
+      MissingAttributes.push_back(
+          FormatAttr::CreateImplicit(getASTContext(), AttrType, -1, -1));
+      continue;
+    }
+
+    unsigned NumOfParentFunctionParams = FDecl->getNumParams();
+
+    // Compare parent and calling function format attribute arguments (archetype
+    // and format string).
+    if (llvm::any_of(
+            FDecl->specific_attrs<FormatAttr>(), [&](const FormatAttr *Attr) {
+              if (Attr->getType() != AttrType)
+                return false;
+              int OffsetFormatIndex =
+                  Attr->getFormatIdx() - FunctionFormatArgumentIndexOffset;
+
+              if (OffsetFormatIndex < 0 ||
+                  (unsigned)OffsetFormatIndex >= NumOfParentFunctionParams)
+                return false;
+
+              if (FDecl->parameters()[OffsetFormatIndex] != FormatArg)
+                return false;
+
+              return true;
+            })) {
+      MissingAttributes.push_back(
+          FormatAttr::CreateImplicit(getASTContext(), AttrType, -1, -1));
+      continue;
+    }
+
+    // Get first argument index
+    int FirstToCheck = [&]() -> unsigned {
+      if (!FDecl->isVariadic())
+        return 0;
+      const auto *FirstToCheckArg = Args[NumArgs - 1]->IgnoreParenCasts();
+      if (!FirstToCheckArg ||
+          FirstToCheckArg->getType().getCanonicalType() !=
+              Context.getBuiltinVaListType().getCanonicalType())
+        return 0;
+      return NumOfParentFunctionParams + FunctionFormatArgumentIndexOffset;
+    }();
+
+    // If there are already attributes which arguments matches arguments
+    // detected in this iteration, do not add new attribute as it would be
+    // duplicate.
+    if (!llvm::any_of(MissingAttributes, [&](const FormatAttr *Attr) {
+          return Attr->getType() == AttrType &&
+                 Attr->getFormatIdx() == StringIndex &&
+                 Attr->getFirstArg() == FirstToCheck;
+        }))
+      MissingAttributes.push_back(FormatAttr::CreateImplicit(
+          getASTContext(), AttrType, StringIndex, FirstToCheck));
+  }
+
+  return MissingAttributes;
+}
+
 //===----------------------------------------------------------------------===//
 // Microsoft specific attribute handlers.
 //===----------------------------------------------------------------------===//
diff --git a/clang/test/Sema/attr-format-missing.c b/clang/test/Sema/attr-format-missing.c
new file mode 100644
index 00000000000000..e887e11d767040
--- /dev/null
+++ b/clang/test/Sema/attr-format-missing.c
@@ -0,0 +1,393 @@
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,c_diagnostics -Wmissing-format-attribute %s
+// RUN: %clang_cc1 -fsyntax-only -Wmissing-format-attribute -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s --check-prefixes=CHECK,C-CHECK
+// RUN: %clang_cc1 -fsyntax-only -x c++ -verify=expected,cpp_diagnostics -Wmissing-format-attribute %s
+// RUN: %clang_cc1 -fsyntax-only -x c++ -verify=expected,cpp_diagnostics -std=c++2b -Wmissing-format-attribute %s
+// RUN: %clang_cc1 -fsyntax-only -x c++ -verify=expected,cpp_diagnostics -std=c++23 -Wmissing-format-attribute %s
+// RUN: not %clang_cc1 -fsyntax-only -x c++ -Wmissing-format-attribute -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+#ifndef __cplusplus
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+typedef __WCHAR_TYPE__ wchar_t;
+#endif
+
+typedef __SIZE_TYPE__ size_t;
+typedef __builtin_va_list va_list;
+
+__attribute__((__format__(__printf__, 1, 2)))
+int printf(const char *, ...); // #printf
+
+__attribute__((__format__(__scanf__, 1, 2)))
+int scanf(const char *, ...); // #scanf
+
+__attribute__((__format__(__printf__, 1, 0)))
+int vprintf(const char *, va_list); // #vprintf
+
+__attribute__((__format__(__scanf__, 1, 0)))
+int vscanf(const char *, va_list); // #vscanf
+
+__attribute__((__format__(__printf__, 2, 0)))
+int vsprintf(char *, const char *, va_list); // #vsprintf
+
+__attribute__((__format__(__printf__, 3, 0)))
+int vsnprintf(char *ch, size_t, const char *, va_list); // #vsnprintf
+
+__attribute__((__format__(__scanf__, 1, 4)))
+void f1(char *out, const size_t len, const char *format, ... /* args */) // #f1
+{
+    va_list args;
+    vsnprintf(out, len, format, args); // expected-no-warning@#f1
+}
+
+__attribute__((__format__(__printf__, 1, 4)))
+void f2(char *out, const size_t len, const char *format, ... /* args */) // #f2
+{
+    va_list args;
+    vsnprintf(out, len, format, args); // expected-warning@#f2 {{diagnostic behavior may be improved by adding the 'printf' format attribute to the declaration of 'f2'}}
+                                       // CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:6-[[@LINE-4]]:6}:"__attribute__((format(printf, 3, 4)))"
+}
+
+void f3(char *out, va_list args) // #f3
+{
+    vprintf(out, args); // expected-warning@#f3 {{diagnostic behavior may be improved by adding the 'printf' format attribute to the declaration of 'f3'}}
+                        // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:6-[[@LINE-3]]:6}:"__attribute__((format(printf, 1, 0)))"
+}
+
+void f4(char* out, ... /* args */) // #f4
+{
+    va_list args;
+    vprintf("test", args); // expected-no-warning@#f4
+
+    const char *ch;
+    vprintf(ch, args); // expected-no-warning@#f4
+}
+
+void f5(va_list args) // #f5
+{
+    char *ch;
+    vscanf(ch, args); // expected-no-warning@#f5
+}
+
+void f6(char *out, va_list args) // #f6
+{
+    char *ch;
+    vprintf(ch, args); // expected-no-warning@#f6
+    vprintf("test", args); // expected-no-warning@#f6
+    vprintf(out, args); // expected-warning@#f6 {{diagnostic behavior may be improved by adding the 'printf' format attribute to the declaration of 'f6'}}
+                        // CHECK: fix-it:"{{.*}}":{[[@LINE-6]]:6-[[@LINE-6]]:6}:"__attribute__((format(printf, 1, 0)))"
+}
+
+void f7(const char *out, ... /* args */) // #f7
+{
+    va_list args;
+
+    vscanf(out, args); // expected-warning@#f7 {{diagnostic behavior may be improved by adding the 'scanf' format attribute to the declaration of 'f7'}}
+                       // CHECK: fix-it:"{{.*}}":{[[@LINE-5]]:6-[[@LINE-5]]:6}:"__attribute__((format(scanf, 1, 2)))"
+}
+
+void f8(const char *out, ... /* args */) // #f8
+{
+    va_list args;
+
+    vscanf(out, args); // expected-no-warning@#f8
+    vprintf(out, args); // expected-no-warning@#f8
+}
+
+void f9(const char out[], ... /* args */) // #f9
+{
+    va_list args;
+    char *ch;
+    vprintf(ch, args); // expected-no-warning
+    vsprintf(ch, out, args); // expected-warning@#f9 {{diagnostic behavior may be improved by adding the 'printf' format attribute to the declaration of 'f9'}}
+                             // CHECK: fix-it:"{{.*}}":{[[@LINE-6]]:6-[[@LINE-6]]:6}:"__attribute__((format(printf, 1, 2)))"
+}
+
+void f10(const wchar_t *out, ... /* args */) // #f10
+{
+    va_list args;
+    vscanf(out, args);
+#if (defined(__aarch64__) && !defined(_WIN64)) || (defined(__arm__) && !defined(_WIN32))
+                        // c_diagnostics-warning at -2 {{incompatible pointer types passing 'const wchar_t *' (aka 'const unsigned int *') to parameter of type 'const char *'}}
+#elif __SIZEOF_WCHAR_T__ == 4
+                        // c_diagnostics-warning at -4 {{incompatible pointer types passing 'const wchar_t *' (aka 'const int *') to parameter of type 'const char *'}}
+#else
+                        // c_diagnostics-warning at -6 {{incompatible pointer types passing 'const wchar_t *' (aka 'const unsigned short *') to parameter of type 'const char *'}}
+#endif
+                        // c_diagnostics-note@#vscanf {{passing argument to parameter here}}
+                        // c_diagnostics-warning@#f10 {{diagnostic behavior may be improved by adding the 'scanf' format attribute to the declaration of 'f10'}}
+                        // C-CHECK: fix-it:"{{.*}}":{[[@LINE-13]]:6-[[@LINE-13]]:6}:"__attribute__((format(scanf, 1, 2)))"
+                        // cpp_diagnostics-error at -11 {{no matching function for call to 'vscanf'}}
+                        // cpp_diagnostics-note@#vscanf {{candidate function not viable: no known conversion from 'const wchar_t *' to 'const char *' for 1st argument}}
+}
+
+void f11(const wchar_t *out, ... /* args */) // #f11
+{
+    va_list args;
+    vscanf((const char *) out, args); // expected-warning@#f11 {{diagnostic behavior may be improved by adding the 'scanf' format attribute to the declaration of 'f11'}}
+                                      // CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:6-[[@LINE-4]]:6}:"__attribute__((format(scanf, 1, 2)))"
+}
+
+void f12(const wchar_t *out, ... /* args */) // #f12
+{
+    va_list args;
+    vscanf((char *) out, args); // expected-warning@#f12 {{diagnostic behavior may be improved by adding the 'scanf' format attribute to the declaration of 'f12'}}
+                                // CHECK: fix-it:"{{.*}}":{[[@LINE-4]]:6-[[@LINE-4]]:6}:"__attribute__((format(sc...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/105479


More information about the cfe-commits mailing list