[clang] [analyzer] Fix format attribute handling in GenericTaintChecker (PR #132765)
DonĂ¡t Nagy via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 26 02:47:27 PDT 2025
https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/132765
>From f0ac1f6c223b3bfce25ba0183ba1aa2825c455ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 24 Mar 2025 16:58:31 +0100
Subject: [PATCH 1/3] [NFC][analyzer] Add testcase to highlight GenericTaint
bug
Currently `optin.taint.GenericTaint` can produce false positives if a
[format attribute](https://clang.llvm.org/docs/AttributeReference.html#format)
is applied on a non-static method.
This commit adds a testcase that highlights this buggy behavior.
---
clang/test/Analysis/taint-generic.cpp | 43 +++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 8836e1d3d2d98..41fbe313d2b44 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -161,3 +161,46 @@ void top() {
clang_analyzer_isTainted(A.data); // expected-warning {{YES}}
}
} // namespace gh114270
+
+
+namespace format_attribute {
+__attribute__((__format__ (__printf__, 1, 2)))
+void log_nonmethod(const char *fmt, ...);
+
+void test_format_attribute_nonmethod() {
+ int n;
+ fscanf(stdin, "%d", &n); // Get a tainted value.
+
+ log_nonmethod("This number is suspicious: %d\n", n); // no-warning
+}
+
+struct Foo {
+ // When the format attribute is applied to a method, argumet '1' is the
+ // implicit `this`, so e.g. in this case argument '2' specifies `fmt`.
+ // Specifying '1' instead of '2' would produce a compilation error:
+ // "format attribute cannot specify the implicit this argument as the format string"
+ __attribute__((__format__ (__printf__, 2, 3)))
+ void log_method(const char *fmt, ...);
+
+ void test_format_attribute_method() {
+ int n;
+ fscanf(stdin, "%d", &n); // Get a tainted value.
+
+ // FIXME: The analyzer misinterprets the parameter indices in the format
+ // attribute when the format attribute is applied to a method.
+ log_method("This number is suspicious: %d\n", n);
+ // expected-warning at -1 {{Untrusted data is used as a format string}}
+ }
+
+ __attribute__((__format__ (__printf__, 1, 2)))
+ static void log_static_method(const char *fmt, ...);
+
+ void test_format_attribute_static_method() {
+ int n;
+ fscanf(stdin, "%d", &n); // Get a tainted value.
+
+ log_static_method("This number is suspicious: %d\n", n); // no-warning
+ }
+};
+
+} // namespace format_attribute
>From a97854213898d42c76bebffc8a093558cfc7e89e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 24 Mar 2025 17:44:10 +0100
Subject: [PATCH 2/3] [analyzer] Fix format attribute handling in
GenericTaintChecker
Previously the `optin.taint.GenericTaint` checker misinterpreted the
parameter indices in situations when the format attribute was applied to
a (non-static) method. This commit fixes this bug.
---
.../Checkers/GenericTaintChecker.cpp | 16 ++++++++++++++++
clang/test/Analysis/taint-generic.cpp | 5 ++---
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index b89a6e2588c98..1b61370a580d9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -1080,7 +1080,23 @@ static bool getPrintfFormatArgumentNum(const CallEvent &Call,
const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
+ // The format attribute uses 1-based parameter indexing, for example
+ // plain `printf(const char *fmt, ...)` would be annotated with
+ // `__format__(__printf__, 1, 2)`, so we need to subtract 1 to get a
+ // 0-based index. (This checker uses 0-based parameter indices.)
ArgNum = Format->getFormatIdx() - 1;
+ // The format attribute also counts the implicit `this` parameter of
+ // methods, so e.g. in `SomeClass::method(const char *fmt, ...)` could be
+ // annotated with `__format__(__printf__, 2, 3)`. This checker doesn't
+ // count the implicit `this` parameter, so in this case we need to subtract
+ // one again.
+ // FIXME: Apparently the implementation of the format attribute doesn't
+ // support methods with an explicit object parameter, so we cannot
+ // implement proper support for that rare case either.
+ const CXXMethodDecl *MDecl = dyn_cast<CXXMethodDecl>(FDecl);
+ if (MDecl && !MDecl->isStatic())
+ ArgNum--;
+
if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum)
return true;
}
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 41fbe313d2b44..3bf3ca3a75099 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -186,10 +186,9 @@ struct Foo {
int n;
fscanf(stdin, "%d", &n); // Get a tainted value.
- // FIXME: The analyzer misinterprets the parameter indices in the format
+ // The analyzer used to misinterpret the parameter indices in the format
// attribute when the format attribute is applied to a method.
- log_method("This number is suspicious: %d\n", n);
- // expected-warning at -1 {{Untrusted data is used as a format string}}
+ log_method("This number is suspicious: %d\n", n); // no-warning
}
__attribute__((__format__ (__printf__, 1, 2)))
>From d66b38269d368391b2bc66395a452c53bd832327 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 26 Mar 2025 10:47:18 +0100
Subject: [PATCH 3/3] Replace "nonmethod" with "freefunc"
---
clang/test/Analysis/taint-generic.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 3bf3ca3a75099..c080313e4d172 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -165,13 +165,13 @@ void top() {
namespace format_attribute {
__attribute__((__format__ (__printf__, 1, 2)))
-void log_nonmethod(const char *fmt, ...);
+void log_freefunc(const char *fmt, ...);
-void test_format_attribute_nonmethod() {
+void test_format_attribute_freefunc() {
int n;
fscanf(stdin, "%d", &n); // Get a tainted value.
- log_nonmethod("This number is suspicious: %d\n", n); // no-warning
+ log_freefunc("This number is suspicious: %d\n", n); // no-warning
}
struct Foo {
More information about the cfe-commits
mailing list