[clang] [analyzer] Limit `isTainted()` by skipping complicated symbols (PR #105493)
Balazs Benics via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 21 05:08:13 PDT 2024
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/105493
>From f9f481a55d174746205f33472e49410b7f57bc5a Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Wed, 21 Aug 2024 11:51:30 +0200
Subject: [PATCH 1/4] [analyzer] Limit `isTainted()` by skipping complicated
symbols
As discussed in
https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570/10
Some `isTainted()` queries can blow up the analysis times, and
effectively halt the analysis under specific workloads.
We don't really have the time now to do a caching re-implementation of
`isTainted()`, so we need to workaround the case.
The workaround with the smallest blast radius was to limit what symbols
`isTainted()` does the query (by walking the SymExpr).
So far, the threshold 10 worked for us, but this value can be overridden
using the "max-tainted-symbol-complexity" config value.
This new option is "deprecated" from the getgo, as I expect this issue
to be fixed within the next few months and I don't want users to
override this value anyways. If they do, this message will let them know
that they are on their own, and the next release may break them (as we
no longer recognize this option if we drop it).
---
.../StaticAnalyzer/Core/AnalyzerOptions.def | 5 ++
clang/lib/StaticAnalyzer/Checkers/Taint.cpp | 7 +++
clang/test/Analysis/analyzer-config.c | 1 +
clang/test/Analysis/taint-generic.c | 49 ++++++++++++++++++-
4 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 29aa6a3b8a16e7..98c659192b55d5 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -407,6 +407,11 @@ ANALYZER_OPTION(
ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity",
"The maximum complexity of symbolic constraint.", 35)
+// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
+// Ideally, we should get rid of this option soon.
+ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity",
+ "[DEPRECATED] The maximum complexity of symbolic to carry taint", 10)
+
ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large",
"The maximum times a large function could be inlined.", 32)
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index 6362c82b009d72..5408db85bbb0bb 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -12,6 +12,7 @@
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include <optional>
@@ -256,6 +257,12 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
if (!Sym)
return TaintedSymbols;
+ // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
+ if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
+ Sym->computeComplexity() >= Opts.MaxTaintedSymbolComplexity) {
+ return {};
+ }
+
// Traverse all the symbols this symbol depends on to see if any are tainted.
for (SymbolRef SubSym : Sym->symbols()) {
if (!isa<SymbolData>(SubSym))
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index b8dbcdd7e55afe..13cb50451f2618 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -94,6 +94,7 @@
// CHECK-NEXT: max-inlinable-size = 100
// CHECK-NEXT: max-nodes = 225000
// CHECK-NEXT: max-symbol-complexity = 35
+// CHECK-NEXT: max-tainted-symbol-complexity = 10
// CHECK-NEXT: max-times-inline-large = 32
// CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
// CHECK-NEXT: mode = deep
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index b0df85f237298d..174ecaabfbab44 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -63,6 +63,7 @@ void clang_analyzer_isTainted_char(char);
void clang_analyzer_isTainted_wchar(wchar_t);
void clang_analyzer_isTainted_charp(char*);
void clang_analyzer_isTainted_int(int);
+void clang_analyzer_dump_int(int);
int coin();
@@ -459,7 +460,53 @@ unsigned radar11369570_hanging(const unsigned char *arr, int l) {
longcmp(a, t, c);
l -= 12;
}
- return 5/a; // expected-warning {{Division by a tainted value, possibly zero}}
+ return 5/a; // FIXME: Should be a "div by tainted" warning here.
+}
+
+// This computation used to take a very long time.
+void complex_taint_queries(const int *p) {
+ int tainted = 0;
+ scanf("%d", &tainted);
+
+ // Make "tmp" tainted.
+ int tmp = tainted + tainted;
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
+
+ // Make "tmp" SymExpr a lot more complicated by applying computation.
+ // This should balloon the symbol complexity.
+ tmp += p[0] + p[0];
+ tmp += p[1] + p[1];
+ tmp += p[2] + p[2];
+ clang_analyzer_dump_int(tmp); // expected-warning{{((((conj_}} symbol complexity: 8
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
+
+ tmp += p[3] + p[3];
+ clang_analyzer_dump_int(tmp); // expected-warning{{(((((conj_}} symbol complexity: 10
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} 10 is already to ocomplex to be traversed
+
+ tmp += p[4] + p[4];
+ tmp += p[5] + p[5];
+ tmp += p[6] + p[6];
+ tmp += p[7] + p[7];
+ tmp += p[8] + p[8];
+ tmp += p[9] + p[9];
+ tmp += p[10] + p[10];
+ tmp += p[11] + p[11];
+ tmp += p[12] + p[12];
+ tmp += p[13] + p[13];
+ tmp += p[14] + p[14];
+ tmp += p[15] + p[15];
+
+ // The SymExpr still holds the full history of the computation, yet, "isTainted" doesn't traverse the tree as the complexity is over the threshold.
+ clang_analyzer_dump_int(tmp);
+ // expected-warning at -1{{(((((((((((((((((conj_}} symbol complexity: 34
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} FIXME: Ideally, this should still result in "tainted".
+
+ // By making it even one step more complex, then it would hit the "max-symbol-complexity"
+ // threshold and the engine would cut the SymExpr and replace it by a new conjured symbol.
+ tmp += p[16];
+ clang_analyzer_dump_int(tmp); // expected-warning{{conj_}} symbol complexity: 1
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}}
}
// Check that we do not assert of the following code.
>From 02c8f06898b938e3e64732679a95de0944980ad6 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Wed, 21 Aug 2024 14:04:33 +0200
Subject: [PATCH 2/4] Fix typo in comment
---
clang/test/Analysis/taint-generic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index 174ecaabfbab44..1c139312734bca 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -482,7 +482,7 @@ void complex_taint_queries(const int *p) {
tmp += p[3] + p[3];
clang_analyzer_dump_int(tmp); // expected-warning{{(((((conj_}} symbol complexity: 10
- clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} 10 is already to ocomplex to be traversed
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} 10 is already too complex to be traversed
tmp += p[4] + p[4];
tmp += p[5] + p[5];
>From f6227e837e3bc484b2da1dc29d5ea6b849d5c446 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Wed, 21 Aug 2024 14:05:35 +0200
Subject: [PATCH 3/4] Fix grammar in AnalyzerOptions.def
---
clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 98c659192b55d5..83aeaea012b212 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -410,7 +410,7 @@ ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity",
// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
// Ideally, we should get rid of this option soon.
ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity",
- "[DEPRECATED] The maximum complexity of symbolic to carry taint", 10)
+ "[DEPRECATED] The maximum complexity of a symbol to carry taint", 10)
ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large",
"The maximum times a large function could be inlined.", 32)
>From 195b885fc513ac8e00ecee21f63f15b57b37d33e Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Wed, 21 Aug 2024 14:06:31 +0200
Subject: [PATCH 4/4] Adjust the threshold to 9, and use `>` when checking
Fixes
https://github.com/llvm/llvm-project/pull/105493#discussion_r1724914010
---
clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def | 2 +-
clang/lib/StaticAnalyzer/Checkers/Taint.cpp | 2 +-
clang/test/Analysis/analyzer-config.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 83aeaea012b212..737bc8e86cfb6a 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -410,7 +410,7 @@ ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity",
// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
// Ideally, we should get rid of this option soon.
ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity",
- "[DEPRECATED] The maximum complexity of a symbol to carry taint", 10)
+ "[DEPRECATED] The maximum complexity of a symbol to carry taint", 9)
ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large",
"The maximum times a large function could be inlined.", 32)
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index 5408db85bbb0bb..0bb5739db4b756 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -259,7 +259,7 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
- Sym->computeComplexity() >= Opts.MaxTaintedSymbolComplexity) {
+ Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
return {};
}
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index 13cb50451f2618..8eb869bac46f8f 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -94,7 +94,7 @@
// CHECK-NEXT: max-inlinable-size = 100
// CHECK-NEXT: max-nodes = 225000
// CHECK-NEXT: max-symbol-complexity = 35
-// CHECK-NEXT: max-tainted-symbol-complexity = 10
+// CHECK-NEXT: max-tainted-symbol-complexity = 9
// CHECK-NEXT: max-times-inline-large = 32
// CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
// CHECK-NEXT: mode = deep
More information about the cfe-commits
mailing list