[llvm-branch-commits] [clang] Backport taint analysis slowdown regression fix (PR #105516)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 21 05:52:12 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-static-analyzer-1
Author: Balazs Benics (steakhal)
<details>
<summary>Changes</summary>
Same as the cherry-picked commit + the release notes.
---
Full diff: https://github.com/llvm/llvm-project/pull/105516.diff
5 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+5)
- (modified) clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def (+5)
- (modified) clang/lib/StaticAnalyzer/Checkers/Taint.cpp (+7)
- (modified) clang/test/Analysis/analyzer-config.c (+1)
- (modified) clang/test/Analysis/taint-generic.c (+48-1)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 17ddbfe910f878..fa69fcb9aa29bf 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1391,6 +1391,11 @@ Crash and bug fixes
- Z3 crosschecking (aka. Z3 refutation) is now bounded, and can't consume
more total time than the eymbolic execution itself. (#GH97298)
+- In clang-18, we regressed in terms of analysis time for projects having many
+ nested loops with buffer indexing or shifting or other binary operations.
+ For example, functions computing different hash values. Some of this slowdown
+ was attributed to taint analysis, which is fixed now. (#GH105493)
+
- ``std::addressof``, ``std::as_const``, ``std::forward``,
``std::forward_like``, ``std::move``, ``std::move_if_noexcept``, are now
modeled just like their builtin counterpart. (#GH94193)
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 29aa6a3b8a16e7..737bc8e86cfb6a 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -407,6 +407,11 @@ ANALYZER_OPTION(
ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity",
"The maximum complexity of symbolic constraint.", 35)
+// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
+// Ideally, we should get rid of this option soon.
+ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity",
+ "[DEPRECATED] The maximum complexity of a symbol to carry taint", 9)
+
ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large",
"The maximum times a large function could be inlined.", 32)
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index 6362c82b009d72..0bb5739db4b756 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -12,6 +12,7 @@
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include <optional>
@@ -256,6 +257,12 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
if (!Sym)
return TaintedSymbols;
+ // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
+ if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
+ Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
+ return {};
+ }
+
// Traverse all the symbols this symbol depends on to see if any are tainted.
for (SymbolRef SubSym : Sym->symbols()) {
if (!isa<SymbolData>(SubSym))
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index 2a4c40005a4dc0..1ee0d8e4eecebd 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -96,6 +96,7 @@
// CHECK-NEXT: max-inlinable-size = 100
// CHECK-NEXT: max-nodes = 225000
// CHECK-NEXT: max-symbol-complexity = 35
+// CHECK-NEXT: max-tainted-symbol-complexity = 9
// CHECK-NEXT: max-times-inline-large = 32
// CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
// CHECK-NEXT: mode = deep
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index b0df85f237298d..1c139312734bca 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -63,6 +63,7 @@ void clang_analyzer_isTainted_char(char);
void clang_analyzer_isTainted_wchar(wchar_t);
void clang_analyzer_isTainted_charp(char*);
void clang_analyzer_isTainted_int(int);
+void clang_analyzer_dump_int(int);
int coin();
@@ -459,7 +460,53 @@ unsigned radar11369570_hanging(const unsigned char *arr, int l) {
longcmp(a, t, c);
l -= 12;
}
- return 5/a; // expected-warning {{Division by a tainted value, possibly zero}}
+ return 5/a; // FIXME: Should be a "div by tainted" warning here.
+}
+
+// This computation used to take a very long time.
+void complex_taint_queries(const int *p) {
+ int tainted = 0;
+ scanf("%d", &tainted);
+
+ // Make "tmp" tainted.
+ int tmp = tainted + tainted;
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
+
+ // Make "tmp" SymExpr a lot more complicated by applying computation.
+ // This should balloon the symbol complexity.
+ tmp += p[0] + p[0];
+ tmp += p[1] + p[1];
+ tmp += p[2] + p[2];
+ clang_analyzer_dump_int(tmp); // expected-warning{{((((conj_}} symbol complexity: 8
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
+
+ tmp += p[3] + p[3];
+ clang_analyzer_dump_int(tmp); // expected-warning{{(((((conj_}} symbol complexity: 10
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} 10 is already too complex to be traversed
+
+ tmp += p[4] + p[4];
+ tmp += p[5] + p[5];
+ tmp += p[6] + p[6];
+ tmp += p[7] + p[7];
+ tmp += p[8] + p[8];
+ tmp += p[9] + p[9];
+ tmp += p[10] + p[10];
+ tmp += p[11] + p[11];
+ tmp += p[12] + p[12];
+ tmp += p[13] + p[13];
+ tmp += p[14] + p[14];
+ tmp += p[15] + p[15];
+
+ // The SymExpr still holds the full history of the computation, yet, "isTainted" doesn't traverse the tree as the complexity is over the threshold.
+ clang_analyzer_dump_int(tmp);
+ // expected-warning at -1{{(((((((((((((((((conj_}} symbol complexity: 34
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} FIXME: Ideally, this should still result in "tainted".
+
+ // By making it even one step more complex, then it would hit the "max-symbol-complexity"
+ // threshold and the engine would cut the SymExpr and replace it by a new conjured symbol.
+ tmp += p[16];
+ clang_analyzer_dump_int(tmp); // expected-warning{{conj_}} symbol complexity: 1
+ clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}}
}
// Check that we do not assert of the following code.
``````````
</details>
https://github.com/llvm/llvm-project/pull/105516
More information about the llvm-branch-commits
mailing list