[clang] [clang][analyzer] Add taintedness to argv (PR #178054)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 26 13:48:02 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-static-analyzer-1
Author: Daniel Krupp (dkrupp)
<details>
<summary>Changes</summary>
If the execution environment is untrusted, we assume that the argv of the main function is an attacker
controlled value and set it as an taint analysis source.
---
Full diff: https://github.com/llvm/llvm-project/pull/178054.diff
4 Files Affected:
- (modified) clang/docs/analyzer/checkers.rst (+7)
- (modified) clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (+38-2)
- (added) clang/test/Analysis/taint-diagnostic-visitor-main.c (+54)
- (modified) clang/test/Analysis/taint-generic.c (+13)
``````````diff
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 31edf9e99dc7d..178156c9b2f07 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1374,6 +1374,13 @@ For a more detailed description of configuration options, please see the
* `Config` Specifies the name of the YAML configuration file. The user can
define their own taint sources and sinks.
+* The if the analyzer option `assume-controlled-environment` is set to `false`,
+ it is assumed that the command line arguments and the environment
+ variables of the program are attacker controlled.
+ In particular, the `argv` argument of the `main` function and
+ the return value of the `getenv()` function are assumed to
+ hold tainted values.
+
**Related Guidelines**
* `CWE Data Neutralization Issues
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index bea8f3f13ba21..69af9919e34af 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -63,6 +63,8 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
constexpr llvm::StringLiteral MsgCustomSink =
"Untrusted data is passed to a user-defined sink";
+const std::string MsgTaintOrigin = "Taint originated here";
+
using ArgIdxTy = int;
using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
@@ -159,7 +161,7 @@ const NoteTag *taintOriginTrackerTag(CheckerContext &C,
return "";
}
if (TaintedSymbols.empty())
- return "Taint originated here";
+ return MsgTaintOrigin;
for (auto Sym : TaintedSymbols) {
BR.markInteresting(Sym);
@@ -378,10 +380,12 @@ struct GenericTaintRuleParser {
CheckerManager &Mgr;
};
-class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
+class GenericTaintChecker
+ : public Checker<check::PreCall, check::PostCall, check::BeginFunction> {
public:
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+ void checkBeginFunction(CheckerContext &C) const;
void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
const char *Sep) const override;
@@ -827,8 +831,40 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
std::make_move_iterator(Rules.end()));
}
+// The incoming parameters of the main function get tainted
+// if the program called in an untrusted environment.
+void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const {
+ if (!C.inTopFrame() || C.getAnalysisManager()
+ .getAnalyzerOptions()
+ .ShouldAssumeControlledEnvironment)
+ return;
+
+ const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl());
+ if (!FD || FD->param_size() < 2 || !FD->isMain())
+ return;
+
+ ProgramStateRef State = C.getState();
+ const MemRegion *ArgvReg =
+ State->getRegion(FD->parameters()[1], C.getLocationContext());
+ SVal ArgvSval = State->getSVal(ArgvReg);
+ // Add taintedness to argv**
+ State = addTaint(State, ArgvSval);
+
+ const NoteTag *OriginatingTag =
+ C.getNoteTag([ArgvSval](PathSensitiveBugReport &BR) -> std::string {
+ // We give diagnostics only for taint related reports
+ if (!BR.isInteresting(ArgvSval) ||
+ BR.getBugType().getCategory() != categories::TaintedData)
+ return "";
+
+ return MsgTaintOrigin;
+ });
+ C.addTransition(State, OriginatingTag);
+}
+
void GenericTaintChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
+
initTaintRules(C);
// FIXME: this should be much simpler.
diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c
new file mode 100644
index 0000000000000..315e352cb549f
--- /dev/null
+++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c
@@ -0,0 +1,54 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -DUNTRUSTED -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify=expected,untrusted %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=true -analyzer-output=text -verify=expected,trusted %s
+
+// This file is for testing enhanced diagnostics produced by the GenericTaintChecker
+
+typedef __typeof(sizeof(int)) size_t;
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
+int scanf(const char *restrict format, ...);
+int system(const char *command);
+char* getenv( const char* env_var );
+size_t strlen( const char* str );
+char *strcat( char *dest, const char *src );
+char * strncat ( char * destination, const char * source, size_t num );
+char* strcpy( char* dest, const char* src );
+char * strncpy ( char * destination, const char * source, size_t num );
+void *malloc(size_t size );
+void free( void *ptr );
+char *fgets(char *str, int n, FILE *stream);
+extern FILE *stdin;
+
+
+#ifdef UNTRUSTED
+// In an untrusted environment the cmd line arguments
+// are assumed to be tainted.
+int main(int argc, char * argv[]) {// untrusted-note {{Taint originated here}}
+ if (argc < 1)// untrusted-note {{'argc' is >= 1}}
+ // untrusted-note at -1 {{Taking false branch}}
+ return 1;
+ char cmd[2048] = "/bin/cat ";
+ char filename[1024];
+ strncpy(filename, argv[1], sizeof(filename)-1); // untrusted-note {{Taint propagated to the 1st argument}}
+ strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// untrusted-note {{Taint propagated to the 1st argument}}
+ system(cmd);// untrusted-warning {{Untrusted data is passed to a system call}}
+ // untrusted-note at -1 {{Untrusted data is passed to a system call}}
+ return 0;
+ }
+#else
+int main(int argc, char * argv[]) {
+ if (argc < 1)// trusted-note {{'argc' is >= 1}}
+ // trusted-note at -1 {{Taking false branch}}
+ return 1;
+ char cmd[2048] = "/bin/cat ";
+ char filename[1024];
+ scanf("%s", filename);// trusted-note {{Taint originated here}}
+ // trusted-note at -1 {{Taint propagated to the 2nd argument}}
+ strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// trusted-note {{Taint propagated to the 1st argument}}
+ strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// trusted-note {{Taint propagated to the 1st argument}}
+ system(cmd);// trusted-warning {{Untrusted data is passed to a system call}}
+ // trusted-note at -1 {{Untrusted data is passed to a system call}}
+ return 0;
+ }
+ #endif
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index 6017483f06b6d..93a5b9b3a53cd 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -116,6 +116,7 @@ char *stpcpy(char *restrict s1, const char *restrict s2);
char *strncpy( char * destination, const char * source, size_t num );
char *strndup(const char *s, size_t n);
char *strncat(char *restrict s1, const char *restrict s2, size_t n);
+char *strcat( char *dest, const char *src );
void *malloc(size_t);
void *calloc(size_t nmemb, size_t size);
@@ -1396,3 +1397,15 @@ void testAcceptPropagates() {
int acceptSocket = accept(listenSocket, 0, 0);
clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
}
+
+int main(int argc, char * argv[]) {
+ if (argc < 1)
+ return 1;
+ char cmd[2048] = "/bin/cat ";
+ char filename[1024];
+ clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}}
+ strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1);
+ system(cmd);// expected-warning {{Untrusted data is passed to a system call}}
+ return 0;
+ }
+
\ No newline at end of file
``````````
</details>
https://github.com/llvm/llvm-project/pull/178054
More information about the cfe-commits
mailing list