[clang] [clang][analyzer] Add taintedness to argv (PR #178054)
Daniel Krupp via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 27 02:06:00 PST 2026
https://github.com/dkrupp updated https://github.com/llvm/llvm-project/pull/178054
>From 55858029bdd834d5782ef533077c0daba7aef643 Mon Sep 17 00:00:00 2001
From: Daniel Krupp <daniel.krupp at ericsson.com>
Date: Mon, 26 Jan 2026 22:17:41 +0100
Subject: [PATCH] [clang][analyzer] Add taintedness to argv
If the execution environment is untrusted, we assume
that the argv of the main function is an attacker
controlled value and set it as a taint analysis source.
---
clang/docs/analyzer/checkers.rst | 7 +++
.../Checkers/GenericTaintChecker.cpp | 40 +++++++++++++-
.../Analysis/taint-diagnostic-visitor-main.c | 54 +++++++++++++++++++
clang/test/Analysis/taint-generic.c | 13 +++++
4 files changed, 112 insertions(+), 2 deletions(-)
create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main.c
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 31edf9e99dc7d..178156c9b2f07 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1374,6 +1374,13 @@ For a more detailed description of configuration options, please see the
* `Config` Specifies the name of the YAML configuration file. The user can
define their own taint sources and sinks.
+* If the analyzer option `assume-controlled-environment` is set to `false`,
+ it is assumed that the command line arguments and the environment
+ variables of the program are attacker controlled.
+ In particular, the `argv` argument of the `main` function and
+ the return value of the `getenv()` function are assumed to
+ hold tainted values.
+
**Related Guidelines**
* `CWE Data Neutralization Issues
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index bea8f3f13ba21..9d88f85c88ad2 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -63,6 +63,8 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
constexpr llvm::StringLiteral MsgCustomSink =
"Untrusted data is passed to a user-defined sink";
+const std::string MsgTaintOrigin = "Taint originated here";
+
using ArgIdxTy = int;
using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
@@ -159,7 +161,7 @@ const NoteTag *taintOriginTrackerTag(CheckerContext &C,
return "";
}
if (TaintedSymbols.empty())
- return "Taint originated here";
+ return MsgTaintOrigin;
for (auto Sym : TaintedSymbols) {
BR.markInteresting(Sym);
@@ -378,10 +380,12 @@ struct GenericTaintRuleParser {
CheckerManager &Mgr;
};
-class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
+class GenericTaintChecker
+ : public Checker<check::PreCall, check::PostCall, check::BeginFunction> {
public:
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+ void checkBeginFunction(CheckerContext &C) const;
void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
const char *Sep) const override;
@@ -827,8 +831,40 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
std::make_move_iterator(Rules.end()));
}
+// The incoming parameters of the main function get tainted
+// if the program is called in an untrusted environment.
+void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const {
+ if (!C.inTopFrame() || C.getAnalysisManager()
+ .getAnalyzerOptions()
+ .ShouldAssumeControlledEnvironment)
+ return;
+
+ const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl());
+ if (!FD || !FD->isMain() || FD->param_size() < 2)
+ return;
+
+ ProgramStateRef State = C.getState();
+ const MemRegion *ArgvReg =
+ State->getRegion(FD->parameters()[1], C.getLocationContext());
+ SVal ArgvSval = State->getSVal(ArgvReg);
+ // Mark the value stored in argv (a char **) as tainted
+ State = addTaint(State, ArgvSval);
+
+ const NoteTag *OriginatingTag =
+ C.getNoteTag([ArgvSval](PathSensitiveBugReport &BR) -> std::string {
+ // We give diagnostics only for taint-related reports.
+ if (!BR.isInteresting(ArgvSval) ||
+ BR.getBugType().getCategory() != categories::TaintedData)
+ return "";
+
+ return MsgTaintOrigin;
+ });
+ C.addTransition(State, OriginatingTag);
+}
+
void GenericTaintChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
+
initTaintRules(C);
// FIXME: this should be much simpler.
diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c
new file mode 100644
index 0000000000000..315e352cb549f
--- /dev/null
+++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c
@@ -0,0 +1,54 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -DUNTRUSTED -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify=expected,untrusted %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=true -analyzer-output=text -verify=expected,trusted %s
+
+// This file is for testing enhanced diagnostics produced by the GenericTaintChecker
+
+typedef __typeof(sizeof(int)) size_t;
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
+int scanf(const char *restrict format, ...);
+int system(const char *command);
+char* getenv( const char* env_var );
+size_t strlen( const char* str );
+char *strcat( char *dest, const char *src );
+char * strncat ( char * destination, const char * source, size_t num );
+char* strcpy( char* dest, const char* src );
+char * strncpy ( char * destination, const char * source, size_t num );
+void *malloc(size_t size );
+void free( void *ptr );
+char *fgets(char *str, int n, FILE *stream);
+extern FILE *stdin;
+
+
+#ifdef UNTRUSTED
+// In an untrusted environment the cmd line arguments
+// are assumed to be tainted.
+int main(int argc, char * argv[]) {// untrusted-note {{Taint originated here}}
+ if (argc < 1)// untrusted-note {{'argc' is >= 1}}
+ // untrusted-note@-1 {{Taking false branch}}
+ return 1;
+ char cmd[2048] = "/bin/cat ";
+ char filename[1024];
+ strncpy(filename, argv[1], sizeof(filename)-1); // untrusted-note {{Taint propagated to the 1st argument}}
+ strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// untrusted-note {{Taint propagated to the 1st argument}}
+ system(cmd);// untrusted-warning {{Untrusted data is passed to a system call}}
+ // untrusted-note@-1 {{Untrusted data is passed to a system call}}
+ return 0;
+ }
+#else
+int main(int argc, char * argv[]) {
+ if (argc < 1)// trusted-note {{'argc' is >= 1}}
+ // trusted-note@-1 {{Taking false branch}}
+ return 1;
+ char cmd[2048] = "/bin/cat ";
+ char filename[1024];
+ scanf("%s", filename);// trusted-note {{Taint originated here}}
+ // trusted-note@-1 {{Taint propagated to the 2nd argument}}
+ strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// trusted-note {{Taint propagated to the 1st argument}}
+ strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// trusted-note {{Taint propagated to the 1st argument}}
+ system(cmd);// trusted-warning {{Untrusted data is passed to a system call}}
+ // trusted-note@-1 {{Untrusted data is passed to a system call}}
+ return 0;
+ }
+ #endif
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index 6017483f06b6d..93a5b9b3a53cd 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -116,6 +116,7 @@ char *stpcpy(char *restrict s1, const char *restrict s2);
char *strncpy( char * destination, const char * source, size_t num );
char *strndup(const char *s, size_t n);
char *strncat(char *restrict s1, const char *restrict s2, size_t n);
+char *strcat( char *dest, const char *src );
void *malloc(size_t);
void *calloc(size_t nmemb, size_t size);
@@ -1396,3 +1397,15 @@ void testAcceptPropagates() {
int acceptSocket = accept(listenSocket, 0, 0);
clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
}
+
+int main(int argc, char * argv[]) {
+ if (argc < 1)
+ return 1;
+ char cmd[2048] = "/bin/cat ";
+ char filename[1024];
+ clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}}
+ strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1);
+ system(cmd);// expected-warning {{Untrusted data is passed to a system call}}
+ return 0;
+ }
+
\ No newline at end of file
More information about the cfe-commits
mailing list