[clang] [analyzer] Fix taint sink rules for exec-like functions (PR #66358)
Balazs Benics via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 14 03:49:36 PDT 2023
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/66358:
Variadic arguments were not considered as taint sink arguments. I also decided to extend the list of exec-like functions.
(Juliet CWE78_OS_Command_Injection__char_connect_socket_execl)
---
This commit was split off from PR https://github.com/llvm/llvm-project/pull/66074 as requested.
>From 4b7a44c3f6267713375c43d9ccf25240d335ee53 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Thu, 14 Sep 2023 11:55:10 +0200
Subject: [PATCH] [analyzer] Fix taint sink rules for exec-like functions
Variadic arguments were not considered as taint sink arguments.
I also decided to extend the list of exec-like functions.
(Juliet CWE78_OS_Command_Injection__char_connect_socket_execl)
---
.../Checkers/GenericTaintChecker.cpp | 14 +-
clang/test/Analysis/taint-generic.c | 161 +++++++++++++++++-
2 files changed, 167 insertions(+), 8 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 5da0f34b3d0464f..8ebedc1269dc1d1 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -743,12 +743,14 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
// Sinks
{{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
{{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
+ {{{"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+ {{{"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+ {{{"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+ {{{"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+ {{{"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+ {{{"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+ {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+ {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
{{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
{{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
{{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index f08186fc8c4c1e5..dbe954d597054da 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -63,6 +63,8 @@ void clang_analyzer_isTainted_wchar(wchar_t);
void clang_analyzer_isTainted_charp(char*);
void clang_analyzer_isTainted_int(int);
+int coin();
+
int scanf(const char *restrict format, ...);
char *gets(char *str);
char *gets_s(char *str, rsize_t n);
@@ -119,6 +121,41 @@ void *malloc(size_t);
void *calloc(size_t nmemb, size_t size);
void bcopy(void *s1, void *s2, size_t n);
+
+// function | pathname | filename | fd | arglist | argv[] | envp[]
+// ===============================================================
+// 1 execl | X | | | X | |
+// 2 execle | X | | | X | | X
+// 3 execlp | | X | | X | |
+// 4 execv | X | | | | X |
+// 5 execve | X | | | | X | X
+// 6 execvp | | X | | | X |
+// 7 execvpe | | X | | | X | X
+// 8 fexecve | | | X | | X | X
+// ===============================================================
+// letter | | p | f | l | v | e
+//
+// legend:
+// - pathname: rel/abs path to the binary
+// - filename: file name searched in PATH to execute the binary
+// - fd: accepts a file descriptor
+// - arglist: accepts variadic arguments
+// - argv: accepts a pointer to array, denoting the new argv
+// - envp: accepts a pointer to array, denoting the new envp
+
+int execl(const char *path, const char *arg, ...);
+int execle(const char *path, const char *arg, ...);
+int execlp(const char *file, const char *arg, ...);
+int execv(const char *path, char *const argv[]);
+int execve(const char *path, char *const argv[], char *const envp[]);
+int execvp(const char *file, char *const argv[]);
+int execvpe(const char *file, char *const argv[], char *const envp[]);
+int fexecve(int fd, char *const argv[], char *const envp[]);
+FILE *popen(const char *command, const char *type);
+int pclose(FILE *stream);
+int system(const char *command);
+
+
typedef size_t socklen_t;
struct sockaddr {
@@ -225,7 +262,6 @@ void testUncontrolledFormatString(char **p) {
}
-int system(const char *command);
void testTaintSystemCall(void) {
char buffer[156];
char addr[128];
@@ -288,7 +324,6 @@ void testTaintedBufferSize(void) {
#define SOCK_STREAM 1
int socket(int, int, int);
size_t read(int, void *, size_t);
-int execl(const char *, const char *, ...);
void testSocket(void) {
int sock;
@@ -1120,6 +1155,128 @@ void testConfigurationSinks(void) {
// expected-warning at -1 {{Untrusted data is passed to a user-defined sink}}
}
+int test_exec_like_functions() {
+ char buf[100] = {0};
+ scanf("%99s", buf);
+ clang_analyzer_isTainted_char(buf[0]); // expected-warning {{YES}}
+
+ char *cleanArray[] = {"ENV1=V1", "ENV2=V2", NULL};
+ char *taintedArray[] = {buf, "ENV2=V2", NULL};
+ clang_analyzer_isTainted_char(taintedArray[0][0]); // expected-warning {{YES}}
+ clang_analyzer_isTainted_char(*(char*)taintedArray[0]); // expected-warning {{YES}}
+ clang_analyzer_isTainted_char(*(char*)taintedArray); // expected-warning {{NO}} We should have YES here.
+ // FIXME: Above the triple pointer indirection will confuse the checker,
+ // as we only check two levels. The results would be worse, if the tainted
+ // subobject ("buf") would not be at the beginning of the enclosing object,
+ // for the same reason.
+
+ switch (coin()) {
+ default: break;
+ // Execute `path` with all arguments after `path` until a NULL pointer
+ // and environment from `environ'.
+ case 0: return execl("path", "arg0", "arg1", "arg2", NULL); // no-warning
+ case 1: return execl(buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execl("path", buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 3: return execl("path", "arg0", buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 4: return execl("path", "arg0", "arg1", buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `path` with all arguments after `PATH` until a NULL pointer,
+ // and the argument after that for environment.
+ case 0: return execle("path", "arg0", "arg1", NULL, cleanArray); // no-warning
+ case 1: return execle( buf, "arg0", "arg1", NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execle("path", buf, "arg1", NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 3: return execle("path", "arg0", buf, NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 4: return execle("path", "arg0", "arg1", NULL, buf); // expected-warning {{Untrusted data is passed to a system call}}
+ case 5: return execle("path", "arg0", "arg1", NULL, taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `file`, searching in the `PATH' environment variable if it
+ // contains no slashes, with all arguments after `file` until a NULL
+ // pointer and environment from `environ'.
+ case 0: return execlp("file", "arg0", "arg1", "arg2", NULL); // no-warning
+ case 1: return execlp( buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execlp("file", buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 3: return execlp("file", "arg0", buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 4: return execlp("file", "arg0", "arg1", buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `path` with arguments `ARGV` and environment from `environ'.
+ case 0: return execv("path", /*argv=*/ cleanArray); // no-warning
+ case 1: return execv( buf, /*argv=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execv("path", /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Replace the current process, executing `path` with arguments `ARGV`
+ // and environment `ENVP`. `ARGV` and `ENVP` are terminated by NULL pointers.
+ case 0: return execve("path", /*argv=*/ cleanArray, /*envp=*/cleanArray); // no-warning
+ case 1: return execve( buf, /*argv=*/ cleanArray, /*envp=*/cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execve("path", /*argv=*/taintedArray, /*envp=*/cleanArray); // FIXME: We might wanna have a report here.
+ case 3: return execve("path", /*argv=*/cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `file`, searching in the `PATH' environment variable if it
+ // contains no slashes, with arguments `ARGV` and environment from `environ'.
+ case 0: return execvp("file", /*argv=*/ cleanArray); // no-warning
+ case 1: return execvp( buf, /*argv=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execvp("file", /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ // execvpe
+ switch (coin()) {
+ default: break;
+ // Execute `file`, searching in the `PATH' environment variable if it
+ // contains no slashes, with arguments `ARGV` and environment `ENVP`.
+ // `ARGV` and `ENVP` are terminated by NULL pointers.
+ case 0: return execvpe("file", /*argv=*/ cleanArray, /*envp=*/ cleanArray); // no-warning
+ case 1: return execvpe( buf, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execvpe("file", /*argv=*/taintedArray, /*envp=*/ cleanArray); // FIXME: We might wanna have a report here.
+ case 3: return execvpe("file", /*argv=*/ cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ int cleanFD = coin();
+ int taintedFD;
+ scanf("%d", &taintedFD);
+ clang_analyzer_isTainted_int(taintedFD); // expected-warning {{YES}}
+
+ switch (coin()) {
+ default: break;
+ // Execute the file `FD` refers to, overlaying the running program image.
+ // `ARGV` and `ENVP` are passed to the new program, as for `execve'.
+ case 0: return fexecve( cleanFD, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // no-warning
+ case 1: return fexecve(taintedFD, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return fexecve( cleanFD, /*argv=*/taintedArray, /*envp=*/ cleanArray); // FIXME: We might wanna have a report here.
+ case 3: return fexecve( cleanFD, /*argv=*/ cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Create a new stream connected to a pipe running the given `command`.
+ case 0: return pclose(popen("command", /*mode=*/"r")); // no-warning
+ case 1: return pclose(popen( buf, /*mode=*/"r")); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return pclose(popen("command", /*mode=*/buf)); // 'mode' is not a taint sink.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute the given line as a shell command.
+ case 0: return system("command"); // no-warning
+ case 1: return system( buf); // expected-warning {{Untrusted data is passed to a system call}}
+ }
+
+ return 0;
+}
+
void testUnknownFunction(void (*foo)(void)) {
foo(); // no-crash
}
More information about the cfe-commits
mailing list