[clang] [analyzer] Fix taint sink rules for exec-like functions (PR #66358)

via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 14 03:50:21 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-static-analyzer-1
            
<details>
<summary>Changes</summary>
Variadic arguments were not considered as taint sink arguments. I also decided to extend the list of exec-like functions.

(Juliet CWE78_OS_Command_Injection__char_connect_socket_execl)

---

This commit was split off from PR https://github.com/llvm/llvm-project/pull/66074 as requested.
--
Full diff: https://github.com/llvm/llvm-project/pull/66358.diff

2 Files Affected:

- (modified) clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (+8-6) 
- (modified) clang/test/Analysis/taint-generic.c (+159-2) 


<pre>
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 5da0f34b3d0464f..8ebedc1269dc1d1 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -743,12 +743,14 @@ void GenericTaintChecker::initTaintRules(CheckerContext &amp;C) const {
       // Sinks
       {{{&quot;system&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
       {{{&quot;popen&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{&quot;execl&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{&quot;execle&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{&quot;execlp&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{&quot;execvp&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{&quot;execvP&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{&quot;execve&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
+      {{{&quot;execl&quot;}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+      {{{&quot;execle&quot;}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+      {{{&quot;execlp&quot;}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+      {{{&quot;execv&quot;}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+      {{{&quot;execve&quot;}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+      {{{&quot;fexecve&quot;}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+      {{{&quot;execvp&quot;}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+      {{{&quot;execvpe&quot;}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
       {{{&quot;dlopen&quot;}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
       {{CDF_MaybeBuiltin, {{&quot;malloc&quot;}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
       {{CDF_MaybeBuiltin, {{&quot;calloc&quot;}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index f08186fc8c4c1e5..dbe954d597054da 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -63,6 +63,8 @@ void clang_analyzer_isTainted_wchar(wchar_t);
 void clang_analyzer_isTainted_charp(char*);
 void clang_analyzer_isTainted_int(int);
 
+int coin();
+
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
 char *gets_s(char *str, rsize_t n);
@@ -119,6 +121,41 @@ void *malloc(size_t);
 void *calloc(size_t nmemb, size_t size);
 void bcopy(void *s1, void *s2, size_t n);
 
+
+//    function | pathname | filename | fd | arglist | argv[] | envp[]
+//    ===============================================================
+// 1  execl    |     X    |          |    |    X    |        |
+// 2  execle   |     X    |          |    |    X    |        |   X
+// 3  execlp   |          |     X    |    |    X    |        |
+// 4  execv    |     X    |          |    |         |    X   |
+// 5  execve   |     X    |          |    |         |    X   |   X
+// 6  execvp   |          |     X    |    |         |    X   |
+// 7  execvpe  |          |     X    |    |         |    X   |   X
+// 8  fexecve  |          |          |  X |         |    X   |   X
+//    ===============================================================
+//    letter   |          |     p    |  f |    l    |    v   |   e
+//
+// legend:
+//  - pathname: rel/abs path to the binary
+//  - filename: file name searched in PATH to execute the binary
+//  - fd:       accepts a file descriptor
+//  - arglist:  accepts variadic arguments
+//  - argv:     accepts a pointer to array, denoting the new argv
+//  - envp:     accepts a pointer to array, denoting the new envp
+
+int execl(const char *path, const char *arg, ...);
+int execle(const char *path, const char *arg, ...);
+int execlp(const char *file, const char *arg, ...);
+int execv(const char *path, char *const argv[]);
+int execve(const char *path, char *const argv[], char *const envp[]);
+int execvp(const char *file, char *const argv[]);
+int execvpe(const char *file, char *const argv[], char *const envp[]);
+int fexecve(int fd, char *const argv[], char *const envp[]);
+FILE *popen(const char *command, const char *type);
+int pclose(FILE *stream);
+int system(const char *command);
+
+
 typedef size_t socklen_t;
 
 struct sockaddr {
@@ -225,7 +262,6 @@ void testUncontrolledFormatString(char **p) {
 
 }
 
-int system(const char *command);
 void testTaintSystemCall(void) {
   char buffer[156];
   char addr[128];
@@ -288,7 +324,6 @@ void testTaintedBufferSize(void) {
 #define SOCK_STREAM 1
 int socket(int, int, int);
 size_t read(int, void *, size_t);
-int  execl(const char *, const char *, ...);
 
 void testSocket(void) {
   int sock;
@@ -1120,6 +1155,128 @@ void testConfigurationSinks(void) {
   // expected-warning at -1 {{Untrusted data is passed to a user-defined sink}}
 }
 
+int test_exec_like_functions() {
+  char buf[100] = {0};
+  scanf(&quot;%99s&quot;, buf);
+  clang_analyzer_isTainted_char(buf[0]); // expected-warning {{YES}}
+
+  char *cleanArray[] = {&quot;ENV1=V1&quot;, &quot;ENV2=V2&quot;, NULL};
+  char *taintedArray[] = {buf, &quot;ENV2=V2&quot;, NULL};
+  clang_analyzer_isTainted_char(taintedArray[0][0]);      // expected-warning {{YES}}
+  clang_analyzer_isTainted_char(*(char*)taintedArray[0]); // expected-warning {{YES}}
+  clang_analyzer_isTainted_char(*(char*)taintedArray);    // expected-warning {{NO}}  We should have YES here.
+  // FIXME: Above the triple pointer indirection will confuse the checker,
+  // as we only check two levels. The results would be worse, if the tainted
+  // subobject (&quot;buf&quot;) would not be at the beginning of the enclosing object,
+  // for the same reason.
+
+  switch (coin()) {
+    default: break;
+    // Execute `path` with all arguments after `path` until a NULL pointer
+    // and environment from `environ&#x27;.
+    case 0: return execl(&quot;path&quot;, &quot;arg0&quot;, &quot;arg1&quot;, &quot;arg2&quot;, NULL); // no-warning
+    case 1: return execl(buf, &quot;arg0&quot;, &quot;arg1&quot;, &quot;arg2&quot;, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execl(&quot;path&quot;, buf, &quot;arg1&quot;, &quot;arg2&quot;, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 3: return execl(&quot;path&quot;, &quot;arg0&quot;, buf, &quot;arg2&quot;, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 4: return execl(&quot;path&quot;, &quot;arg0&quot;, &quot;arg1&quot;, buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `path` with all arguments after `PATH` until a NULL pointer,
+    // and the argument after that for environment.
+    case 0: return execle(&quot;path&quot;, &quot;arg0&quot;, &quot;arg1&quot;, NULL,   cleanArray); // no-warning
+    case 1: return execle(   buf, &quot;arg0&quot;, &quot;arg1&quot;, NULL,   cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execle(&quot;path&quot;,    buf, &quot;arg1&quot;, NULL,   cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 3: return execle(&quot;path&quot;, &quot;arg0&quot;,    buf, NULL,   cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 4: return execle(&quot;path&quot;, &quot;arg0&quot;, &quot;arg1&quot;, NULL,          buf); // expected-warning {{Untrusted data is passed to a system call}}
+    case 5: return execle(&quot;path&quot;, &quot;arg0&quot;, &quot;arg1&quot;, NULL, taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `file`, searching in the `PATH&#x27; environment variable if it
+    // contains no slashes, with all arguments after `file` until a NULL
+    // pointer and environment from `environ&#x27;.
+    case 0: return execlp(&quot;file&quot;, &quot;arg0&quot;, &quot;arg1&quot;, &quot;arg2&quot;, NULL); // no-warning
+    case 1: return execlp(   buf, &quot;arg0&quot;, &quot;arg1&quot;, &quot;arg2&quot;, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execlp(&quot;file&quot;,    buf, &quot;arg1&quot;, &quot;arg2&quot;, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 3: return execlp(&quot;file&quot;, &quot;arg0&quot;,    buf, &quot;arg2&quot;, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 4: return execlp(&quot;file&quot;, &quot;arg0&quot;, &quot;arg1&quot;,    buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `path` with arguments `ARGV` and environment from `environ&#x27;.
+    case 0: return execv(&quot;path&quot;, /*argv=*/  cleanArray); // no-warning
+    case 1: return execv(   buf, /*argv=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execv(&quot;path&quot;, /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Replace the current process, executing `path` with arguments `ARGV`
+    // and environment `ENVP`. `ARGV` and `ENVP` are terminated by NULL pointers.
+    case 0: return execve(&quot;path&quot;, /*argv=*/  cleanArray, /*envp=*/cleanArray); // no-warning
+    case 1: return execve(   buf, /*argv=*/  cleanArray, /*envp=*/cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execve(&quot;path&quot;, /*argv=*/taintedArray, /*envp=*/cleanArray); // FIXME: We might wanna have a report here.
+    case 3: return execve(&quot;path&quot;, /*argv=*/cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `file`, searching in the `PATH&#x27; environment variable if it
+    // contains no slashes, with arguments `ARGV` and environment from `environ&#x27;.
+    case 0: return execvp(&quot;file&quot;, /*argv=*/  cleanArray); // no-warning
+    case 1: return execvp(   buf, /*argv=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execvp(&quot;file&quot;, /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  // execvpe
+  switch (coin()) {
+    default: break;
+    // Execute `file`, searching in the `PATH&#x27; environment variable if it
+    // contains no slashes, with arguments `ARGV` and environment `ENVP`.
+    // `ARGV` and `ENVP` are terminated by NULL pointers.
+    case 0: return execvpe(&quot;file&quot;, /*argv=*/  cleanArray, /*envp=*/  cleanArray); // no-warning
+    case 1: return execvpe(   buf, /*argv=*/  cleanArray, /*envp=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execvpe(&quot;file&quot;, /*argv=*/taintedArray, /*envp=*/  cleanArray); // FIXME: We might wanna have a report here.
+    case 3: return execvpe(&quot;file&quot;, /*argv=*/  cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  int cleanFD = coin();
+  int taintedFD;
+  scanf(&quot;%d&quot;, &amp;taintedFD);
+  clang_analyzer_isTainted_int(taintedFD); // expected-warning {{YES}}
+
+  switch (coin()) {
+    default: break;
+    // Execute the file `FD` refers to, overlaying the running program image.
+    // `ARGV` and `ENVP` are passed to the new program, as for `execve&#x27;.
+    case 0: return fexecve(  cleanFD, /*argv=*/  cleanArray, /*envp=*/  cleanArray); // no-warning
+    case 1: return fexecve(taintedFD, /*argv=*/  cleanArray, /*envp=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return fexecve(  cleanFD, /*argv=*/taintedArray, /*envp=*/  cleanArray); // FIXME: We might wanna have a report here.
+    case 3: return fexecve(  cleanFD, /*argv=*/  cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Create a new stream connected to a pipe running the given `command`.
+    case 0: return pclose(popen(&quot;command&quot;, /*mode=*/&quot;r&quot;)); // no-warning
+    case 1: return pclose(popen(      buf, /*mode=*/&quot;r&quot;)); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return pclose(popen(&quot;command&quot;, /*mode=*/buf)); // &#x27;mode&#x27; is not a taint sink.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute the given line as a shell command.
+    case 0: return system(&quot;command&quot;); // no-warning
+    case 1: return system(      buf); // expected-warning {{Untrusted data is passed to a system call}}
+  }
+
+  return 0;
+}
+
 void testUnknownFunction(void (*foo)(void)) {
   foo(); // no-crash
 }
</pre>
</details>


https://github.com/llvm/llvm-project/pull/66358


More information about the cfe-commits mailing list