[clang] First batch of patches for the Juliet benchmark for taint improvements (PR #66074)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 12 05:16:55 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-clang
<details>
<summary>Changes</summary>
See the motivation here: https://discourse.llvm.org/t/patches-inspired-by-the-juliet-benchmark/73106
I've checked all these 4 commits on a large set of projects, and they - surprisingly - don't show any report differences besides noise.
I plan to land these 4 commits individually (aka. I don't plan to squash them).
--
Full diff: https://github.com/llvm/llvm-project/pull/66074.diff
4 Files Affected:
- (modified) clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (+15-7)
- (modified) clang/test/Analysis/taint-diagnostic-visitor.c (+1-1)
- (modified) clang/test/Analysis/taint-generic.c (+203-7)
- (modified) clang/test/Analysis/taint-generic.cpp (+12)
<pre>
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 3dcb45c0b110383..e6f0b7354b168bf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -105,7 +105,8 @@ bool isStdin(SVal Val, const ASTContext &ACtx) {
if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
D = D->getCanonicalDecl();
// FIXME: This should look for an exact match.
- if (D->getName().contains("stdin") && D->isExternC()) {
+ if (D->getName().contains("stdin") && D->hasExternalStorage() &&
+ D->isExternC()) {
const QualType FILETy = ACtx.getFILEType().getCanonicalType();
const QualType Ty = D->getType().getCanonicalType();
@@ -622,12 +623,14 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
{{{"getlogin_r"}}, TR::Source({{0}})},
// Props
+ {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
+ {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
{{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
{{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
{{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
@@ -695,6 +698,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
{{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+ {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
{{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
@@ -731,6 +735,8 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
{{CDF_MaybeBuiltin, {{"strcat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+ {{CDF_MaybeBuiltin, {{"wcsncat"}}},
+ TR::Prop({{1}}, {{0, ReturnValueIndex}})},
{{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
@@ -739,12 +745,14 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
// Sinks
{{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
{{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
- {{{"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
+ {{{"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+ {{{"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+ {{{"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+ {{{"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+ {{{"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+ {{{"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+ {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+ {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
{{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
{{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
{{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c
index 663836836d3db67..f1b9ceebdd9a6b8 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -13,7 +13,7 @@ size_t strlen( const char* str );
void *malloc(size_t size );
void free( void *ptr );
char *fgets(char *str, int n, FILE *stream);
-FILE *stdin;
+extern FILE *stdin;
void taintDiagnostic(void)
{
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index b7906d201e4fad3..f3c56b6ff930034 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -56,10 +56,14 @@
// CHECK-INVALID-ARG-SAME: rules greater or equal to -1
typedef long long rsize_t;
+typedef __WCHAR_TYPE__ wchar_t;
void clang_analyzer_isTainted_char(char);
+void clang_analyzer_isTainted_wchar(wchar_t);
void clang_analyzer_isTainted_charp(char*);
void clang_analyzer_isTainted_int(int);
+int coin();
+
int scanf(const char *restrict format, ...);
char *gets(char *str);
char *gets_s(char *str, rsize_t n);
@@ -75,6 +79,17 @@ extern FILE *stdin;
#define bool _Bool
#define NULL (void*)0
+wchar_t *fgetws(wchar_t *ws, int n, FILE *stream);
+wchar_t *wmemset(wchar_t *wcs, wchar_t wc, unsigned long n);
+wchar_t *wmemcpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wmemmove(wchar_t *dest, const wchar_t *src, size_t n);
+size_t wcslen(const wchar_t *s);
+wchar_t *wcscpy(wchar_t * dest, const wchar_t * src);
+wchar_t *wcsncpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wcscat(wchar_t *dest, const wchar_t *src);
+wchar_t *wcsncat(wchar_t *dest,const wchar_t *src, size_t n);
+int swprintf(wchar_t *wcs, size_t maxlen, const wchar_t *format, ...);
+
char *getenv(const char *name);
FILE *fopen(const char *name, const char *mode);
@@ -105,6 +120,41 @@ void *malloc(size_t);
void *calloc(size_t nmemb, size_t size);
void bcopy(void *s1, void *s2, size_t n);
+
+// function | pathname | filename | fd | arglist | argv[] | envp[]
+// ===============================================================
+// 1 execl | X | | | X | |
+// 2 execle | X | | | X | | X
+// 3 execlp | | X | | X | |
+// 4 execv | X | | | | X |
+// 5 execve | X | | | | X | X
+// 6 execvp | | X | | | X |
+// 7 execvpe | | X | | | X | X
+// 8 fexecve | | | X | | X | X
+// ===============================================================
+// letter | | p | f | l | v | e
+//
+// legend:
+// - pathname: rel/abs path to the binary
+// - filename: file name searched in PATH to execute the binary
+// - fd: accepts a file descriptor
+// - arglist: accepts variadic arguments
+// - argv: accepts a pointer to array, denoting the new argv
+// - envp: accepts a pointer to array, denoting the new envp
+
+int execl(const char *path, const char *arg, ...);
+int execle(const char *path, const char *arg, ...);
+int execlp(const char *file, const char *arg, ...);
+int execv(const char *path, char *const argv[]);
+int execve(const char *path, char *const argv[], char *const envp[]);
+int execvp(const char *file, char *const argv[]);
+int execvpe(const char *file, char *const argv[], char *const envp[]);
+int fexecve(int fd, char *const argv[], char *const envp[]);
+FILE *popen(const char *command, const char *type);
+int pclose(FILE *stream);
+int system(const char *command);
+
+
typedef size_t socklen_t;
struct sockaddr {
@@ -211,7 +261,6 @@ void testUncontrolledFormatString(char **p) {
}
-int system(const char *command);
void testTaintSystemCall(void) {
char buffer[156];
char addr[128];
@@ -274,7 +323,6 @@ void testTaintedBufferSize(void) {
#define SOCK_STREAM 1
int socket(int, int, int);
size_t read(int, void *, size_t);
-int execl(const char *, const char *, ...);
void testSocket(void) {
int sock;
@@ -430,6 +478,24 @@ int testSprintf_propagates_taint(char *buf, char *msg) {
return 1 / x; // expected-warning {{Division by a tainted value, possibly zero}}
}
+void test_wchar_apis_propagates(const char *path) {
+ FILE *f = fopen(path, "r");
+ clang_analyzer_isTainted_charp((char*)f); // expected-warning {{YES}}
+ wchar_t wbuf[10];
+ fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f);
+ clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}}
+ int n = wcslen(wbuf);
+ clang_analyzer_isTainted_int(n); // expected-warning {{YES}}
+
+ wchar_t dst[100] = L"ABC";
+ clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}}
+ wcsncat(dst, wbuf, sizeof(wbuf)/sizeof(*wbuf));
+ clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}}
+
+ int m = wcslen(dst);
+ clang_analyzer_isTainted_int(m); // expected-warning {{YES}}
+}
+
int scanf_s(const char *format, ...);
int testScanf_s_(int *out) {
scanf_s("%d", out);
@@ -544,6 +610,10 @@ void testFread(const char *fname, int *buffer, size_t size, size_t count) {
}
ssize_t recv(int sockfd, void *buf, size_t len, int flags);
+int accept(int fd, struct sockaddr *addr, socklen_t *addrlen);
+int bind(int fd, const struct sockaddr *addr, socklen_t addrlen);
+int listen(int fd, int backlog);
+
void testRecv(int *buf, size_t len, int flags) {
int fd;
scanf("%d", &fd); // fake a tainted a file descriptor
@@ -640,7 +710,6 @@ void testRawmemchr(int c) {
clang_analyzer_isTainted_charp(result); // expected-warning {{YES}}
}
-typedef char wchar_t;
int mbtowc(wchar_t *pwc, const char *s, size_t n);
void testMbtowc(wchar_t *pwc, size_t n) {
char buf[10];
@@ -653,8 +722,7 @@ void testMbtowc(wchar_t *pwc, size_t n) {
int wctomb(char *s, wchar_t wc);
void testWctomb(char *buf) {
- wchar_t wc;
- scanf("%c", &wc);
+ wchar_t wc = getchar();
int result = wctomb(buf, wc);
clang_analyzer_isTainted_char(*buf); // expected-warning {{YES}}
@@ -663,8 +731,7 @@ void testWctomb(char *buf) {
int wcwidth(wchar_t c);
void testWcwidth() {
- wchar_t wc;
- scanf("%c", &wc);
+ wchar_t wc = getchar();
int width = wcwidth(wc);
clang_analyzer_isTainted_int(width); // expected-warning {{YES}}
@@ -1087,6 +1154,128 @@ void testConfigurationSinks(void) {
// expected-warning at -1 {{Untrusted data is passed to a user-defined sink}}
}
+int test_exec_like_functions() {
+ char buf[100] = {0};
+ scanf("%99s", buf);
+ clang_analyzer_isTainted_char(buf[0]); // expected-warning {{YES}}
+
+ char *cleanArray[] = {"ENV1=V1", "ENV2=V2", NULL};
+ char *taintedArray[] = {buf, "ENV2=V2", NULL};
+ clang_analyzer_isTainted_char(taintedArray[0][0]); // expected-warning {{YES}}
+ clang_analyzer_isTainted_char(*(char*)taintedArray[0]); // expected-warning {{YES}}
+ clang_analyzer_isTainted_char(*(char*)taintedArray); // expected-warning {{NO}} We should have YES here.
+ // FIXME: Above the triple pointer indirection will confuse the checker,
+ // as we only check two levels. The results would be worse, if the tainted
+ // subobject ("buf") would not be at the beginning of the enclosing object,
+ // for the same reason.
+
+ switch (coin()) {
+ default: break;
+ // Execute `path` with all arguments after `path` until a NULL pointer
+ // and environment from `environ'.
+ case 0: return execl("path", "arg0", "arg1", "arg2", NULL); // no-warning
+ case 1: return execl(buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execl("path", buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 3: return execl("path", "arg0", buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 4: return execl("path", "arg0", "arg1", buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `path` with all arguments after `PATH` until a NULL pointer,
+ // and the argument after that for environment.
+ case 0: return execle("path", "arg0", "arg1", NULL, cleanArray); // no-warning
+ case 1: return execle( buf, "arg0", "arg1", NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execle("path", buf, "arg1", NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 3: return execle("path", "arg0", buf, NULL, cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 4: return execle("path", "arg0", "arg1", NULL, buf); // expected-warning {{Untrusted data is passed to a system call}}
+ case 5: return execle("path", "arg0", "arg1", NULL, taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `file`, searching in the `PATH' environment variable if it
+ // contains no slashes, with all arguments after `file` until a NULL
+ // pointer and environment from `environ'.
+ case 0: return execlp("file", "arg0", "arg1", "arg2", NULL); // no-warning
+ case 1: return execlp( buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execlp("file", buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 3: return execlp("file", "arg0", buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ case 4: return execlp("file", "arg0", "arg1", buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `path` with arguments `ARGV` and environment from `environ'.
+ case 0: return execv("path", /*argv=*/ cleanArray); // no-warning
+ case 1: return execv( buf, /*argv=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execv("path", /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Replace the current process, executing `path` with arguments `ARGV`
+ // and environment `ENVP`. `ARGV` and `ENVP` are terminated by NULL pointers.
+ case 0: return execve("path", /*argv=*/ cleanArray, /*envp=*/cleanArray); // no-warning
+ case 1: return execve( buf, /*argv=*/ cleanArray, /*envp=*/cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execve("path", /*argv=*/taintedArray, /*envp=*/cleanArray); // FIXME: We might wanna have a report here.
+ case 3: return execve("path", /*argv=*/cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute `file`, searching in the `PATH' environment variable if it
+ // contains no slashes, with arguments `ARGV` and environment from `environ'.
+ case 0: return execvp("file", /*argv=*/ cleanArray); // no-warning
+ case 1: return execvp( buf, /*argv=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execvp("file", /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ // execvpe
+ switch (coin()) {
+ default: break;
+ // Execute `file`, searching in the `PATH' environment variable if it
+ // contains no slashes, with arguments `ARGV` and environment `ENVP`.
+ // `ARGV` and `ENVP` are terminated by NULL pointers.
+ case 0: return execvpe("file", /*argv=*/ cleanArray, /*envp=*/ cleanArray); // no-warning
+ case 1: return execvpe( buf, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return execvpe("file", /*argv=*/taintedArray, /*envp=*/ cleanArray); // FIXME: We might wanna have a report here.
+ case 3: return execvpe("file", /*argv=*/ cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ int cleanFD = coin();
+ int taintedFD;
+ scanf("%d", &taintedFD);
+ clang_analyzer_isTainted_int(taintedFD); // expected-warning {{YES}}
+
+ switch (coin()) {
+ default: break;
+ // Execute the file `FD` refers to, overlaying the running program image.
+ // `ARGV` and `ENVP` are passed to the new program, as for `execve'.
+ case 0: return fexecve( cleanFD, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // no-warning
+ case 1: return fexecve(taintedFD, /*argv=*/ cleanArray, /*envp=*/ cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return fexecve( cleanFD, /*argv=*/taintedArray, /*envp=*/ cleanArray); // FIXME: We might wanna have a report here.
+ case 3: return fexecve( cleanFD, /*argv=*/ cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Create a new stream connected to a pipe running the given `command`.
+ case 0: return pclose(popen("command", /*mode=*/"r")); // no-warning
+ case 1: return pclose(popen( buf, /*mode=*/"r")); // expected-warning {{Untrusted data is passed to a system call}}
+ case 2: return pclose(popen("command", /*mode=*/buf)); // 'mode' is not a taint sink.
+ }
+
+ switch (coin()) {
+ default: break;
+ // Execute the given line as a shell command.
+ case 0: return system("command"); // no-warning
+ case 1: return system( buf); // expected-warning {{Untrusted data is passed to a system call}}
+ }
+
+ return 0;
+}
+
void testUnknownFunction(void (*foo)(void)) {
foo(); // no-crash
}
@@ -1107,3 +1296,10 @@ void testProctitle2(char *real_argv[]) {
setproctitle_init(1, argv, 0); // expected-warning {{Untrusted data is passed to a user-defined sink}}
setproctitle_init(1, real_argv, argv); // expected-warning {{Untrusted data is passed to a user-defined sink}}
}
+
+void testAcceptPropagates() {
+ int listenSocket = socket(2, 1, 6);
+ clang_analyzer_isTainted_int(listenSocket); // expected-warning {{YES}}
+ int acceptSocket = accept(listenSocket, 0, 0);
+ clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
+}
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 09cd54471948e1a..c907c8f5eeb958b 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -7,6 +7,12 @@ int scanf(const char*, ...);
int mySource1();
int mySource3();
+typedef struct _FILE FILE;
+extern "C" {
+extern FILE *stdin;
+}
+int fscanf(FILE *stream, const char *format, ...);
+
bool isOutOfRange2(const int*);
void mySink2(int);
@@ -124,3 +130,9 @@ void testConfigurationMemberFunc() {
foo.myMemberScanf("%d", &x);
Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
}
+
+void testReadingFromStdin(char **p) {
+ int n;
+ fscanf(stdin, "%d", &n);
+ Buffer[n] = 1; // expected-warning {{Out of bound memory access (index is tainted)}}
+}
</pre>
</details>
https://github.com/llvm/llvm-project/pull/66074
More information about the cfe-commits
mailing list