[clang] First batch of patches for the Juliet benchmark for taint improvements (PR #66074)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 12 05:15:55 PDT 2023


https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/66074:

See the motivation here: https://discourse.llvm.org/t/patches-inspired-by-the-juliet-benchmark/73106

I've checked all these 4 commits on a large set of projects, and they - surprisingly - don't show any report differences besides noise.
I plan to land these 4 commits individually (aka. I don't plan to squash them).

>From ededc22487a23a7deaa971526da8a932ea27b231 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Tue, 12 Sep 2023 14:00:33 +0200
Subject: [PATCH 1/4] [analyzer] Fix stdin declaration in C++ tests

The `stdin` declaration shoulkd be within `extern "C" {...}`, in C++
mode. In addition, it should be also marked `extern` in both C and
C++ modes.

I tightened the check to ensure we only accept `stdin` if both of these
match. However, from the Juliet test suite's perspective, this commit
should not matter.
---
 .../StaticAnalyzer/Checkers/GenericTaintChecker.cpp  |  3 ++-
 clang/test/Analysis/taint-diagnostic-visitor.c       |  2 +-
 clang/test/Analysis/taint-generic.cpp                | 12 ++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 3dcb45c0b110383..819f65a9f7318db 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -105,7 +105,8 @@ bool isStdin(SVal Val, const ASTContext &ACtx) {
   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
     D = D->getCanonicalDecl();
     // FIXME: This should look for an exact match.
-    if (D->getName().contains("stdin") && D->isExternC()) {
+    if (D->getName().contains("stdin") && D->hasExternalStorage() &&
+        D->isExternC()) {
       const QualType FILETy = ACtx.getFILEType().getCanonicalType();
       const QualType Ty = D->getType().getCanonicalType();
 
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c
index 663836836d3db67..f1b9ceebdd9a6b8 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -13,7 +13,7 @@ size_t strlen( const char* str );
 void *malloc(size_t size );
 void free( void *ptr );
 char *fgets(char *str, int n, FILE *stream);
-FILE *stdin;
+extern FILE *stdin;
 
 void taintDiagnostic(void)
 {
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 09cd54471948e1a..c907c8f5eeb958b 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -7,6 +7,12 @@ int scanf(const char*, ...);
 int mySource1();
 int mySource3();
 
+typedef struct _FILE FILE;
+extern "C" {
+extern FILE *stdin;
+}
+int fscanf(FILE *stream, const char *format, ...);
+
 bool isOutOfRange2(const int*);
 
 void mySink2(int);
@@ -124,3 +130,9 @@ void testConfigurationMemberFunc() {
   foo.myMemberScanf("%d", &x);
   Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
 }
+
+void testReadingFromStdin(char **p) {
+  int n;
+  fscanf(stdin, "%d", &n);
+  Buffer[n] = 1; // expected-warning {{Out of bound memory access (index is tainted)}}
+}

>From b3215f7b2e5df91bb0a12cbe309929ee9d764b16 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Tue, 12 Sep 2023 14:01:45 +0200
Subject: [PATCH 2/4] [analyzer] Make socket `accept()` propagate taint

This allows to track taint on real code from socket to reading into a
buffer using recv.
---
 .../StaticAnalyzer/Checkers/GenericTaintChecker.cpp   |  1 +
 clang/test/Analysis/taint-generic.c                   | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 819f65a9f7318db..e57272c0776aaad 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -623,6 +623,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"getlogin_r"}}, TR::Source({{0}})},
 
       // Props
+      {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index b7906d201e4fad3..e58b9c71a757821 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -544,6 +544,10 @@ void testFread(const char *fname, int *buffer, size_t size, size_t count) {
 }
 
 ssize_t recv(int sockfd, void *buf, size_t len, int flags);
+int accept(int fd, struct sockaddr *addr, socklen_t *addrlen);
+int bind(int fd, const struct sockaddr *addr, socklen_t addrlen);
+int listen(int fd, int backlog);
+
 void testRecv(int *buf, size_t len, int flags) {
   int fd;
   scanf("%d", &fd); // fake a tainted a file descriptor
@@ -1107,3 +1111,10 @@ void testProctitle2(char *real_argv[]) {
   setproctitle_init(1, argv, 0);         // expected-warning {{Untrusted data is passed to a user-defined sink}}
   setproctitle_init(1, real_argv, argv); // expected-warning {{Untrusted data is passed to a user-defined sink}}
 }
+
+void testAcceptPropagates() {
+  int listenSocket = socket(2, 1, 6);
+  clang_analyzer_isTainted_int(listenSocket); // expected-warning {{YES}}
+  int acceptSocket = accept(listenSocket, 0, 0);
+  clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
+}

>From c54e1342fe93a51acce9148072d60d8783333194 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Tue, 12 Sep 2023 14:03:40 +0200
Subject: [PATCH 3/4] [analyzer] Fix taint sink rules for exec-like functions

Variadic arguments were not considered as taint sink arguments.
I also decided to extend the list of exec-like functions.

(Juliet CWE78_OS_Command_Injection__char_connect_socket_execl)
---
 .../Checkers/GenericTaintChecker.cpp          |  14 +-
 clang/test/Analysis/taint-generic.c           | 161 +++++++++++++++++-
 2 files changed, 167 insertions(+), 8 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index e57272c0776aaad..63993bd4edc7f95 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -741,12 +741,14 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       // Sinks
       {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
       {{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{{"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
+      {{{"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+      {{{"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+      {{{"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
+      {{{"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+      {{{"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+      {{{"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
+      {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
+      {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
       {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
       {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
       {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index e58b9c71a757821..d0a3dc04985c2df 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -60,6 +60,8 @@ void clang_analyzer_isTainted_char(char);
 void clang_analyzer_isTainted_charp(char*);
 void clang_analyzer_isTainted_int(int);
 
+int coin();
+
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
 char *gets_s(char *str, rsize_t n);
@@ -105,6 +107,41 @@ void *malloc(size_t);
 void *calloc(size_t nmemb, size_t size);
 void bcopy(void *s1, void *s2, size_t n);
 
+
+//    function | pathname | filename | fd | arglist | argv[] | envp[]
+//    ===============================================================
+// 1  execl    |     X    |          |    |    X    |        |
+// 2  execle   |     X    |          |    |    X    |        |   X
+// 3  execlp   |          |     X    |    |    X    |        |
+// 4  execv    |     X    |          |    |         |    X   |
+// 5  execve   |     X    |          |    |         |    X   |   X
+// 6  execvp   |          |     X    |    |         |    X   |
+// 7  execvpe  |          |     X    |    |         |    X   |   X
+// 8  fexecve  |          |          |  X |         |    X   |   X
+//    ===============================================================
+//    letter   |          |     p    |  f |    l    |    v   |   e
+//
+// legend:
+//  - pathname: rel/abs path to the binary
+//  - filename: file name searched in PATH to execute the binary
+//  - fd:       accepts a file descriptor
+//  - arglist:  accepts variadic arguments
+//  - argv:     accepts a pointer to array, denoting the new argv
+//  - envp:     accepts a pointer to array, denoting the new envp
+
+int execl(const char *path, const char *arg, ...);
+int execle(const char *path, const char *arg, ...);
+int execlp(const char *file, const char *arg, ...);
+int execv(const char *path, char *const argv[]);
+int execve(const char *path, char *const argv[], char *const envp[]);
+int execvp(const char *file, char *const argv[]);
+int execvpe(const char *file, char *const argv[], char *const envp[]);
+int fexecve(int fd, char *const argv[], char *const envp[]);
+FILE *popen(const char *command, const char *type);
+int pclose(FILE *stream);
+int system(const char *command);
+
+
 typedef size_t socklen_t;
 
 struct sockaddr {
@@ -211,7 +248,6 @@ void testUncontrolledFormatString(char **p) {
 
 }
 
-int system(const char *command);
 void testTaintSystemCall(void) {
   char buffer[156];
   char addr[128];
@@ -274,7 +310,6 @@ void testTaintedBufferSize(void) {
 #define SOCK_STREAM 1
 int socket(int, int, int);
 size_t read(int, void *, size_t);
-int  execl(const char *, const char *, ...);
 
 void testSocket(void) {
   int sock;
@@ -1091,6 +1126,128 @@ void testConfigurationSinks(void) {
   // expected-warning at -1 {{Untrusted data is passed to a user-defined sink}}
 }
 
+int test_exec_like_functions() {
+  char buf[100] = {0};
+  scanf("%99s", buf);
+  clang_analyzer_isTainted_char(buf[0]); // expected-warning {{YES}}
+
+  char *cleanArray[] = {"ENV1=V1", "ENV2=V2", NULL};
+  char *taintedArray[] = {buf, "ENV2=V2", NULL};
+  clang_analyzer_isTainted_char(taintedArray[0][0]);      // expected-warning {{YES}}
+  clang_analyzer_isTainted_char(*(char*)taintedArray[0]); // expected-warning {{YES}}
+  clang_analyzer_isTainted_char(*(char*)taintedArray);    // expected-warning {{NO}}  We should have YES here.
+  // FIXME: Above the triple pointer indirection will confuse the checker,
+  // as we only check two levels. The results would be worse, if the tainted
+  // subobject ("buf") would not be at the beginning of the enclosing object,
+  // for the same reason.
+
+  switch (coin()) {
+    default: break;
+    // Execute `path` with all arguments after `path` until a NULL pointer
+    // and environment from `environ'.
+    case 0: return execl("path", "arg0", "arg1", "arg2", NULL); // no-warning
+    case 1: return execl(buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execl("path", buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 3: return execl("path", "arg0", buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 4: return execl("path", "arg0", "arg1", buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `path` with all arguments after `PATH` until a NULL pointer,
+    // and the argument after that for environment.
+    case 0: return execle("path", "arg0", "arg1", NULL,   cleanArray); // no-warning
+    case 1: return execle(   buf, "arg0", "arg1", NULL,   cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execle("path",    buf, "arg1", NULL,   cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 3: return execle("path", "arg0",    buf, NULL,   cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 4: return execle("path", "arg0", "arg1", NULL,          buf); // expected-warning {{Untrusted data is passed to a system call}}
+    case 5: return execle("path", "arg0", "arg1", NULL, taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `file`, searching in the `PATH' environment variable if it
+    // contains no slashes, with all arguments after `file` until a NULL
+    // pointer and environment from `environ'.
+    case 0: return execlp("file", "arg0", "arg1", "arg2", NULL); // no-warning
+    case 1: return execlp(   buf, "arg0", "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execlp("file",    buf, "arg1", "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 3: return execlp("file", "arg0",    buf, "arg2", NULL); // expected-warning {{Untrusted data is passed to a system call}}
+    case 4: return execlp("file", "arg0", "arg1",    buf, NULL); // expected-warning {{Untrusted data is passed to a system call}}
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `path` with arguments `ARGV` and environment from `environ'.
+    case 0: return execv("path", /*argv=*/  cleanArray); // no-warning
+    case 1: return execv(   buf, /*argv=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execv("path", /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Replace the current process, executing `path` with arguments `ARGV`
+    // and environment `ENVP`. `ARGV` and `ENVP` are terminated by NULL pointers.
+    case 0: return execve("path", /*argv=*/  cleanArray, /*envp=*/cleanArray); // no-warning
+    case 1: return execve(   buf, /*argv=*/  cleanArray, /*envp=*/cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execve("path", /*argv=*/taintedArray, /*envp=*/cleanArray); // FIXME: We might wanna have a report here.
+    case 3: return execve("path", /*argv=*/cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute `file`, searching in the `PATH' environment variable if it
+    // contains no slashes, with arguments `ARGV` and environment from `environ'.
+    case 0: return execvp("file", /*argv=*/  cleanArray); // no-warning
+    case 1: return execvp(   buf, /*argv=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execvp("file", /*argv=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  // execvpe
+  switch (coin()) {
+    default: break;
+    // Execute `file`, searching in the `PATH' environment variable if it
+    // contains no slashes, with arguments `ARGV` and environment `ENVP`.
+    // `ARGV` and `ENVP` are terminated by NULL pointers.
+    case 0: return execvpe("file", /*argv=*/  cleanArray, /*envp=*/  cleanArray); // no-warning
+    case 1: return execvpe(   buf, /*argv=*/  cleanArray, /*envp=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return execvpe("file", /*argv=*/taintedArray, /*envp=*/  cleanArray); // FIXME: We might wanna have a report here.
+    case 3: return execvpe("file", /*argv=*/  cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  int cleanFD = coin();
+  int taintedFD;
+  scanf("%d", &taintedFD);
+  clang_analyzer_isTainted_int(taintedFD); // expected-warning {{YES}}
+
+  switch (coin()) {
+    default: break;
+    // Execute the file `FD` refers to, overlaying the running program image.
+    // `ARGV` and `ENVP` are passed to the new program, as for `execve'.
+    case 0: return fexecve(  cleanFD, /*argv=*/  cleanArray, /*envp=*/  cleanArray); // no-warning
+    case 1: return fexecve(taintedFD, /*argv=*/  cleanArray, /*envp=*/  cleanArray); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return fexecve(  cleanFD, /*argv=*/taintedArray, /*envp=*/  cleanArray); // FIXME: We might wanna have a report here.
+    case 3: return fexecve(  cleanFD, /*argv=*/  cleanArray, /*envp=*/taintedArray); // FIXME: We might wanna have a report here.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Create a new stream connected to a pipe running the given `command`.
+    case 0: return pclose(popen("command", /*mode=*/"r")); // no-warning
+    case 1: return pclose(popen(      buf, /*mode=*/"r")); // expected-warning {{Untrusted data is passed to a system call}}
+    case 2: return pclose(popen("command", /*mode=*/buf)); // 'mode' is not a taint sink.
+  }
+
+  switch (coin()) {
+    default: break;
+    // Execute the given line as a shell command.
+    case 0: return system("command"); // no-warning
+    case 1: return system(      buf); // expected-warning {{Untrusted data is passed to a system call}}
+  }
+
+  return 0;
+}
+
 void testUnknownFunction(void (*foo)(void)) {
   foo(); // no-crash
 }

>From 63b6a37624935ae0bb52b830e15b015998559e06 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Tue, 12 Sep 2023 14:06:30 +0200
Subject: [PATCH 4/4] [analyzer] Propagate taint for wchar variants of some
 APIs

Functions like `fgets`, `strlen` `strcat` propagates taint.
However, their `wchar_t` variants don't. This patch fixes that.

Notice, that there could be many more APIs missing.
The intention of this patch is to fix those that so far surfaced,
instead of exhaustively fixing this issue.
---
 .../Checkers/GenericTaintChecker.cpp          |  4 ++
 clang/test/Analysis/taint-generic.c           | 38 ++++++++++++++++---
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 63993bd4edc7f95..e6f0b7354b168bf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -630,6 +630,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
+      {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
       {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
       {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
       {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
@@ -697,6 +698,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+      {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
       {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
@@ -733,6 +735,8 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strcat"}}},
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, {{"wcsncat"}}},
+       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdupa"}}},
        TR::Prop({{0}}, {{ReturnValueIndex}})},
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index d0a3dc04985c2df..f3c56b6ff930034 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -56,7 +56,9 @@
 // CHECK-INVALID-ARG-SAME:        rules greater or equal to -1
 
 typedef long long rsize_t;
+typedef __WCHAR_TYPE__ wchar_t;
 void clang_analyzer_isTainted_char(char);
+void clang_analyzer_isTainted_wchar(wchar_t);
 void clang_analyzer_isTainted_charp(char*);
 void clang_analyzer_isTainted_int(int);
 
@@ -77,6 +79,17 @@ extern FILE *stdin;
 #define bool _Bool
 #define NULL (void*)0
 
+wchar_t *fgetws(wchar_t *ws, int n, FILE *stream);
+wchar_t *wmemset(wchar_t *wcs, wchar_t wc, unsigned long n);
+wchar_t *wmemcpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wmemmove(wchar_t *dest, const wchar_t *src, size_t n);
+size_t wcslen(const wchar_t *s);
+wchar_t *wcscpy(wchar_t * dest, const wchar_t * src);
+wchar_t *wcsncpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wcscat(wchar_t *dest, const wchar_t *src);
+wchar_t *wcsncat(wchar_t *dest,const wchar_t *src, size_t n);
+int swprintf(wchar_t *wcs, size_t maxlen, const wchar_t *format, ...);
+
 char *getenv(const char *name);
 
 FILE *fopen(const char *name, const char *mode);
@@ -465,6 +478,24 @@ int testSprintf_propagates_taint(char *buf, char *msg) {
   return 1 / x;                    // expected-warning {{Division by a tainted value, possibly zero}}
 }
 
+void test_wchar_apis_propagates(const char *path) {
+  FILE *f = fopen(path, "r");
+  clang_analyzer_isTainted_charp((char*)f);  // expected-warning {{YES}}
+  wchar_t wbuf[10];
+  fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f);
+  clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}}
+  int n = wcslen(wbuf);
+  clang_analyzer_isTainted_int(n); // expected-warning {{YES}}
+
+  wchar_t dst[100] = L"ABC";
+  clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}}
+  wcsncat(dst, wbuf, sizeof(wbuf)/sizeof(*wbuf));
+  clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}}
+
+  int m = wcslen(dst);
+  clang_analyzer_isTainted_int(m); // expected-warning {{YES}}
+}
+
 int scanf_s(const char *format, ...);
 int testScanf_s_(int *out) {
   scanf_s("%d", out);
@@ -679,7 +710,6 @@ void testRawmemchr(int c) {
   clang_analyzer_isTainted_charp(result); // expected-warning {{YES}}
 }
 
-typedef char wchar_t;
 int mbtowc(wchar_t *pwc, const char *s, size_t n);
 void testMbtowc(wchar_t *pwc, size_t n) {
   char buf[10];
@@ -692,8 +722,7 @@ void testMbtowc(wchar_t *pwc, size_t n) {
 
 int wctomb(char *s, wchar_t wc);
 void testWctomb(char *buf) {
-  wchar_t wc;
-  scanf("%c", &wc);
+  wchar_t wc = getchar();
 
   int result = wctomb(buf, wc);
   clang_analyzer_isTainted_char(*buf); // expected-warning {{YES}}
@@ -702,8 +731,7 @@ void testWctomb(char *buf) {
 
 int wcwidth(wchar_t c);
 void testWcwidth() {
-  wchar_t wc;
-  scanf("%c", &wc);
+  wchar_t wc = getchar();
 
   int width = wcwidth(wc);
   clang_analyzer_isTainted_int(width); // expected-warning {{YES}}



More information about the cfe-commits mailing list