[clang] [analyzer] First batch of patches for the Juliet benchmark for taint improvements (PR #66074)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 12 07:12:22 PDT 2023


https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/66074:

>From 13677c9acfbadd82d9e008339a65d86adc87e1ff Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Tue, 12 Sep 2023 14:00:33 +0200
Subject: [PATCH 1/3] [analyzer] Fix stdin declaration in C++ tests

The `stdin` declaration should be within `extern "C" {...}`, in C++
mode. In addition, it should be also marked `extern` in both C and
C++ modes.

I tightened the check to ensure we only accept `stdin` if both of these
match. However, from the Juliet test suite's perspective, this commit
should not matter.
---
 .../StaticAnalyzer/Checkers/GenericTaintChecker.cpp  |  3 +--
 clang/test/Analysis/taint-diagnostic-visitor.c       |  2 +-
 clang/test/Analysis/taint-generic.cpp                | 12 ++++++++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 3dcb45c0b110383..8138c8411fb2613 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -104,8 +104,7 @@ bool isStdin(SVal Val, const ASTContext &ACtx) {
   // variable named stdin with the proper type.
   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
     D = D->getCanonicalDecl();
-    // FIXME: This should look for an exact match.
-    if (D->getName().contains("stdin") && D->isExternC()) {
+    if (D->getName() == "stdin" && D->hasExternalStorage() && D->isExternC()) {
       const QualType FILETy = ACtx.getFILEType().getCanonicalType();
       const QualType Ty = D->getType().getCanonicalType();
 
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c
index 663836836d3db67..f1b9ceebdd9a6b8 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -13,7 +13,7 @@ size_t strlen( const char* str );
 void *malloc(size_t size );
 void free( void *ptr );
 char *fgets(char *str, int n, FILE *stream);
-FILE *stdin;
+extern FILE *stdin;
 
 void taintDiagnostic(void)
 {
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 09cd54471948e1a..c907c8f5eeb958b 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -7,6 +7,12 @@ int scanf(const char*, ...);
 int mySource1();
 int mySource3();
 
+typedef struct _FILE FILE;
+extern "C" {
+extern FILE *stdin;
+}
+int fscanf(FILE *stream, const char *format, ...);
+
 bool isOutOfRange2(const int*);
 
 void mySink2(int);
@@ -124,3 +130,9 @@ void testConfigurationMemberFunc() {
   foo.myMemberScanf("%d", &x);
   Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
 }
+
+void testReadingFromStdin(char **p) {
+  int n;
+  fscanf(stdin, "%d", &n);
+  Buffer[n] = 1; // expected-warning {{Out of bound memory access (index is tainted)}}
+}

>From 4d77846aebd008392918d311e86cd72ead833854 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Tue, 12 Sep 2023 14:01:45 +0200
Subject: [PATCH 2/3] [analyzer] Make socket `accept()` propagate taint

This allows to track taint on real code from `socket()`
to reading into a buffer using `recv()`.
---
 .../StaticAnalyzer/Checkers/GenericTaintChecker.cpp   |  1 +
 clang/test/Analysis/taint-generic.c                   | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 8138c8411fb2613..54c3f6dcdddaf59 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -621,6 +621,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"getlogin_r"}}, TR::Source({{0}})},
 
       // Props
+      {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index b7906d201e4fad3..e58b9c71a757821 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -544,6 +544,10 @@ void testFread(const char *fname, int *buffer, size_t size, size_t count) {
 }
 
 ssize_t recv(int sockfd, void *buf, size_t len, int flags);
+int accept(int fd, struct sockaddr *addr, socklen_t *addrlen);
+int bind(int fd, const struct sockaddr *addr, socklen_t addrlen);
+int listen(int fd, int backlog);
+
 void testRecv(int *buf, size_t len, int flags) {
   int fd;
   scanf("%d", &fd); // fake a tainted a file descriptor
@@ -1107,3 +1111,10 @@ void testProctitle2(char *real_argv[]) {
   setproctitle_init(1, argv, 0);         // expected-warning {{Untrusted data is passed to a user-defined sink}}
   setproctitle_init(1, real_argv, argv); // expected-warning {{Untrusted data is passed to a user-defined sink}}
 }
+
+void testAcceptPropagates() {
+  int listenSocket = socket(2, 1, 6);
+  clang_analyzer_isTainted_int(listenSocket); // expected-warning {{YES}}
+  int acceptSocket = accept(listenSocket, 0, 0);
+  clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
+}

>From 8b80d806dad75026eadc406fc47e4f21b57e1275 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Tue, 12 Sep 2023 14:06:30 +0200
Subject: [PATCH 3/3] [analyzer] Propagate taint for wchar variants of some
 APIs

Functions like `fgets`, `strlen`, `strcat` propagate taint.
However, their `wchar_t` variants don't. This patch fixes that.

Notice, that there could be many more APIs missing.
This patch intends to fix those that so far surfaced,
instead of exhaustively fixing this issue.
---
 .../Checkers/GenericTaintChecker.cpp          |  4 ++
 clang/test/Analysis/taint-generic.c           | 39 ++++++++++++++++---
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 54c3f6dcdddaf59..5da0f34b3d0464f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -628,6 +628,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
+      {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
       {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
       {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
       {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
@@ -695,6 +696,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+      {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
       {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
@@ -731,6 +733,8 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strcat"}}},
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, {{"wcsncat"}}},
+       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdupa"}}},
        TR::Prop({{0}}, {{ReturnValueIndex}})},
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index e58b9c71a757821..f08186fc8c4c1e5 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -56,7 +56,10 @@
 // CHECK-INVALID-ARG-SAME:        rules greater or equal to -1
 
 typedef long long rsize_t;
+typedef typeof(sizeof(int)) size_t;
+typedef __WCHAR_TYPE__ wchar_t;
 void clang_analyzer_isTainted_char(char);
+void clang_analyzer_isTainted_wchar(wchar_t);
 void clang_analyzer_isTainted_charp(char*);
 void clang_analyzer_isTainted_int(int);
 
@@ -75,6 +78,17 @@ extern FILE *stdin;
 #define bool _Bool
 #define NULL (void*)0
 
+wchar_t *fgetws(wchar_t *ws, int n, FILE *stream);
+wchar_t *wmemset(wchar_t *wcs, wchar_t wc, unsigned long n);
+wchar_t *wmemcpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wmemmove(wchar_t *dest, const wchar_t *src, size_t n);
+size_t wcslen(const wchar_t *s);
+wchar_t *wcscpy(wchar_t * dest, const wchar_t * src);
+wchar_t *wcsncpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wcscat(wchar_t *dest, const wchar_t *src);
+wchar_t *wcsncat(wchar_t *dest,const wchar_t *src, size_t n);
+int swprintf(wchar_t *wcs, size_t maxlen, const wchar_t *format, ...);
+
 char *getenv(const char *name);
 
 FILE *fopen(const char *name, const char *mode);
@@ -430,6 +444,24 @@ int testSprintf_propagates_taint(char *buf, char *msg) {
   return 1 / x;                    // expected-warning {{Division by a tainted value, possibly zero}}
 }
 
+void test_wchar_apis_propagate(const char *path) {
+  FILE *f = fopen(path, "r");
+  clang_analyzer_isTainted_charp((char*)f);  // expected-warning {{YES}}
+  wchar_t wbuf[10];
+  fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f);
+  clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}}
+  int n = wcslen(wbuf);
+  clang_analyzer_isTainted_int(n); // expected-warning {{YES}}
+
+  wchar_t dst[100] = L"ABC";
+  clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}}
+  wcsncat(dst, wbuf, sizeof(wbuf)/sizeof(*wbuf));
+  clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}}
+
+  int m = wcslen(dst);
+  clang_analyzer_isTainted_int(m); // expected-warning {{YES}}
+}
+
 int scanf_s(const char *format, ...);
 int testScanf_s_(int *out) {
   scanf_s("%d", out);
@@ -644,7 +676,6 @@ void testRawmemchr(int c) {
   clang_analyzer_isTainted_charp(result); // expected-warning {{YES}}
 }
 
-typedef char wchar_t;
 int mbtowc(wchar_t *pwc, const char *s, size_t n);
 void testMbtowc(wchar_t *pwc, size_t n) {
   char buf[10];
@@ -657,8 +688,7 @@ void testMbtowc(wchar_t *pwc, size_t n) {
 
 int wctomb(char *s, wchar_t wc);
 void testWctomb(char *buf) {
-  wchar_t wc;
-  scanf("%c", &wc);
+  wchar_t wc = getchar();
 
   int result = wctomb(buf, wc);
   clang_analyzer_isTainted_char(*buf); // expected-warning {{YES}}
@@ -667,8 +697,7 @@ void testWctomb(char *buf) {
 
 int wcwidth(wchar_t c);
 void testWcwidth() {
-  wchar_t wc;
-  scanf("%c", &wc);
+  wchar_t wc = getchar();
 
   int width = wcwidth(wc);
   clang_analyzer_isTainted_int(width); // expected-warning {{YES}}



More information about the cfe-commits mailing list