[clang] 61924da - [analyzer] Propagate taint for wchar variants of some APIs (#66074)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 14 02:55:52 PDT 2023


Author: Balazs Benics
Date: 2023-09-14T11:55:10+02:00
New Revision: 61924da630532c91f00351b7e84548eb42e2e1e0

URL: https://github.com/llvm/llvm-project/commit/61924da630532c91f00351b7e84548eb42e2e1e0
DIFF: https://github.com/llvm/llvm-project/commit/61924da630532c91f00351b7e84548eb42e2e1e0.diff

LOG: [analyzer] Propagate taint for wchar variants of some APIs (#66074)

Functions like `fgets`, `strlen`, `strcat` propagate taint.
However, their `wchar_t` variants don't. This patch fixes that.

Notice, that there could be many more APIs missing.
This patch intends to fix those that so far surfaced,
instead of exhaustively fixing this issue.

https://github.com/llvm/llvm-project/pull/66074

Added: 
    

Modified: 
    clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
    clang/test/Analysis/taint-generic.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 54c3f6dcdddaf59..5da0f34b3d0464f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -628,6 +628,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
+      {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
       {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
       {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
       {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
@@ -695,6 +696,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+      {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
       {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
@@ -731,6 +733,8 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strcat"}}},
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, {{"wcsncat"}}},
+       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdupa"}}},
        TR::Prop({{0}}, {{ReturnValueIndex}})},

diff  --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index e58b9c71a757821..f08186fc8c4c1e5 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -56,7 +56,10 @@
 // CHECK-INVALID-ARG-SAME:        rules greater or equal to -1
 
 typedef long long rsize_t;
+typedef typeof(sizeof(int)) size_t;
+typedef __WCHAR_TYPE__ wchar_t;
 void clang_analyzer_isTainted_char(char);
+void clang_analyzer_isTainted_wchar(wchar_t);
 void clang_analyzer_isTainted_charp(char*);
 void clang_analyzer_isTainted_int(int);
 
@@ -75,6 +78,17 @@ extern FILE *stdin;
 #define bool _Bool
 #define NULL (void*)0
 
+wchar_t *fgetws(wchar_t *ws, int n, FILE *stream);
+wchar_t *wmemset(wchar_t *wcs, wchar_t wc, unsigned long n);
+wchar_t *wmemcpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wmemmove(wchar_t *dest, const wchar_t *src, size_t n);
+size_t wcslen(const wchar_t *s);
+wchar_t *wcscpy(wchar_t * dest, const wchar_t * src);
+wchar_t *wcsncpy(wchar_t *dest, const wchar_t *src, size_t n);
+wchar_t *wcscat(wchar_t *dest, const wchar_t *src);
+wchar_t *wcsncat(wchar_t *dest,const wchar_t *src, size_t n);
+int swprintf(wchar_t *wcs, size_t maxlen, const wchar_t *format, ...);
+
 char *getenv(const char *name);
 
 FILE *fopen(const char *name, const char *mode);
@@ -430,6 +444,24 @@ int testSprintf_propagates_taint(char *buf, char *msg) {
   return 1 / x;                    // expected-warning {{Division by a tainted value, possibly zero}}
 }
 
+void test_wchar_apis_propagate(const char *path) {
+  FILE *f = fopen(path, "r");
+  clang_analyzer_isTainted_charp((char*)f);  // expected-warning {{YES}}
+  wchar_t wbuf[10];
+  fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f);
+  clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}}
+  int n = wcslen(wbuf);
+  clang_analyzer_isTainted_int(n); // expected-warning {{YES}}
+
+  wchar_t dst[100] = L"ABC";
+  clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}}
+  wcsncat(dst, wbuf, sizeof(wbuf)/sizeof(*wbuf));
+  clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}}
+
+  int m = wcslen(dst);
+  clang_analyzer_isTainted_int(m); // expected-warning {{YES}}
+}
+
 int scanf_s(const char *format, ...);
 int testScanf_s_(int *out) {
   scanf_s("%d", out);
@@ -644,7 +676,6 @@ void testRawmemchr(int c) {
   clang_analyzer_isTainted_charp(result); // expected-warning {{YES}}
 }
 
-typedef char wchar_t;
 int mbtowc(wchar_t *pwc, const char *s, size_t n);
 void testMbtowc(wchar_t *pwc, size_t n) {
   char buf[10];
@@ -657,8 +688,7 @@ void testMbtowc(wchar_t *pwc, size_t n) {
 
 int wctomb(char *s, wchar_t wc);
 void testWctomb(char *buf) {
-  wchar_t wc;
-  scanf("%c", &wc);
+  wchar_t wc = getchar();
 
   int result = wctomb(buf, wc);
   clang_analyzer_isTainted_char(*buf); // expected-warning {{YES}}
@@ -667,8 +697,7 @@ void testWctomb(char *buf) {
 
 int wcwidth(wchar_t c);
 void testWcwidth() {
-  wchar_t wc;
-  scanf("%c", &wc);
+  wchar_t wc = getchar();
 
   int width = wcwidth(wc);
   clang_analyzer_isTainted_int(width); // expected-warning {{YES}}


        


More information about the cfe-commits mailing list