[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

Daniel Krupp via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 10 02:45:22 PDT 2023


https://github.com/dkrupp updated https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp <daniel.krupp at ericsson.com>
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/3] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
size tainted variable is bounded.

The false warning was emmitted also for the malloc/calloc calls.

These warning (the sink) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer constraints.
---
 .../Checkers/GenericTaintChecker.cpp          | 49 +++++--------
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +++++++++----------
 clang/test/Analysis/taint-generic.c           | 26 ++++---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc84..b949cac504eddfe 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
     "Untrusted data is passed to a system call "
     "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-    "Untrusted data is used to specify the buffer size "
-    "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-    "for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
     "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{CDF_MaybeBuiltin, {{"stpcpy"}}},
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strcat"}}},
-       TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+       TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"wcsncat"}}},
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdupa"}}},
        TR::Prop({{0}}, {{ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+       TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+       TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+       TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+       TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+      {{CDF_MaybeBuiltin, {"bcopy"}},
+       TR::Prop({{0, 2}}, {{1}})},
 
       // Sinks
       {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
       {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
       {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-      {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {{"memccpy"}}},
-       TR::Sink({{3}}, MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {{"realloc"}}},
-       TR::Sink({{1}}, MsgTaintedBufferSize)},
+       // malloc, calloc, alloca, realloc, memccpy
+       // are intentionally left out as taint sinks
+       // because unconditional reporting for these functions
+       // generate many false positives.
+       // These taint sinks should be implemented in other checkers
+       // with more sophisticated sanitation heuristics.
       {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
       {{{{"setproctitle_fast"}}},
        TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-      // SinkProps
-      {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-       TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-                    MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
-       TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-                    MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
-       TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-                    MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
-       TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
-                    MsgTaintedBufferSize)},
-      {{CDF_MaybeBuiltin, {{"bcopy"}}},
-       TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
+       };
 
   // `getenv` returns taint only in untrusted environments.
   if (TR::UntrustedEnv(C)) {
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e444..e3370e64dab319f 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -10,7 +10,8 @@ int scanf(const char *restrict format, ...);
 int system(const char *command);
 char* getenv( const char* env_var );
 size_t strlen( const char* str );
-int atoi( const char* str );
+char *strcat( char *dest, const char *src );
+char* strcpy( char* dest, const char* src );
 void *malloc(size_t size );
 void free( void *ptr );
 char *fgets(char *str, int n, FILE *stream);
@@ -53,34 +54,32 @@ void taintDiagnosticVLA(void) {
 
 // Tests if the originated note is correctly placed even if the path is
 // propagating through variables and expressions
-char *taintDiagnosticPropagation(){
-  char *pathbuf;
-  char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
-                                 // expected-note at -1 {{Taint propagated to the return value}}
-  if (size){ // expected-note {{Assuming 'size' is non-null}}
-	               // expected-note at -1 {{Taking true branch}}
-    pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is used to specify the buffer size}}
-                                                // expected-note at -1{{Untrusted data is used to specify the buffer size}}
-                                                // expected-note at -2 {{Taint propagated to the return value}}
-    return pathbuf;
+int taintDiagnosticPropagation(){
+  int res;
+  char *cmd=getenv("CMD"); // expected-note {{Taint originated here}}
+                           // expected-note at -1 {{Taint propagated to the return value}}
+  if (cmd){ // expected-note {{Assuming 'cmd' is non-null}}
+	          // expected-note at -1 {{Taking true branch}}
+    res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}}
+                       // expected-note at -1{{Untrusted data is passed to a system call}}
+    return res;
   }
-  return 0;
+  return -1;
 }
 
 // Taint origin should be marked correctly even if there are multiple taint
 // sources in the function
-char *taintDiagnosticPropagation2(){
-  char *pathbuf;
+int taintDiagnosticPropagation2(){
+  int res;
   char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
-  char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
-                                 // expected-note at -1 {{Taint propagated to the return value}}
+  char *cmd=getenv("CMD"); // expected-note {{Taint originated here}}
+                           // expected-note at -1 {{Taint propagated to the return value}}
   char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
-  if (size){ // expected-note {{Assuming 'size' is non-null}}
-	               // expected-note at -1 {{Taking true branch}}
-    pathbuf=(char*) malloc(atoi(size)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
-                                                // expected-note at -1{{Untrusted data is used to specify the buffer size}}
-                                                // expected-note at -2 {{Taint propagated to the return value}}
-    return pathbuf;
+  if (cmd){ // expected-note {{Assuming 'cmd' is non-null}}
+	          // expected-note at -1 {{Taking true branch}}
+    res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}}
+                       // expected-note at -1{{Untrusted data is passed to a system call}}
+    return res;
   }
   return 0;
 }
@@ -95,22 +94,23 @@ void testReadStdIn(){
 }
 
 void multipleTaintSources(void) {
-  int x,y,z;
-  scanf("%d", &x); // expected-note {{Taint originated here}}
+  char cmd[2048],file[1024];
+  scanf ("%1022[^\n] ", cmd); // expected-note {{Taint originated here}}
                    // expected-note at -1 {{Taint propagated to the 2nd argument}}
-  scanf("%d", &y); // expected-note {{Taint originated here}}
+  scanf ("%1023[^\n]", file); // expected-note {{Taint originated here}}
                    // expected-note at -1 {{Taint propagated to the 2nd argument}}
-  scanf("%d", &z);
-  int* ptr = (int*) malloc(y + x); // expected-warning {{Untrusted data is used to specify the buffer size}}
-                                   // expected-note at -1{{Untrusted data is used to specify the buffer size}}
-  free (ptr);
+  strcat(cmd, file);// expected-note {{Taint propagated to the 1st argument}}
+  system(cmd); // expected-warning {{Untrusted data is passed to a system call}}
+               // expected-note at -1{{Untrusted data is passed to a system call}}
 }
 
 void multipleTaintedArgs(void) {
-  int x,y;
-  scanf("%d %d", &x, &y); // expected-note {{Taint originated here}}
+  char cmd[1024],file[1024], buf[2048];
+  scanf("%1022s %1023s", cmd, file); // expected-note {{Taint originated here}}
                           // expected-note at -1 {{Taint propagated to the 2nd argument, 3rd argument}}
-  int* ptr = (int*) malloc(x + y); // expected-warning {{Untrusted data is used to specify the buffer size}}
-                                   // expected-note at -1{{Untrusted data is used to specify the buffer size}}
-  free (ptr);
+  strcpy(buf, cmd);// expected-note {{Taint propagated to the 1st argument}}
+  strcat(buf," ");// expected-note {{Taint propagated to the 1st argument}}
+  strcat(buf, file);// expected-note {{Taint propagated to the 1st argument}}
+  system(buf); // expected-warning {{Untrusted data is passed to a system call}}
+               // expected-note at -1{{Untrusted data is passed to a system call}}
 }
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb7..78305bfbb9d35f0 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -305,15 +305,19 @@ void testGets_s(void) {
 
 void testTaintedBufferSize(void) {
   size_t ts;
+  // malloc, calloc, bcopy, memcpy functions are removed as unconditional sinks
+  // from the GenericTaintChecker's default configuration,
+  // because it generated too many false positives.
+  // We would need more sophisticated handling of these reports to enable
+  // these test-cases again.
   scanf("%zd", &ts);
-
-  int *buf1 = (int*)malloc(ts*sizeof(int)); // expected-warning {{Untrusted data is used to specify the buffer size}}
-  char *dst = (char*)calloc(ts, sizeof(char)); //expected-warning {{Untrusted data is used to specify the buffer size}}
-  bcopy(buf1, dst, ts); // expected-warning {{Untrusted data is used to specify the buffer size}}
-  __builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}}
+  int *buf1 = (int*)malloc(ts*sizeof(int)); // warn here, ts is unbounded and tainted
+  char *dst = (char*)calloc(ts, sizeof(char)); // warn here, ts is unbounded tainted
+  bcopy(buf1, dst, ts); // no warning here, since the size of buf1, dst equals ts. Cannot overflow.
+  __builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // warn here, dst overflows (whatever the value of ts)
 
   // If both buffers are trusted, do not issue a warning.
-  char *dst2 = (char*)malloc(ts*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}}
+  char *dst2 = (char*)malloc(ts*sizeof(char)); // warn here, ts in unbounded
   strncat(dst2, dst, ts); // no-warning
 }
 
@@ -353,7 +357,7 @@ void testStruct(void) {
 
   sock = socket(AF_INET, SOCK_STREAM, 0);
   read(sock, &tainted, sizeof(tainted));
-  __builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}}
+  clang_analyzer_isTainted_int(tainted.length); // expected-warning {{YES }}
 }
 
 void testStructArray(void) {
@@ -368,17 +372,17 @@ void testStructArray(void) {
   __builtin_memset(srcbuf, 0, sizeof(srcbuf));
 
   read(sock, &tainted[0], sizeof(tainted));
-  __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
+  clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}}
 
   __builtin_memset(&tainted, 0, sizeof(tainted));
   read(sock, &tainted, sizeof(tainted));
-  __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
+  clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}}
 
   __builtin_memset(&tainted, 0, sizeof(tainted));
   // If we taint element 1, we should not raise an alert on taint for element 0 or element 2
   read(sock, &tainted[1], sizeof(tainted));
-  __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // no-warning
-  __builtin_memcpy(dstbuf, srcbuf, tainted[2].length); // no-warning
+  clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{NO}}
+  clang_analyzer_isTainted_int(tainted[2].length); // expected-warning {{NO}}
 }
 
 void testUnion(void) {

>From 88f226f60649dc43553f55ced123fb3f5ae5a39b Mon Sep 17 00:00:00 2001
From: Daniel Krupp <daniel.krupp at ericsson.com>
Date: Tue, 10 Oct 2023 11:38:18 +0200
Subject: [PATCH 2/3] fixup!

---
 clang/docs/analyzer/checkers.rst                      | 11 +----------
 .../StaticAnalyzer/Checkers/GenericTaintChecker.cpp   | 11 +++++------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index dbd6d7787823530..f8e465302c3d8b0 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2572,13 +2572,6 @@ Further examples of injection vulnerabilities this checker can find.
     sprintf(buf, s); // warn: untrusted data used as a format string
   }
 
-  void test() {
-    size_t ts;
-    scanf("%zd", &ts); // 'ts' marked as tainted
-    int *p = (int *)malloc(ts * sizeof(int));
-      // warn: untrusted data used as buffer size
-  }
-
 There are built-in sources, propagations and sinks even if no external taint
 configuration is provided.
 
@@ -2606,9 +2599,7 @@ Default propagations rules:
 
 Default sinks:
  ``printf``, ``setproctitle``, ``system``, ``popen``, ``execl``, ``execle``,
- ``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen``,
- ``memcpy``, ``memmove``, ``strncpy``, ``strndup``, ``malloc``, ``calloc``,
- ``alloca``, ``memccpy``, ``realloc``, ``bcopy``
+ ``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen``
 
 Please note that there are no built-in filter functions.
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index b949cac504eddfe..f21828781503382 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -726,7 +726,7 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{CDF_MaybeBuiltin, {{"stpcpy"}}},
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strcat"}}},
-       TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
+       TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"wcsncat"}}},
        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
       {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
@@ -757,11 +757,10 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
       {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
       {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
        // malloc, calloc, alloca, realloc, memccpy
-       // are intentionally left out as taint sinks
-       // because unconditional reporting for these functions
-       // generate many false positives.
-       // These taint sinks should be implemented in other checkers
-       // with more sophisticated sanitation heuristics.
+       // are intentionally not marked as taint sinks because unconditional
+       // reporting for these functions generates many false positives.
+       // These taint sinks should be implemented in other checkers with more
+       // sophisticated sanitation heuristics.
       {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
       {{{{"setproctitle_fast"}}},
        TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},

>From 584187cd8a7a9f2076afaa9701ae265360dc2dce Mon Sep 17 00:00:00 2001
From: Daniel Krupp <daniel.krupp at ericsson.com>
Date: Tue, 10 Oct 2023 11:44:12 +0200
Subject: [PATCH 3/3] fixup!

---
 clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index f21828781503382..f41ae08701fcf92 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -291,14 +291,6 @@ class GenericTaintRule {
     return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
   }
 
-  /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
-  static GenericTaintRule
-  SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
-           std::optional<StringRef> Msg = std::nullopt) {
-    return {
-        std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
-  }
-
   /// Process a function which could either be a taint source, a taint sink, a
   /// taint filter or a taint propagator.
   void process(const GenericTaintChecker &Checker, const CallEvent &Call,



More information about the cfe-commits mailing list