[clang] [clang][analyzer] StreamChecker: Add more APIs, invalidate fscanf args (PR #82476)

Alejandro Álvarez Ayllón via cfe-commits cfe-commits at lists.llvm.org
Mon Feb 26 05:01:54 PST 2024


https://github.com/alejandro-alvarez-sonarsource updated https://github.com/llvm/llvm-project/pull/82476

>From a21881d82fe3674b344d4a3807e9d2590c98ce93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20=C3=81lvarez=20Ayll=C3=B3n?=
 <alejandro.alvarez at sonarsource.com>
Date: Tue, 14 Nov 2023 09:28:45 +0100
Subject: [PATCH 1/4] [clang][analyzer] StreamChecker: add more APIs,
 invalidate fscanf args

1. Model getc, vfscanf, putc, vfprintf.
2. fscanf invalidates all arguments after the format string.
---
 .../StaticAnalyzer/Checkers/StreamChecker.cpp |  39 +++++-
 ...ystem-header-simulator-for-simple-stream.h |   2 +-
 .../system-header-simulator-for-valist.h      |   4 +
 .../Analysis/Inputs/system-header-simulator.h |   3 +
 clang/test/Analysis/stream.c                  | 128 ++++++++++++++++++
 5 files changed, 174 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index a070f451694a3b..7938a0d30a91a3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -21,6 +21,8 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
 #include <functional>
 #include <optional>
 
@@ -171,7 +173,7 @@ using FnCheck = std::function<void(const StreamChecker *, const FnDescription *,
                                    const CallEvent &, CheckerContext &)>;
 
 using ArgNoTy = unsigned int;
-static const ArgNoTy ArgNone = std::numeric_limits<ArgNoTy>::max();
+const ArgNoTy ArgNone = std::numeric_limits<ArgNoTy>::max();
 
 struct FnDescription {
   FnCheck PreFn;
@@ -179,6 +181,26 @@ struct FnDescription {
   ArgNoTy StreamArgNo;
 };
 
+[[nodiscard]] ProgramStateRef
+escapeArgsAfterIndex(ProgramStateRef State, CheckerContext &C,
+                     const CallEvent &Call, unsigned FirstEscapingArgIndex) {
+  const auto *CE = Call.getOriginExpr();
+  assert(CE);
+
+  if (Call.getNumArgs() <= FirstEscapingArgIndex)
+    return State;
+
+  SmallVector<SVal> EscapingArgs;
+  EscapingArgs.reserve(Call.getNumArgs() - FirstEscapingArgIndex);
+  for (auto EscArgIdx :
+       llvm::seq<int>(FirstEscapingArgIndex, Call.getNumArgs()))
+    EscapingArgs.push_back(Call.getArgSVal(EscArgIdx));
+  State = State->invalidateRegions(EscapingArgs, CE, C.blockCount(),
+                                   C.getLocationContext(),
+                                   /*CausesPointerEscape=*/false);
+  return State;
+}
+
 /// Get the value of the stream argument out of the passed call event.
 /// The call should contain a function that is described by Desc.
 SVal getStreamArg(const FnDescription *Desc, const CallEvent &Call) {
@@ -396,6 +418,18 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
         0}},
       {{{"fileno"}, 1},
        {&StreamChecker::preDefault, &StreamChecker::evalFileno, 0}},
+      {{{"getc"}, 1},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
+        std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}},
+      {{{"vfscanf"}, 3},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
+        &StreamChecker::evalFscanf, 0}},
+      {{{"putc"}, 2},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
+        std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}},
+      {{{"vfprintf"}, 3},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
+        &StreamChecker::evalFprintf, 0}},
   };
 
   CallDescriptionMap<FnDescription> FnTestDescriptions = {
@@ -997,6 +1031,9 @@ void StreamChecker::evalFscanf(const FnDescription *Desc, const CallEvent &Call,
   if (!E.Init(Desc, Call, C, State))
     return;
 
+  // The pointers passed to fscanf escape and get invalidated.
+  State = escapeArgsAfterIndex(State, C, Call, /*FirstEscapingArgIndex=*/2);
+
   // Add the success state.
   // In this context "success" means there is not an EOF or other read error
   // before any item is matched in 'fscanf'. But there may be match failure,
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h b/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
index 098a2208fecbe9..c26d3582149120 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
@@ -5,7 +5,7 @@
 // suppressed.
 #pragma clang system_header
 
-typedef struct __sFILE {
+typedef struct _FILE {
   unsigned char *_p;
 } FILE;
 FILE *fopen(const char *restrict, const char *restrict) __asm("_" "fopen" );
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h b/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h
index 7299b61353d460..87688bd8b312f4 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h
@@ -10,6 +10,8 @@
 #define restrict /*restrict*/
 #endif
 
+typedef struct _FILE FILE;
+
 typedef __builtin_va_list va_list;
 
 #define va_start(ap, param) __builtin_va_start(ap, param)
@@ -21,6 +23,8 @@ int vprintf (const char *restrict format, va_list arg);
 
 int vsprintf (char *restrict s, const char *restrict format, va_list arg);
 
+int vfscanf(FILE *stream, const char *format, va_list ap);
+
 int some_library_function(int n, va_list arg);
 
 // No warning from system header.
diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h
index 15986984802c0e..8fd51449ecc0a4 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator.h
@@ -73,6 +73,9 @@ int ferror(FILE *stream);
 int fileno(FILE *stream);
 int fflush(FILE *stream);
 
+
+int getc(FILE *stream);
+
 size_t strlen(const char *);
 
 char *strcpy(char *restrict, const char *restrict);
diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c
index 378c9154f8f6a8..d0fee68d482e7f 100644
--- a/clang/test/Analysis/stream.c
+++ b/clang/test/Analysis/stream.c
@@ -1,8 +1,11 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection -verify %s
 
 #include "Inputs/system-header-simulator.h"
+#include "Inputs/system-header-simulator-for-valist.h"
 
 void clang_analyzer_eval(int);
+void clang_analyzer_dump_char(char);
+void clang_analyzer_dump_int(int);
 
 void check_fread(void) {
   FILE *fp = tmpfile();
@@ -65,12 +68,24 @@ void check_fseek(void) {
   fclose(fp);
 }
 
+void check_fseeko(void) {
+  FILE *fp = tmpfile();
+  fseeko(fp, 0, 0); // expected-warning {{Stream pointer might be NULL}}
+  fclose(fp);
+}
+
 void check_ftell(void) {
   FILE *fp = tmpfile();
   ftell(fp); // expected-warning {{Stream pointer might be NULL}}
   fclose(fp);
 }
 
+void check_ftello(void) {
+  FILE *fp = tmpfile();
+  ftello(fp); // expected-warning {{Stream pointer might be NULL}}
+  fclose(fp);
+}
+
 void check_rewind(void) {
   FILE *fp = tmpfile();
   rewind(fp); // expected-warning {{Stream pointer might be NULL}}
@@ -138,6 +153,15 @@ void f_seek(void) {
   fclose(p);
 }
 
+void f_seeko(void) {
+  FILE *p = fopen("foo", "r");
+  if (!p)
+    return;
+  fseeko(p, 1, SEEK_SET); // no-warning
+  fseeko(p, 1, 3); // expected-warning {{The whence argument to fseek() should be SEEK_SET, SEEK_END, or SEEK_CUR}}
+  fclose(p);
+}
+
 void f_double_close(void) {
   FILE *p = fopen("foo", "r");
   if (!p)
@@ -339,3 +363,107 @@ void fflush_on_open_failed_stream(void) {
   }
   fclose(F);
 }
+
+void test_fscanf_eof() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  int a;
+  unsigned b;
+  int ret = fscanf(F1, "%d %u", &a, &b);
+  char c = fgetc(F1); // expected-warning {{Read function called when stream is in EOF state. Function has no effect}}
+  // expected-warning at -1 {{File position of the stream might be 'indeterminate' after a failed operation. Can cause undefined behavior}}
+  fclose(F1);
+}
+
+void test_fscanf_escape() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  int a = 48;
+  unsigned b = 127;
+  char buffer[] = "FSCANF"; // 70 83 67 65 78 70
+
+  clang_analyzer_dump_int(a); // expected-warning {{48 S32b}}
+  clang_analyzer_dump_int(b); // expected-warning {{127 S32b}}
+  clang_analyzer_dump_char(buffer[2]); // expected-warning {{67 S8b}}
+
+  int ret = fscanf(F1, "%d %u %s", &a, &b, buffer);
+  clang_analyzer_dump_int(a); // expected-warning {{conj_$}}
+  clang_analyzer_dump_int(b); // expected-warning {{conj_$}}
+  clang_analyzer_dump_char(buffer[2]); // expected-warning {{derived_$}}
+
+  if (ret != EOF) {
+    char c = fgetc(F1); // ok
+  }
+
+  fclose(F1);
+}
+
+void test_fputc() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  char a = 'y'; // 'y' = 121 ASCII
+  char r = fputc(a, F1);
+  if (r != EOF) {
+    clang_analyzer_dump_char(r); // expected-warning {{121 S8b}}
+    char z = fgetc(F1);
+  } else {
+    clang_analyzer_dump_char(r);  // expected-warning {{-1 S8b}}
+  }
+
+  fclose(F1);
+}
+
+void test_fputs() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  char buffer[] = "HELLO";
+  int r = fputs(buffer, F1);
+  if (r >= 0) {
+    // fputs does not invalidate the input buffer (72 is ascii for 'H')
+    clang_analyzer_dump_char(buffer[0]); // expected-warning {{72 S8b}}
+  } else if (r == EOF) {
+    // fputs does not invalidate the input buffer, *and* this branch
+    // can happen
+    clang_analyzer_dump_char(buffer[0]); // expected-warning {{72 S8b}}
+  } else {
+    // This branch can not happen
+    int *p = NULL;
+    *p = 0;
+  }
+
+  fclose(F1);
+}
+
+void test_fprintf() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  unsigned a = 42;
+  char *output = "HELLO";
+  int r = fprintf(F1, "%s\t%u\n", output, a);
+  // fprintf does not invalidate any of its input
+  // 69 is ascii for 'E'
+  clang_analyzer_dump_int(a); // expected-warning {{42 S32b}}
+  clang_analyzer_dump_char(output[1]); // expected-warning {{69 S8b}}
+  if (r < 0) {
+    // Failure
+    fprintf(F1, "%s\t%u\n", output, a); // expected-warning {{File position of the stream might be 'indeterminate' after a failed operation. Can cause undefined behavior}}
+  } else {
+    char buffer[10];
+    fscanf(F1, "%s", buffer);
+    if (fseek(F1, 0, SEEK_SET) == 0) {
+      fprintf(F1, "%s\t%u\n", buffer, a); // ok
+    }
+  }
+
+  fclose(F1);
+}

>From 4de634f0bd559085d50aa5f89afb79a246d1a03f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20=C3=81lvarez=20Ayll=C3=B3n?=
 <alejandro.alvarez at sonarsource.com>
Date: Wed, 21 Feb 2024 14:05:39 +0100
Subject: [PATCH 2/4] Reintroduce static, rename escapeArgsAfterIndex

---
 clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 7938a0d30a91a3..44a62cf08c8862 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -173,7 +173,7 @@ using FnCheck = std::function<void(const StreamChecker *, const FnDescription *,
                                    const CallEvent &, CheckerContext &)>;
 
 using ArgNoTy = unsigned int;
-const ArgNoTy ArgNone = std::numeric_limits<ArgNoTy>::max();
+static const ArgNoTy ArgNone = std::numeric_limits<ArgNoTy>::max();
 
 struct FnDescription {
   FnCheck PreFn;
@@ -182,8 +182,9 @@ struct FnDescription {
 };
 
 [[nodiscard]] ProgramStateRef
-escapeArgsAfterIndex(ProgramStateRef State, CheckerContext &C,
-                     const CallEvent &Call, unsigned FirstEscapingArgIndex) {
+escapeArgsStartingFromIndex(ProgramStateRef State, CheckerContext &C,
+                            const CallEvent &Call,
+                            unsigned FirstEscapingArgIndex) {
   const auto *CE = Call.getOriginExpr();
   assert(CE);
 
@@ -1032,7 +1033,8 @@ void StreamChecker::evalFscanf(const FnDescription *Desc, const CallEvent &Call,
     return;
 
   // The pointers passed to fscanf escape and get invalidated.
-  State = escapeArgsAfterIndex(State, C, Call, /*FirstEscapingArgIndex=*/2);
+  State =
+      escapeArgsStartingFromIndex(State, C, Call, /*FirstEscapingArgIndex=*/2);
 
   // Add the success state.
   // In this context "success" means there is not an EOF or other read error

>From 3dc80ccaf2647d2d270e5b91f6a13538e68e2831 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20=C3=81lvarez=20Ayll=C3=B3n?=
 <alejandro.alvarez at sonarsource.com>
Date: Wed, 21 Feb 2024 14:16:14 +0100
Subject: [PATCH 3/4] Reorder functions

---
 .../StaticAnalyzer/Checkers/StreamChecker.cpp | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 44a62cf08c8862..29ea88b77da120 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -370,18 +370,30 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
       {{{"fgets"}, 3},
        {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
         std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, false), 2}},
+      {{{"getc"}, 1},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
+        std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}},
       {{{"fputc"}, 2},
        {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
         std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}},
       {{{"fputs"}, 2},
        {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
         std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, false), 1}},
+      {{{"putc"}, 2},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
+        std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}},
       {{{"fprintf"}},
        {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
         std::bind(&StreamChecker::evalFprintf, _1, _2, _3, _4), 0}},
+      {{{"vfprintf"}, 3},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
+        std::bind(&StreamChecker::evalFprintf, _1, _2, _3, _4), 0}},
       {{{"fscanf"}},
        {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
         std::bind(&StreamChecker::evalFscanf, _1, _2, _3, _4), 0}},
+      {{{"vfscanf"}, 3},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
+        std::bind(&StreamChecker::evalFscanf, _1, _2, _3, _4), 0}},
       {{{"ungetc"}, 2},
        {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
         std::bind(&StreamChecker::evalUngetc, _1, _2, _3, _4), 1}},
@@ -419,18 +431,6 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
         0}},
       {{{"fileno"}, 1},
        {&StreamChecker::preDefault, &StreamChecker::evalFileno, 0}},
-      {{{"getc"}, 1},
-       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
-        std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}},
-      {{{"vfscanf"}, 3},
-       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
-        &StreamChecker::evalFscanf, 0}},
-      {{{"putc"}, 2},
-       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
-        std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}},
-      {{{"vfprintf"}, 3},
-       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
-        &StreamChecker::evalFprintf, 0}},
   };
 
   CallDescriptionMap<FnDescription> FnTestDescriptions = {

>From c07738959a790515c2dcab1ff5a43ca7a1c6d8a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20=C3=81lvarez=20Ayll=C3=B3n?=
 <alejandro.alvarez at sonarsource.com>
Date: Mon, 26 Feb 2024 13:38:06 +0100
Subject: [PATCH 4/4] Remove redundant invalidation in scanf

---
 .../StaticAnalyzer/Checkers/StreamChecker.cpp | 25 -------------------
 clang/test/Analysis/stream.c                  | 12 ++++++---
 2 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 0ef878df5f5104..8c8c7ff6fb832a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -180,27 +180,6 @@ struct FnDescription {
   ArgNoTy StreamArgNo;
 };
 
-[[nodiscard]] ProgramStateRef
-escapeArgsStartingFromIndex(ProgramStateRef State, CheckerContext &C,
-                            const CallEvent &Call,
-                            unsigned FirstEscapingArgIndex) {
-  const auto *CE = Call.getOriginExpr();
-  assert(CE);
-
-  if (Call.getNumArgs() <= FirstEscapingArgIndex)
-    return State;
-
-  SmallVector<SVal> EscapingArgs;
-  EscapingArgs.reserve(Call.getNumArgs() - FirstEscapingArgIndex);
-  for (auto EscArgIdx :
-       llvm::seq<int>(FirstEscapingArgIndex, Call.getNumArgs()))
-    EscapingArgs.push_back(Call.getArgSVal(EscArgIdx));
-  State = State->invalidateRegions(EscapingArgs, CE, C.blockCount(),
-                                   C.getLocationContext(),
-                                   /*CausesPointerEscape=*/false);
-  return State;
-}
-
 /// Get the value of the stream argument out of the passed call event.
 /// The call should contain a function that is described by Desc.
 SVal getStreamArg(const FnDescription *Desc, const CallEvent &Call) {
@@ -1054,10 +1033,6 @@ void StreamChecker::evalFscanf(const FnDescription *Desc, const CallEvent &Call,
   if (!E.Init(Desc, Call, C, State))
     return;
 
-  // The pointers passed to fscanf escape and get invalidated.
-  State =
-      escapeArgsStartingFromIndex(State, C, Call, /*FirstEscapingArgIndex=*/2);
-
   // Add the success state.
   // In this context "success" means there is not an EOF or other read error
   // before any item is matched in 'fscanf'. But there may be match failure,
diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c
index d0fee68d482e7f..52520c967abefc 100644
--- a/clang/test/Analysis/stream.c
+++ b/clang/test/Analysis/stream.c
@@ -391,9 +391,15 @@ void test_fscanf_escape() {
   clang_analyzer_dump_char(buffer[2]); // expected-warning {{67 S8b}}
 
   int ret = fscanf(F1, "%d %u %s", &a, &b, buffer);
-  clang_analyzer_dump_int(a); // expected-warning {{conj_$}}
-  clang_analyzer_dump_int(b); // expected-warning {{conj_$}}
-  clang_analyzer_dump_char(buffer[2]); // expected-warning {{derived_$}}
+  if (ret != EOF) {
+    clang_analyzer_dump_int(a); // expected-warning {{conj_$}}
+    clang_analyzer_dump_int(b); // expected-warning {{conj_$}}
+    clang_analyzer_dump_char(buffer[2]); // expected-warning {{derived_$}}
+  } else {
+    clang_analyzer_dump_int(a); // expected-warning {{48 S32b}}
+    clang_analyzer_dump_int(b); // expected-warning {{127 S32b}}
+    clang_analyzer_dump_char(buffer[2]); // expected-warning {{67 S8b}}
+  }
 
   if (ret != EOF) {
     char c = fgetc(F1); // ok



More information about the cfe-commits mailing list