[clang] [clang][analyzer] StreamChecker: Add more APIs, invalidate fscanf args (PR #82476)

via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 21 01:54:25 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Alejandro Álvarez Ayllón (alejandro-alvarez-sonarsource)

<details>
<summary>Changes</summary>

1. Model `getc`, `vfscanf`, `putc`, `vfprintf`.
2. `fscanf` invalidates all arguments after the format string.

Also, add tests for `ftello` and `fseeko`.

---
Full diff: https://github.com/llvm/llvm-project/pull/82476.diff


5 Files Affected:

- (modified) clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp (+38-1) 
- (modified) clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h (+1-1) 
- (modified) clang/test/Analysis/Inputs/system-header-simulator-for-valist.h (+4) 
- (modified) clang/test/Analysis/Inputs/system-header-simulator.h (+3) 
- (modified) clang/test/Analysis/stream.c (+128) 


``````````diff
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index a070f451694a3b..7938a0d30a91a3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -21,6 +21,8 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
 #include <functional>
 #include <optional>
 
@@ -171,7 +173,7 @@ using FnCheck = std::function<void(const StreamChecker *, const FnDescription *,
                                    const CallEvent &, CheckerContext &)>;
 
 using ArgNoTy = unsigned int;
-static const ArgNoTy ArgNone = std::numeric_limits<ArgNoTy>::max();
+const ArgNoTy ArgNone = std::numeric_limits<ArgNoTy>::max();
 
 struct FnDescription {
   FnCheck PreFn;
@@ -179,6 +181,26 @@ struct FnDescription {
   ArgNoTy StreamArgNo;
 };
 
+[[nodiscard]] ProgramStateRef
+escapeArgsAfterIndex(ProgramStateRef State, CheckerContext &C,
+                     const CallEvent &Call, unsigned FirstEscapingArgIndex) {
+  const auto *CE = Call.getOriginExpr();
+  assert(CE);
+
+  if (Call.getNumArgs() <= FirstEscapingArgIndex)
+    return State;
+
+  SmallVector<SVal> EscapingArgs;
+  EscapingArgs.reserve(Call.getNumArgs() - FirstEscapingArgIndex);
+  for (auto EscArgIdx :
+       llvm::seq<int>(FirstEscapingArgIndex, Call.getNumArgs()))
+    EscapingArgs.push_back(Call.getArgSVal(EscArgIdx));
+  State = State->invalidateRegions(EscapingArgs, CE, C.blockCount(),
+                                   C.getLocationContext(),
+                                   /*CausesPointerEscape=*/false);
+  return State;
+}
+
 /// Get the value of the stream argument out of the passed call event.
 /// The call should contain a function that is described by Desc.
 SVal getStreamArg(const FnDescription *Desc, const CallEvent &Call) {
@@ -396,6 +418,18 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
         0}},
       {{{"fileno"}, 1},
        {&StreamChecker::preDefault, &StreamChecker::evalFileno, 0}},
+      {{{"getc"}, 1},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
+        std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}},
+      {{{"vfscanf"}, 3},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
+        &StreamChecker::evalFscanf, 0}},
+      {{{"putc"}, 2},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
+        std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}},
+      {{{"vfprintf"}, 3},
+       {std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
+        &StreamChecker::evalFprintf, 0}},
   };
 
   CallDescriptionMap<FnDescription> FnTestDescriptions = {
@@ -997,6 +1031,9 @@ void StreamChecker::evalFscanf(const FnDescription *Desc, const CallEvent &Call,
   if (!E.Init(Desc, Call, C, State))
     return;
 
+  // The pointers passed to fscanf escape and get invalidated.
+  State = escapeArgsAfterIndex(State, C, Call, /*FirstEscapingArgIndex=*/2);
+
   // Add the success state.
   // In this context "success" means there is not an EOF or other read error
   // before any item is matched in 'fscanf'. But there may be match failure,
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h b/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
index 098a2208fecbe9..c26d3582149120 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
@@ -5,7 +5,7 @@
 // suppressed.
 #pragma clang system_header
 
-typedef struct __sFILE {
+typedef struct _FILE {
   unsigned char *_p;
 } FILE;
 FILE *fopen(const char *restrict, const char *restrict) __asm("_" "fopen" );
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h b/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h
index 7299b61353d460..87688bd8b312f4 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-for-valist.h
@@ -10,6 +10,8 @@
 #define restrict /*restrict*/
 #endif
 
+typedef struct _FILE FILE;
+
 typedef __builtin_va_list va_list;
 
 #define va_start(ap, param) __builtin_va_start(ap, param)
@@ -21,6 +23,8 @@ int vprintf (const char *restrict format, va_list arg);
 
 int vsprintf (char *restrict s, const char *restrict format, va_list arg);
 
+int vfscanf(FILE *stream, const char *format, va_list ap);
+
 int some_library_function(int n, va_list arg);
 
 // No warning from system header.
diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h
index 15986984802c0e..8fd51449ecc0a4 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator.h
@@ -73,6 +73,9 @@ int ferror(FILE *stream);
 int fileno(FILE *stream);
 int fflush(FILE *stream);
 
+
+int getc(FILE *stream);
+
 size_t strlen(const char *);
 
 char *strcpy(char *restrict, const char *restrict);
diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c
index 378c9154f8f6a8..d0fee68d482e7f 100644
--- a/clang/test/Analysis/stream.c
+++ b/clang/test/Analysis/stream.c
@@ -1,8 +1,11 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection -verify %s
 
 #include "Inputs/system-header-simulator.h"
+#include "Inputs/system-header-simulator-for-valist.h"
 
 void clang_analyzer_eval(int);
+void clang_analyzer_dump_char(char);
+void clang_analyzer_dump_int(int);
 
 void check_fread(void) {
   FILE *fp = tmpfile();
@@ -65,12 +68,24 @@ void check_fseek(void) {
   fclose(fp);
 }
 
+void check_fseeko(void) {
+  FILE *fp = tmpfile();
+  fseeko(fp, 0, 0); // expected-warning {{Stream pointer might be NULL}}
+  fclose(fp);
+}
+
 void check_ftell(void) {
   FILE *fp = tmpfile();
   ftell(fp); // expected-warning {{Stream pointer might be NULL}}
   fclose(fp);
 }
 
+void check_ftello(void) {
+  FILE *fp = tmpfile();
+  ftello(fp); // expected-warning {{Stream pointer might be NULL}}
+  fclose(fp);
+}
+
 void check_rewind(void) {
   FILE *fp = tmpfile();
   rewind(fp); // expected-warning {{Stream pointer might be NULL}}
@@ -138,6 +153,15 @@ void f_seek(void) {
   fclose(p);
 }
 
+void f_seeko(void) {
+  FILE *p = fopen("foo", "r");
+  if (!p)
+    return;
+  fseeko(p, 1, SEEK_SET); // no-warning
+  fseeko(p, 1, 3); // expected-warning {{The whence argument to fseek() should be SEEK_SET, SEEK_END, or SEEK_CUR}}
+  fclose(p);
+}
+
 void f_double_close(void) {
   FILE *p = fopen("foo", "r");
   if (!p)
@@ -339,3 +363,107 @@ void fflush_on_open_failed_stream(void) {
   }
   fclose(F);
 }
+
+void test_fscanf_eof() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  int a;
+  unsigned b;
+  int ret = fscanf(F1, "%d %u", &a, &b);
+  char c = fgetc(F1); // expected-warning {{Read function called when stream is in EOF state. Function has no effect}}
+  // expected-warning@-1 {{File position of the stream might be 'indeterminate' after a failed operation. Can cause undefined behavior}}
+  fclose(F1);
+}
+
+void test_fscanf_escape() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  int a = 48;
+  unsigned b = 127;
+  char buffer[] = "FSCANF"; // 70 83 67 65 78 70
+
+  clang_analyzer_dump_int(a); // expected-warning {{48 S32b}}
+  clang_analyzer_dump_int(b); // expected-warning {{127 S32b}}
+  clang_analyzer_dump_char(buffer[2]); // expected-warning {{67 S8b}}
+
+  int ret = fscanf(F1, "%d %u %s", &a, &b, buffer);
+  clang_analyzer_dump_int(a); // expected-warning {{conj_$}}
+  clang_analyzer_dump_int(b); // expected-warning {{conj_$}}
+  clang_analyzer_dump_char(buffer[2]); // expected-warning {{derived_$}}
+
+  if (ret != EOF) {
+    char c = fgetc(F1); // ok
+  }
+
+  fclose(F1);
+}
+
+void test_fputc() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  char a = 'y'; // 'y' = 121 ASCII
+  char r = fputc(a, F1);
+  if (r != EOF) {
+    clang_analyzer_dump_char(r); // expected-warning {{121 S8b}}
+    char z = fgetc(F1);
+  } else {
+    clang_analyzer_dump_char(r);  // expected-warning {{-1 S8b}}
+  }
+
+  fclose(F1);
+}
+
+void test_fputs() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  char buffer[] = "HELLO";
+  int r = fputs(buffer, F1);
+  if (r >= 0) {
+    // fputs does not invalidate the input buffer (72 is ascii for 'H')
+    clang_analyzer_dump_char(buffer[0]); // expected-warning {{72 S8b}}
+  } else if (r == EOF) {
+    // fputs does not invalidate the input buffer, *and* this branch
+    // can happen
+    clang_analyzer_dump_char(buffer[0]); // expected-warning {{72 S8b}}
+  } else {
+    // This branch can not happen
+    int *p = NULL;
+    *p = 0;
+  }
+
+  fclose(F1);
+}
+
+void test_fprintf() {
+  FILE *F1 = tmpfile();
+  if (!F1)
+    return;
+
+  unsigned a = 42;
+  char *output = "HELLO";
+  int r = fprintf(F1, "%s\t%u\n", output, a);
+  // fprintf does not invalidate any of its input
+  // 69 is ascii for 'E'
+  clang_analyzer_dump_int(a); // expected-warning {{42 S32b}}
+  clang_analyzer_dump_char(output[1]); // expected-warning {{69 S8b}}
+  if (r < 0) {
+    // Failure
+    fprintf(F1, "%s\t%u\n", output, a); // expected-warning {{File position of the stream might be 'indeterminate' after a failed operation. Can cause undefined behavior}}
+  } else {
+    char buffer[10];
+    fscanf(F1, "%s", buffer);
+    if (fseek(F1, 0, SEEK_SET) == 0) {
+      fprintf(F1, "%s\t%u\n", buffer, a); // ok
+    }
+  }
+
+  fclose(F1);
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/82476


More information about the cfe-commits mailing list