[clang] bd1917c - [analyzer] Done some changes to detect Uninitialized read by the char array manipulation functions

via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 3 09:51:39 PST 2022


Author: Shivam
Date: 2022-03-03T23:21:26+05:30
New Revision: bd1917c88a32c0930864d04f4e71155dcc3fa592

URL: https://github.com/llvm/llvm-project/commit/bd1917c88a32c0930864d04f4e71155dcc3fa592
DIFF: https://github.com/llvm/llvm-project/commit/bd1917c88a32c0930864d04f4e71155dcc3fa592.diff

LOG: [analyzer] Done some changes to detect Uninitialized read by the char array manipulation functions

Few weeks back I was experimenting with reading the uninitialized values from src , which is actually a bug but the CSA seems to give up at that point . I was curious about that and I pinged @steakhal on the discord and according to him this seems to be a genuine issue and needs to be fix. So I goes with fixing this bug and thanks to @steakhal who help me creating this patch. This feature seems to break some tests but this was the genuine problem and the broken tests also needs to fix in certain manner. I add a test but yeah we need more tests,I'll try to add more tests.Thanks

Reviewed By: steakhal, NoQ

Differential Revision: https://reviews.llvm.org/D120489

Added: 
    clang/test/Analysis/bstring_UninitRead.c

Modified: 
    clang/docs/analyzer/checkers.rst
    clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
    clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
    clang/test/Analysis/bstring.c

Removed: 
    


################################################################################
diff  --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index a42e3e6960777..a9ebe063c6c8b 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2625,13 +2625,44 @@ alpha.unix.cstring.OutOfBounds (C)
 """"""""""""""""""""""""""""""""""
 Check for out-of-bounds access in string functions; applies to:`` strncopy, strncat``.
 
-
 .. code-block:: c
 
  void test() {
    int y = strlen((char *)&test); // warn
  }
 
+.. _alpha-unix-cstring-UninitializedRead:
+
+alpha.unix.cstring.UninitializedRead (C)
+""""""""""""""""""""""""""""""""""""""""
+Check for uninitialized reads from common memory copy/manipulation functions such as:
+ ``memcpy, mempcpy, memmove, memcmp, strcmp, strncmp, strcpy, strlen, strsep`` and many more.
+
+.. code-block:: c 
+
+ void test() {
+  char src[10];
+  char dst[5];
+  memcpy(dst,src,sizeof(dst)); // warn: Bytes string function accesses uninitialized/garbage values
+ }
+
+Limitations:
+  
+   - Due to limitations of the memory modeling in the analyzer, one can likely
+     observe a lot of false-positive reports like this:
+      .. code-block:: c
+  
+        void false_positive() {
+          int src[] = {1, 2, 3, 4};
+          int dst[5] = {0};
+          memcpy(dst, src, 4 * sizeof(int)); // false-positive:
+          // The 'src' buffer was correctly initialized, yet we cannot conclude
+          // that since the analyzer could not see a direct initialization of the
+          // very last byte of the source buffer.
+        }
+  
+     More details at the corresponding `GitHub issue <https://github.com/llvm/llvm-project/issues/43459>`_.
+  
 .. _alpha-nondeterminism-PointerIteration:
 
 alpha.nondeterminism.PointerIteration (C++)

diff  --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 6b72d64950106..9340a114ac456 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -475,7 +475,12 @@ def CStringNotNullTerm : Checker<"NotNullTerminated">,
   HelpText<"Check for arguments which are not null-terminating strings">,
   Dependencies<[CStringModeling]>,
   Documentation<HasAlphaDocumentation>;
-
+ 
+def CStringUninitializedRead : Checker<"UninitializedRead">,
+  HelpText<"Checks if the string manipulation function would read uninitialized bytes">,
+  Dependencies<[CStringModeling]>,
+  Documentation<HasAlphaDocumentation>;
+  
 } // end "alpha.unix.cstring"
 
 let ParentPackage = Unix in {

diff  --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 8483a1a47cea1..54c9e887b8c8c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -80,7 +80,7 @@ class CStringChecker : public Checker< eval::Call,
                                          check::RegionChanges
                                          > {
   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
-      BT_NotCString, BT_AdditionOverflow;
+      BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
 
   mutable const char *CurrentFunctionDescription;
 
@@ -92,11 +92,13 @@ class CStringChecker : public Checker< eval::Call,
     DefaultBool CheckCStringOutOfBounds;
     DefaultBool CheckCStringBufferOverlap;
     DefaultBool CheckCStringNotNullTerm;
+    DefaultBool CheckCStringUninitializedRead;
 
     CheckerNameRef CheckNameCStringNullArg;
     CheckerNameRef CheckNameCStringOutOfBounds;
     CheckerNameRef CheckNameCStringBufferOverlap;
     CheckerNameRef CheckNameCStringNotNullTerm;
+    CheckerNameRef CheckNameCStringUninitializedRead;
   };
 
   CStringChecksFilter Filter;
@@ -367,6 +369,15 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
     return nullptr;
   }
 
+  // Ensure that we wouldn't read uninitialized value.
+  if (Access == AccessKind::read) {
+    if (Filter.CheckCStringUninitializedRead &&
+        StInBound->getSVal(ER).isUndef()) {
+      emitUninitializedReadBug(C, StInBound, Buffer.Expression);
+      return nullptr;
+    }
+  }
+
   // Array bound check succeeded.  From this point forward the array bound
   // should always succeed.
   return StInBound;
@@ -578,6 +589,26 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
   }
 }
 
+void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
+                                              ProgramStateRef State,
+                                              const Expr *E) const {
+  if (ExplodedNode *N = C.generateErrorNode(State)) {
+    const char *Msg =
+        "Bytes string function accesses uninitialized/garbage values";
+    if (!BT_UninitRead)
+      BT_UninitRead.reset(
+          new BuiltinBug(Filter.CheckNameCStringUninitializedRead,
+                         "Accessing unitialized/garbage values", Msg));
+
+    BuiltinBug *BT = static_cast<BuiltinBug *>(BT_UninitRead.get());
+
+    auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
+    Report->addRange(E->getSourceRange());
+    bugreporter::trackExpressionValue(N, E, *Report);
+    C.emitReport(std::move(Report));
+  }
+}
+
 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
                                         ProgramStateRef State, const Stmt *S,
                                         StringRef WarningMsg) const {
@@ -2458,3 +2489,4 @@ REGISTER_CHECKER(CStringNullArg)
 REGISTER_CHECKER(CStringOutOfBounds)
 REGISTER_CHECKER(CStringBufferOverlap)
 REGISTER_CHECKER(CStringNotNullTerm)
+REGISTER_CHECKER(CStringUninitializedRead)
\ No newline at end of file

diff  --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c
index c88452e49075d..a7c7bdb23683e 100644
--- a/clang/test/Analysis/bstring.c
+++ b/clang/test/Analysis/bstring.c
@@ -2,13 +2,15 @@
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
-// RUN:   -analyzer-config eagerly-assume=false
+// RUN:   -analyzer-config eagerly-assume=false  
 //
 // RUN: %clang_analyze_cc1 -verify %s -DUSE_BUILTINS \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config eagerly-assume=false
 //
@@ -16,6 +18,7 @@
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config eagerly-assume=false
 //
@@ -23,6 +26,7 @@
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config eagerly-assume=false
 
@@ -315,7 +319,7 @@ void mempcpy15(void) {
 
   p1 = (&s2) + 1;
   p2 = mempcpy(&s2, &s1, sizeof(struct st));
-
+  
   clang_analyzer_eval(p1 == p2); // expected-warning{{TRUE}}
 }
 

diff  --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
new file mode 100644
index 0000000000000..c535e018e62c2
--- /dev/null
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -0,0 +1,59 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN: -analyzer-checker=core,alpha.unix.cstring
+
+
+// This file is generally for the alpha.unix.cstring.UninitializedRead Checker, the reason for putting it into
+// the separate file because the checker is break the some existing test cases in bstring.c file , so we don't 
+// wanna mess up with some existing test case so it's better to create separate file for it, this file also include 
+// the broken test for the reference in future about the broken tests.
+
+
+typedef typeof(sizeof(int)) size_t;
+
+void clang_analyzer_eval(int);
+
+void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
+
+void top(char *dst) {
+  char buf[10];
+  memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
+  (void)buf;
+}
+
+//===----------------------------------------------------------------------===
+// mempcpy()
+//===----------------------------------------------------------------------===
+
+void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
+
+void mempcpy14() {
+  int src[] = {1, 2, 3, 4};
+  int dst[5] = {0};
+  int *p;
+
+  p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
+   // FIXME: This behaviour is actually surprising and needs to be fixed, 
+   // mempcpy seems to consider the very last byte of the src buffer uninitialized
+   // and returning undef unfortunately. It should have returned unknown or a conjured value instead.
+
+  clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
+}
+
+struct st {
+  int i;
+  int j;
+};
+
+
+void mempcpy15() {
+  struct st s1 = {0};
+  struct st s2;
+  struct st *p1;
+  struct st *p2;
+
+  p1 = (&s2) + 1;
+  p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
+  // FIXME: It seems same as mempcpy14() case.
+  
+  clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
+}


        


More information about the cfe-commits mailing list