[clang] 1c8add1 - [analyzer] Add hack in ArrayBound to cover up missing casts (#127117)

via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 18 02:19:47 PST 2025


Author: DonĂ¡t Nagy
Date: 2025-02-18T11:19:43+01:00
New Revision: 1c8add1ec70d8d730572029ac11a70f4dfac8ed5

URL: https://github.com/llvm/llvm-project/commit/1c8add1ec70d8d730572029ac11a70f4dfac8ed5
DIFF: https://github.com/llvm/llvm-project/commit/1c8add1ec70d8d730572029ac11a70f4dfac8ed5.diff

LOG: [analyzer] Add hack in ArrayBound to cover up missing casts (#127117)

Currently there are many casts that are not modeled (i.e. ignored) by
the analyzer, which can cause paradox states (e.g. negative value stored
in `unsigned` variable) and false positive reports from various
checkers, e.g. from `security.ArrayBound`.

Unfortunately this issue is deeply rooted in the architectural
limitations of the analyzer (if we started to model the casts, it would
break other things). For details see the umbrella ticket
https://github.com/llvm/llvm-project/issues/39492

This commit adds an ugly hack in `security.ArrayBound` to silence most
of the false positives caused by this shortcoming of the engine.

Fixes #126884

Added: 
    

Modified: 
    clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
    clang/test/Analysis/out-of-bounds.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index f56e9192d1d66..954b4763034e7 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -34,24 +34,37 @@ using namespace taint;
 using llvm::formatv;
 
 namespace {
-/// If `E` is a "clean" array subscript expression, return the type of the
-/// accessed element. If the base of the subscript expression is modified by
-/// pointer arithmetic (and not the beginning of a "full" memory region), this
-/// always returns nullopt because that's the right (or the least bad) thing to
-/// do for the diagnostic output that's relying on this.
-static std::optional<QualType> determineElementType(const Expr *E,
-                                                    const CheckerContext &C) {
+/// If `E` is an array subscript expression with a base that is "clean" (= not
+/// modified by pointer arithmetic = the beginning of a memory region), return
+/// it as a pointer to ArraySubscriptExpr; otherwise return nullptr.
+/// This helper function is used by two separate heuristics that are only valid
+/// in these "clean" cases.
+static const ArraySubscriptExpr *
+getAsCleanArraySubscriptExpr(const Expr *E, const CheckerContext &C) {
   const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
   if (!ASE)
-    return std::nullopt;
+    return nullptr;
 
   const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
   if (!SubscriptBaseReg)
-    return std::nullopt;
+    return nullptr;
 
   // The base of the subscript expression is affected by pointer arithmetics,
-  // so we want to report byte offsets instead of indices.
+  // so we want to report byte offsets instead of indices and we don't want to
+  // activate the "index is unsigned -> cannot be negative" shortcut.
   if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
+    return nullptr;
+
+  return ASE;
+}
+
+/// If `E` is a "clean" array subscript expression, return the type of the
+/// accessed element; otherwise return std::nullopt because that's the best (or
+/// least bad) option for the diagnostic generation that relies on this.
+static std::optional<QualType> determineElementType(const Expr *E,
+                                                    const CheckerContext &C) {
+  const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
+  if (!ASE)
     return std::nullopt;
 
   return ASE->getType();
@@ -140,7 +153,9 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
                                    ProgramStateRef ErrorState, NonLoc Val,
                                    bool MarkTaint);
 
-  static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
+  static bool isFromCtypeMacro(const Expr *E, ASTContext &AC);
+
+  static bool isOffsetObviouslyNonnegative(const Expr *E, CheckerContext &C);
 
   static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
                                        NonLoc Offset, NonLoc Limit,
@@ -587,20 +602,48 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
         State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
 
     if (PrecedesLowerBound) {
-      // The offset may be invalid (negative)...
-      if (!WithinLowerBound) {
-        // ...and it cannot be valid (>= 0), so report an error.
-        Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
-        reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
-        return;
+      // The analyzer thinks that the offset may be invalid (negative)...
+
+      if (isOffsetObviouslyNonnegative(E, C)) {
+        // ...but the offset is obviously non-negative (clear array subscript
+        // with an unsigned index), so we're in a buggy situation.
+
+        // TODO: Currently the analyzer ignores many casts (e.g. signed ->
+        // unsigned casts), so it can easily reach states where it will load a
+        // signed (and negative) value from an unsigned variable. This sanity
+        // check is a duct tape "solution" that silences most of the ugly false
+        // positives that are caused by this buggy behavior. Note that this is
+        // not a complete solution: this cannot silence reports where pointer
+        // arithmetic complicates the picture and cannot ensure modeling of the
+        // "unsigned index is positive with highest bit set" cases which are
+        // "usurped" by the nonsense "unsigned index is negative" case.
+        // For more information about this topic, see the umbrella ticket
+        // https://github.com/llvm/llvm-project/issues/39492
+        // TODO: Remove this hack once 'SymbolCast's are modeled properly.
+
+        if (!WithinLowerBound) {
+          // The state is completely nonsense -- let's just sink it!
+          C.addSink();
+          return;
+        }
+        // Otherwise continue on the 'WithinLowerBound' branch where the
+        // unsigned index _is_ non-negative. Don't mention this assumption as a
+        // note tag, because it would just confuse the users!
+      } else {
+        if (!WithinLowerBound) {
+          // ...and it cannot be valid (>= 0), so report an error.
+          Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
+          reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
+          return;
+        }
+        // ...but it can be valid as well, so the checker will (optimistically)
+        // assume that it's valid and mention this in the note tag.
+        SUR.recordNonNegativeAssumption();
       }
-      // ...but it can be valid as well, so the checker will (optimistically)
-      // assume that it's valid and mention this in the note tag.
-      SUR.recordNonNegativeAssumption();
     }
 
     // Actually update the state. The "if" only fails in the extremely unlikely
-    // case when compareValueToThreshold returns {nullptr, nullptr} becasue
+    // case when compareValueToThreshold returns {nullptr, nullptr} because
     // evalBinOpNN fails to evaluate the less-than operator.
     if (WithinLowerBound)
       State = WithinLowerBound;
@@ -660,7 +703,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
     }
 
     // Actually update the state. The "if" only fails in the extremely unlikely
-    // case when compareValueToThreshold returns {nullptr, nullptr} becasue
+    // case when compareValueToThreshold returns {nullptr, nullptr} because
     // evalBinOpNN fails to evaluate the less-than operator.
     if (WithinUpperBound)
       State = WithinUpperBound;
@@ -725,8 +768,8 @@ void ArrayBoundChecker::reportOOB(CheckerContext &C, ProgramStateRef ErrorState,
   C.emitReport(std::move(BR));
 }
 
-bool ArrayBoundChecker::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
-  SourceLocation Loc = S->getBeginLoc();
+bool ArrayBoundChecker::isFromCtypeMacro(const Expr *E, ASTContext &ACtx) {
+  SourceLocation Loc = E->getBeginLoc();
   if (!Loc.isMacroID())
     return false;
 
@@ -744,6 +787,14 @@ bool ArrayBoundChecker::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
           (MacroName == "isupper") || (MacroName == "isxdigit"));
 }
 
+bool ArrayBoundChecker::isOffsetObviouslyNonnegative(const Expr *E,
+                                                     CheckerContext &C) {
+  const ArraySubscriptExpr *ASE = getAsCleanArraySubscriptExpr(E, C);
+  if (!ASE)
+    return false;
+  return ASE->getIdx()->getType()->isUnsignedIntegerOrEnumerationType();
+}
+
 bool ArrayBoundChecker::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
   ParentMapContext &ParentCtx = ACtx.getParentMapContext();
   do {

diff  --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c
index 7a094b8fdc840..7d6cb4ecf1b24 100644
--- a/clang/test/Analysis/out-of-bounds.c
+++ b/clang/test/Analysis/out-of-bounds.c
@@ -188,29 +188,31 @@ int test_cast_to_unsigned(signed char x) {
   if (x >= 0)
     return x;
   // FIXME: Here the analyzer ignores the signed -> unsigned cast, and manages to
-  // load a negative value from an unsigned variable. This causes an underflow
-  // report, which is an ugly false positive.
+  // load a negative value from an unsigned variable.
   // The underlying issue is tracked by Github ticket #39492.
   clang_analyzer_value(y); // expected-warning {{8s:{ [-128, -1] } }}
-  return table[y]; // expected-warning {{Out of bound access to memory preceding}}
+  // However, a hack in the ArrayBound checker suppresses the false positive
+  // underflow report that would be generated here.
+  return table[y]; // no-warning
 }
 
 int test_cast_to_unsigned_overflow(signed char x) {
   unsigned char y = x;
   if (x >= 0)
     return x;
-  // A variant of 'test_cast_to_unsigned' where the correct behavior would be
-  // an overflow report (because the negative values are cast to `unsigned
-  // char` values that are too large).
-  // FIXME: See comment in 'test_cast_to_unsigned'.
+  // FIXME: As in 'test_cast_to_unsigned', the analyzer thinks that this
+  // unsigned variable contains a negative value.
   clang_analyzer_value(y); // expected-warning {{8s:{ [-128, -1] } }}
-  return small_table[y]; // expected-warning {{Out of bound access to memory preceding}}
+  // FIXME: The following subscript expression should produce an overflow
+  // report (because negative signed char corresponds to unsigned char >= 128);
+  // but the hack in ArrayBound just silences reports and cannot "restore" the
+  // real execution paths.
+  return small_table[y]; // no-warning
 }
 
 int test_negative_offset_with_unsigned_idx(void) {
   // An example where the subscript operator uses an unsigned index, but the
-  // underflow report is still justified. (We should try to keep this if we
-  // silence false positives like the one in 'test_cast_to_unsigned'.)
+  // underflow report is still justified.
   int *p = table - 10;
   unsigned idx = 2u;
   return p[idx]; // expected-warning {{Out of bound access to memory preceding}}


        


More information about the cfe-commits mailing list