[clang] [analyzer] Improve diagnostics from ArrayBoundCheckerV2 (PR #70056)

via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 7 04:14:34 PST 2023


=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/70056 at github.com>


https://github.com/DonatNagyE updated https://github.com/llvm/llvm-project/pull/70056

>From 77143e74edda6177248bebdf0424db915aa68a05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 2 Oct 2023 13:32:31 +0200
Subject: [PATCH 1/4] [analyzer] Improve reports from ArrayBoundCheckerV2

Previously alpha.security.ArrayBoundV2 produced very spartan bug
reports; this commit ensures that the relevant (and known) details are
reported to the user.

The logic for detecting bugs is not changed, after this commit the
checker will report the same set of issues, but with better messages.

To test the details of the message generation this commit adds a new
test file 'out-of-bounds-diagnostics.c'. Three of the testcases are
added with FIXME notes because they reveal shortcomings of the existing
modeling and bounds checking code. I will try to fix them in separate
follow-up commits.
---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 181 ++++++++++++++----
 .../test/Analysis/out-of-bounds-diagnostics.c | 149 ++++++++++++++
 clang/test/Analysis/out-of-bounds-new.cpp     |  26 +--
 clang/test/Analysis/out-of-bounds.c           |  28 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |   4 +-
 clang/test/Analysis/taint-generic.c           |  20 +-
 clang/test/Analysis/taint-generic.cpp         |  14 +-
 7 files changed, 339 insertions(+), 83 deletions(-)
 create mode 100644 clang/test/Analysis/out-of-bounds-diagnostics.c

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index e80a06e38587520..7d97f36e4cffe44 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -22,23 +22,25 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include <optional>
 
 using namespace clang;
 using namespace ento;
 using namespace taint;
+using llvm::formatv;
 
 namespace {
+enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+
 class ArrayBoundCheckerV2 :
     public Checker<check::Location> {
   BugType BT{this, "Out-of-bound access"};
   BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
 
-  enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
-
   void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
-                 SVal TaintedSVal = UnknownVal()) const;
+                 NonLoc Offset, std::string RegName, std::string Msg) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
 
@@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
 /// Arr and the distance of Location from the beginning of Arr (expressed in a
 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
 /// cannot be determined.
-std::optional<std::pair<const SubRegion *, NonLoc>>
+static std::optional<std::pair<const SubRegion *, NonLoc>>
 computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
   QualType T = SVB.getArrayIndexType();
   auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
@@ -174,9 +176,119 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
   return {nullptr, nullptr};
 }
 
+static std::string getRegionName(const SubRegion *Region) {
+  std::string RegName = Region->getDescriptiveName();
+  if (!RegName.empty())
+    return RegName;
+
+  // Field regions only have descriptive names when their parent has a
+  // descriptive name; so we provide a fallback representation for them:
+  if (const auto *FR = Region->getAs<FieldRegion>()) {
+    StringRef Name = FR->getDecl()->getName();
+    if (!Name.empty())
+      return formatv("the field '{0}'", Name);
+    return "the unnamed field";
+  }
+
+  if (isa<AllocaRegion>(Region))
+    return "the memory returned by 'alloca'";
+
+  if (isa<SymbolicRegion>(Region) &&
+      isa<HeapSpaceRegion>(Region->getMemorySpace()))
+    return "the heap area";
+
+  if (isa<StringRegion>(Region))
+    return "the string literal";
+
+  return "the region";
+}
+
+static std::optional<int64_t> getConcreteValue(NonLoc SV) {
+  if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
+    const llvm::APSInt &IntVal = ConcreteVal->getValue();
+    return IntVal.tryExtValue();
+  }
+  return std::nullopt;
+}
+
+static const char *ShortMsgTemplates[] = {
+    "Out of bound access to memory preceding {0}",
+    "Out of bound access to memory after the end of {0}",
+    "Potential out of bound access to {0} with tainted offset"};
+
+static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  return formatv(ShortMsgTemplates[Kind], RegName);
+}
+
+static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
+  SmallString<128> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Access of " << RegName << " at negative byte offset";
+  if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
+    Out << ' ' << ConcreteIdx->getValue();
+  return std::string(Buf);
+}
+static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName,
+                                 NonLoc Offset, NonLoc Extent, SVal Location) {
+  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
+  assert(EReg && "this checker only handles element access");
+  QualType ElemType = EReg->getElementType();
+
+  std::optional<int64_t> OffsetN = getConcreteValue(Offset);
+  std::optional<int64_t> ExtentN = getConcreteValue(Extent);
+
+  bool UseByteOffsets = true;
+  if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) {
+    const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
+    const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
+    if (!OffsetHasRemainder && !ExtentHasRemainder) {
+      UseByteOffsets = false;
+      if (OffsetN)
+        *OffsetN /= ElemSize;
+      if (ExtentN)
+        *ExtentN /= ElemSize;
+    }
+  }
+
+  SmallString<256> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Access of ";
+  if (!ExtentN && !UseByteOffsets)
+    Out << "'" << ElemType.getAsString() << "' element in ";
+  Out << RegName << " at ";
+  if (OffsetN) {
+    Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN;
+  } else {
+    Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index");
+  }
+  if (ExtentN) {
+    Out << ", while it holds only ";
+    if (*ExtentN != 1)
+      Out << *ExtentN;
+    else
+      Out << "a single";
+    if (UseByteOffsets)
+      Out << " byte";
+    else
+      Out << " '" << ElemType.getAsString() << "' element";
+
+    if (*ExtentN > 1)
+      Out << "s";
+  }
+
+  return std::string(Buf);
+}
+static std::string getTaintMsg(std::string RegName) {
+  SmallString<128> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Access of " << RegName
+      << " with a tainted offset that may be too large";
+  return std::string(Buf);
+}
+
 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
                                         const Stmt* LoadS,
-                                        CheckerContext &checkerContext) const {
+                                        CheckerContext &C) const {
 
   // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
   // some new logic here that reasons directly about memory region extents.
@@ -193,11 +305,11 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
   // and incomplete analysis of these leads to false positives. As even
   // accurate reports would be confusing for the users, just disable reports
   // from these macros:
-  if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
+  if (isFromCtypeMacro(LoadS, C.getASTContext()))
     return;
 
-  ProgramStateRef state = checkerContext.getState();
-  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
+  ProgramStateRef state = C.getState();
+  SValBuilder &svalBuilder = C.getSValBuilder();
 
   const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
       computeOffset(state, svalBuilder, location);
@@ -223,7 +335,10 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
 
     if (state_precedesLowerBound && !state_withinLowerBound) {
       // We know that the index definitely precedes the lower bound.
-      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
+      std::string RegName = getRegionName(Reg);
+      std::string Msg = getPrecedesMsg(RegName, ByteOffset);
+      reportOOB(C, state_precedesLowerBound, OOB_Precedes, ByteOffset, RegName,
+                Msg);
       return;
     }
 
@@ -240,13 +355,19 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
     if (state_exceedsUpperBound) {
       if (!state_withinUpperBound) {
         // We know that the index definitely exceeds the upper bound.
-        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
+        std::string RegName = getRegionName(Reg);
+        std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset,
+                                        *KnownSize, location);
+        reportOOB(C, state_exceedsUpperBound, OOB_Exceeds, ByteOffset, RegName,
+                  Msg);
         return;
       }
       if (isTainted(state, ByteOffset)) {
         // Both cases are possible, but the index is tainted, so report.
-        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
-                  ByteOffset);
+        std::string RegName = getRegionName(Reg);
+        std::string Msg = getTaintMsg(RegName);
+        reportOOB(C, state_exceedsUpperBound, OOB_Taint, ByteOffset, RegName,
+                  Msg);
         return;
       }
     }
@@ -255,42 +376,28 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
       state = state_withinUpperBound;
   }
 
-  checkerContext.addTransition(state);
+  C.addTransition(state);
 }
 
 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
                                     ProgramStateRef ErrorState, OOB_Kind Kind,
-                                    SVal TaintedSVal) const {
+                                    NonLoc Offset, std::string RegName,
+                                    std::string Msg) const {
 
   ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
   if (!ErrorNode)
     return;
 
-  // FIXME: These diagnostics are preliminary, and they'll be replaced soon by
-  // a follow-up commit.
+  std::string ShortMsg = getShortMsg(Kind, RegName);
 
-  SmallString<128> Buf;
-  llvm::raw_svector_ostream Out(Buf);
-  Out << "Out of bound memory access ";
-
-  switch (Kind) {
-  case OOB_Precedes:
-    Out << "(accessed memory precedes memory block)";
-    break;
-  case OOB_Exceeds:
-    Out << "(access exceeds upper limit of memory block)";
-    break;
-  case OOB_Taint:
-    Out << "(index is tainted)";
-    break;
-  }
   auto BR = std::make_unique<PathSensitiveBugReport>(
-      Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
-  // Track back the propagation of taintedness, or do nothing if TaintedSVal is
-  // the default UnknownVal().
-  for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
-    BR->markInteresting(Sym);
-  }
+      Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode);
+
+  // Track back the propagation of taintedness.
+  if (Kind == OOB_Taint)
+    for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset))
+      BR->markInteresting(Sym);
+
   C.emitReport(std::move(BR));
 }
 
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
new file mode 100644
index 000000000000000..4f5d0d0bc91c54a
--- /dev/null
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -0,0 +1,149 @@
+// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text        \
+// RUN:     -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint -verify %s
+
+int array[10];
+
+void arrayUnderflow(void) {
+  array[-3] = 5;
+  // expected-warning at -1 {{Out of bound access to memory preceding 'array'}}
+  // expected-note at -2 {{Access of 'array' at negative byte offset -12}}
+}
+
+int scanf(const char *restrict fmt, ...);
+
+void taintedIndex(void) {
+  int index;
+  scanf("%d", &index);
+  // expected-note at -1 {{Taint originated here}}
+  // expected-note at -2 {{Taint propagated to the 2nd argument}}
+  array[index] = 5;
+  // expected-warning at -1 {{Potential out of bound access to 'array' with tainted offset}}
+  // expected-note at -2 {{Access of 'array' with a tainted offset that may be too large}}
+}
+
+void arrayOverflow(void) {
+  array[12] = 5;
+  // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+  // expected-note at -2 {{Access of 'array' at index 12, while it holds only 10 'int' elements}}
+}
+
+int scalar;
+int scalarOverflow(void) {
+  return (&scalar)[1];
+  // expected-warning at -1 {{Out of bound access to memory after the end of 'scalar'}}
+  // expected-note at -2 {{Access of 'scalar' at index 1, while it holds only a single 'int' element}}
+}
+
+int oneElementArray[1];
+int oneElementArrayOverflow(void) {
+  return oneElementArray[1];
+  // expected-warning at -1 {{Out of bound access to memory after the end of 'oneElementArray'}}
+  // expected-note at -2 {{Access of 'oneElementArray' at index 1, while it holds only a single 'int' element}}
+}
+
+short convertedArray(void) {
+  return ((short*)array)[47];
+  // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+  // expected-note at -2 {{Access of 'array' at index 47, while it holds only 20 'short' elements}}
+}
+
+struct vec {
+  int len;
+  double elems[64];
+} v;
+
+double arrayInStruct(void) {
+  return v.elems[64];
+  // expected-warning at -1 {{Out of bound access to memory after the end of 'v.elems'}}
+  // expected-note at -2 {{Access of 'v.elems' at index 64, while it holds only 64 'double' elements}}
+}
+
+double arrayInStructPtr(struct vec *pv) {
+  return pv->elems[64];
+  // expected-warning at -1 {{Out of bound access to memory after the end of the field 'elems'}}
+  // expected-note at -2 {{Access of the field 'elems' at index 64, while it holds only 64 'double' elements}}
+}
+
+struct two_bytes {
+  char lo, hi;
+};
+
+struct two_bytes convertedArray2(void) {
+  // We report this with byte offsets because the offset is not divisible by the element size.
+  struct two_bytes a = {0, 0};
+  char *p = (char*)&a;
+  return *((struct two_bytes*)(p + 7));
+  // expected-warning at -1 {{Out of bound access to memory after the end of 'a'}}
+  // expected-note at -2 {{Access of 'a' at byte offset 7, while it holds only 2 bytes}}
+}
+
+int intFromString(void) {
+  // We report this with byte offsets because the extent is not divisible by the element size.
+  return ((const int*)"this is a string of 33 characters")[20];
+  // expected-warning at -1 {{Out of bound access to memory after the end of the string literal}}
+  // expected-note at -2 {{Access of the string literal at byte offset 80, while it holds only 34 bytes}}
+}
+
+int intFromStringDivisible(void) {
+  // However, this is reported with indices/elements, because the extent happens to be a multiple of 4.
+  return ((const int*)"abc")[20];
+  // expected-warning at -1 {{Out of bound access to memory after the end of the string literal}}
+  // expected-note at -2 {{Access of the string literal at index 20, while it holds only a single 'int' element}}
+}
+
+typedef __typeof(sizeof(int)) size_t;
+void *malloc(size_t size);
+
+int *mallocRegion(void) {
+  int *mem = (int*)malloc(2*sizeof(int));
+  mem[3] = -2;
+  // expected-warning at -1 {{Out of bound access to memory after the end of the heap area}}
+  // expected-note at -2 {{Access of the heap area at index 3, while it holds only 2 'int' elements}}
+  return mem;
+}
+
+void *alloca(size_t size);
+
+int allocaRegion(void) {
+  int *mem = (int*)alloca(2*sizeof(int));
+  mem[3] = -2;
+  // expected-warning at -1 {{Out of bound access to memory after the end of the heap area}}
+  // expected-note at -2 {{Access of the heap area at index 3, while it holds only 2 'int' elements}}
+  // FIXME: this should be
+  //   {{Out of bound access to memory after the end of the memory returned by 'alloca'}}
+  //   {{Access of the memory returned by 'alloca' at index 3, while it holds only 2 'int' elements}}
+  // but apparently something models 'alloca' as if it was allocating on the heap
+  return *mem;
+}
+
+int *unknownExtent(int arg) {
+  if (arg >= 2)
+    return 0;
+  int *mem = (int*)malloc(arg);
+  mem[8] = -2;
+  // FIXME: this should produce
+  //   {{Out of bound access to memory after the end of the heap area}}
+  //   {{Access of 'int' element in the heap area at index 8}}
+  return mem;
+}
+
+void unknownIndex(int arg) {
+  // expected-note at +2 {{Assuming 'arg' is >= 12}}
+  // expected-note at +1 {{Taking true branch}}
+  if (arg >= 12)
+    array[arg] = -2;
+  // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+  // expected-note at -2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}}
+}
+
+int *nothingIsCertain(int x, int y) {
+  if (x >= 2)
+    return 0;
+  int *mem = (int*)malloc(x);
+  if (y >= 8)
+    mem[y] = -2;
+  // FIXME: this should produce
+  //   {{Out of bound access to memory after the end of the heap area}}
+  //   {{Access of 'int' element in the heap area at an overflowing index}}
+  return mem;
+}
diff --git a/clang/test/Analysis/out-of-bounds-new.cpp b/clang/test/Analysis/out-of-bounds-new.cpp
index b7ceea72a270db4..af4ec47d8358aab 100644
--- a/clang/test/Analysis/out-of-bounds-new.cpp
+++ b/clang/test/Analysis/out-of-bounds-new.cpp
@@ -5,7 +5,7 @@
 // - constant integer size for buffer
 void test1(int x) {
   int *buf = new int[100];
-  buf[100] = 1; // expected-warning{{Out of bound memory access}}
+  buf[100] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ok(int x) {
@@ -20,7 +20,7 @@ void test1_ok(int x) {
 void test1_ptr(int x) {
   int *buf = new int[100];
   int *p = buf;
-  p[101] = 1; // expected-warning{{Out of bound memory access}}
+  p[101] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ptr_ok(int x) {
@@ -37,7 +37,7 @@ void test1_ptr_arith(int x) {
   int *buf = new int[100];
   int *p = buf;
   p = p + 100;
-  p[0] = 1; // expected-warning{{Out of bound memory access}}
+  p[0] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ptr_arith_ok(int x) {
@@ -51,7 +51,7 @@ void test1_ptr_arith_bad(int x) {
   int *buf = new int[100];
   int *p = buf;
   p = p + 99;
-  p[1] = 1; // expected-warning{{Out of bound memory access}}
+  p[1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ptr_arith_ok2(int x) {
@@ -66,7 +66,7 @@ void test1_ptr_arith_ok2(int x) {
 // - constant integer size for buffer
 void test2(int x) {
   int *buf = new int[100];
-  buf[-1] = 1; // expected-warning{{Out of bound memory access}}
+  buf[-1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests doing an out-of-bounds access before the start of an array using:
@@ -76,7 +76,7 @@ void test2(int x) {
 void test2_ptr(int x) {
   int *buf = new int[100];
   int *p = buf;
-  p[-1] = 1; // expected-warning{{Out of bound memory access}}
+  p[-1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests doing an out-of-bounds access before the start of an array using:
@@ -87,35 +87,35 @@ void test2_ptr_arith(int x) {
   int *buf = new int[100];
   int *p = buf;
   --p;
-  p[0] = 1; // expected-warning {{Out of bound memory access (accessed memory precedes memory block)}}
+  p[0] = 1; // expected-warning {{Out of bound access to memory preceding}}
 }
 
 // Tests under-indexing
 // of a multi-dimensional array
 void test2_multi(int x) {
   auto buf = new int[100][100];
-  buf[0][-1] = 1; // expected-warning{{Out of bound memory access}}
+  buf[0][-1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests under-indexing
 // of a multi-dimensional array
 void test2_multi_b(int x) {
   auto buf = new int[100][100];
-  buf[-1][0] = 1; // expected-warning{{Out of bound memory access}}
+  buf[-1][0] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests over-indexing
 // of a multi-dimensional array
 void test2_multi_c(int x) {
   auto buf = new int[100][100];
-  buf[100][0] = 1; // expected-warning{{Out of bound memory access}}
+  buf[100][0] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests over-indexing
 // of a multi-dimensional array
 void test2_multi_2(int x) {
   auto buf = new int[100][100];
-  buf[99][100] = 1; // expected-warning{{Out of bound memory access}}
+  buf[99][100] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests normal access of
@@ -131,7 +131,7 @@ void test_diff_types(int x) {
   int *buf = new int[10]; //10*sizeof(int) Bytes allocated
   char *cptr = (char *)buf;
   cptr[sizeof(int) * 9] = 1;  // no-warning
-  cptr[sizeof(int) * 10] = 1; // expected-warning{{Out of bound memory access}}
+  cptr[sizeof(int) * 10] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests over-indexing
@@ -139,7 +139,7 @@ void test_diff_types(int x) {
 void test_non_array(int x) {
   int *ip = new int;
   ip[0] = 1; // no-warning
-  ip[1] = 2; // expected-warning{{Out of bound memory access}}
+  ip[1] = 2; // expected-warning{{Out of bound access to memory}}
 }
 
 //Tests over-indexing
diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c
index cf8e60f66ac063d..ed457e869600630 100644
--- a/clang/test/Analysis/out-of-bounds.c
+++ b/clang/test/Analysis/out-of-bounds.c
@@ -7,7 +7,7 @@ void clang_analyzer_eval(int);
 // - constant integer size for buffer
 void test1(int x) {
   int buf[100];
-  buf[100] = 1; // expected-warning{{Out of bound memory access}}
+  buf[100] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ok(int x) {
@@ -17,12 +17,12 @@ void test1_ok(int x) {
 
 const char test1_strings_underrun(int x) {
   const char *mystr = "mary had a little lamb";
-  return mystr[-1]; // expected-warning{{Out of bound memory access}}
+  return mystr[-1]; // expected-warning{{Out of bound access to memory}}
 }
 
 const char test1_strings_overrun(int x) {
   const char *mystr = "mary had a little lamb";
-  return mystr[1000];  // expected-warning{{Out of bound memory access}}
+  return mystr[1000];  // expected-warning{{Out of bound access to memory}}
 }
 
 const char test1_strings_ok(int x) {
@@ -37,7 +37,7 @@ const char test1_strings_ok(int x) {
 void test1_ptr(int x) {
   int buf[100];
   int *p = buf;
-  p[101] = 1; // expected-warning{{Out of bound memory access}}
+  p[101] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ptr_ok(int x) {
@@ -54,7 +54,7 @@ void test1_ptr_arith(int x) {
   int buf[100];
   int *p = buf;
   p = p + 100;
-  p[0] = 1; // expected-warning{{Out of bound memory access}}
+  p[0] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ptr_arith_ok(int x) {
@@ -68,7 +68,7 @@ void test1_ptr_arith_bad(int x) {
   int buf[100];
   int *p = buf;
   p = p + 99;
-  p[1] = 1; // expected-warning{{Out of bound memory access}}
+  p[1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test1_ptr_arith_ok2(int x) {
@@ -83,7 +83,7 @@ void test1_ptr_arith_ok2(int x) {
 // - constant integer size for buffer
 void test2(int x) {
   int buf[100];
-  buf[-1] = 1; // expected-warning{{Out of bound memory access}}
+  buf[-1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests doing an out-of-bounds access before the start of an array using:
@@ -93,7 +93,7 @@ void test2(int x) {
 void test2_ptr(int x) {
   int buf[100];
   int *p = buf;
-  p[-1] = 1; // expected-warning{{Out of bound memory access}}
+  p[-1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests doing an out-of-bounds access before the start of an array using:
@@ -104,7 +104,7 @@ void test2_ptr_arith(int x) {
   int buf[100];
   int *p = buf;
   --p;
-  p[0] = 1; // expected-warning {{Out of bound memory access (accessed memory precedes memory block)}}
+  p[0] = 1; // expected-warning {{Out of bound access to memory preceding}}
 }
 
 // Tests doing an out-of-bounds access before the start of a multi-dimensional
@@ -113,7 +113,7 @@ void test2_ptr_arith(int x) {
 // - constant integer sizes for the array
 void test2_multi(int x) {
   int buf[100][100];
-  buf[0][-1] = 1; // expected-warning{{Out of bound memory access}}
+  buf[0][-1] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 // Tests doing an out-of-bounds access before the start of a multi-dimensional
@@ -122,7 +122,7 @@ void test2_multi(int x) {
 // - constant integer sizes for the array
 void test2_multi_b(int x) {
   int buf[100][100];
-  buf[-1][0] = 1; // expected-warning{{Out of bound memory access}}
+  buf[-1][0] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test2_multi_ok(int x) {
@@ -133,13 +133,13 @@ void test2_multi_ok(int x) {
 void test3(int x) {
   int buf[100];
   if (x < 0)
-    buf[x] = 1; // expected-warning{{Out of bound memory access}}
+    buf[x] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test4(int x) {
   int buf[100];
   if (x > 99)
-    buf[x] = 1; // expected-warning{{Out of bound memory access}}
+    buf[x] = 1; // expected-warning{{Out of bound access to memory}}
 }
 
 void test_assume_after_access(unsigned long x) {
@@ -165,7 +165,7 @@ typedef struct {
 user_t *get_symbolic_user(void);
 char test_underflow_symbolic_2() {
   user_t *user = get_symbolic_user();
-  return user->name[-1]; // expected-warning{{Out of bound memory access}}
+  return user->name[-1]; // expected-warning{{Out of bound access to memory}}
 }
 
 void test_incomplete_struct(void) {
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e444..67dc67e627b3b9c 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -29,8 +29,8 @@ int taintDiagnosticOutOfBound(void) {
   int Array[] = {1, 2, 3, 4, 5};
   scanf("%d", &index); // expected-note {{Taint originated here}}
                        // expected-note at -1 {{Taint propagated to the 2nd argument}}
-  return Array[index]; // expected-warning {{Out of bound memory access (index is tainted)}}
-                       // expected-note at -1 {{Out of bound memory access (index is tainted)}}
+  return Array[index]; // expected-warning {{Potential out of bound access to 'Array' with tainted offset}}
+                       // expected-note at -1 {{Access of 'Array' with a tainted offset that may be too large}}
 }
 
 int taintDiagnosticDivZero(int operand) {
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb7..4ff474b2ed40d81 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -169,21 +169,21 @@ void bufferScanfDirect(void)
 {
   int n;
   scanf("%d", &n);
-  Buffer[n] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[n] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void bufferScanfArithmetic1(int x) {
   int n;
   scanf("%d", &n);
   int m = (n - 3);
-  Buffer[m] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[m] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void bufferScanfArithmetic2(int x) {
   int n;
   scanf("%d", &n);
   int m = 100 - (n + 3) * x;
-  Buffer[m] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[m] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void bufferScanfAssignment(int x) {
@@ -192,7 +192,7 @@ void bufferScanfAssignment(int x) {
   int m;
   if (x > 0) {
     m = n;
-    Buffer[m] = 1; // expected-warning {{Out of bound memory access }}
+    Buffer[m] = 1; // expected-warning {{Potential out of bound access }}
   }
 }
 
@@ -203,7 +203,7 @@ void scanfArg(void) {
 
 void bufferGetchar(int x) {
   int m = getchar();
-  Buffer[m] = 1;  //expected-warning {{Out of bound memory access (index is tainted)}}
+  Buffer[m] = 1;  //expected-warning {{Potential out of bound access}}
 }
 
 extern const unsigned short int **__ctype_b_loc (void);
@@ -216,7 +216,7 @@ int isdigitImplFalsePositive(void) {
   // macros in ctypes.h.
   int c = getchar();
   return ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit);
-  //expected-warning at -1 {{Out of bound memory access (index is tainted)}}
+  //expected-warning at -1 {{Potential out of bound access}}
 }
 
 int isdigitSuppressed(void) {
@@ -1113,26 +1113,26 @@ void mySink(int, int, int);
 
 void testConfigurationSources1(void) {
   int x = mySource1();
-  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[x] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationSources2(void) {
   int x;
   mySource2(&x);
-  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[x] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationSources3(void) {
   int x, y;
   myScanf("%d %d", &x, &y);
-  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[y] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationPropagation(void) {
   int x = mySource1();
   int y;
   myPropagator(x, &y);
-  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[y] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationFilter(void) {
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index c907c8f5eeb958b..0aadef88c704cfb 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -46,7 +46,7 @@ void testConfigurationNamespacePropagation1() {
   Buffer[x] = 1; // no-warning
 
   scanf("%d", &x);
-  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[x] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationNamespacePropagation2() {
@@ -54,19 +54,19 @@ void testConfigurationNamespacePropagation2() {
   Buffer[x] = 1; // no-warning
 
   int y = myNamespace::mySource3();
-  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[y] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationNamespacePropagation3() {
   int x = myAnotherNamespace::mySource3();
-  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[x] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationNamespacePropagation4() {
   int x;
   // Configured functions without scope should match for all function.
   myNamespace::myScanf("%d", &x);
-  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[x] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationNamespaceFilter1() {
@@ -78,7 +78,7 @@ void testConfigurationNamespaceFilter1() {
   int y = mySource1();
   if (isOutOfRange2(&y))
     return;
-  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[y] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testConfigurationNamespaceFilter2() {
@@ -128,11 +128,11 @@ void testConfigurationMemberFunc() {
   Buffer[x] = 1; // no-warning
 
   foo.myMemberScanf("%d", &x);
-  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+  Buffer[x] = 1; // expected-warning {{Potential out of bound access }}
 }
 
 void testReadingFromStdin(char **p) {
   int n;
   fscanf(stdin, "%d", &n);
-  Buffer[n] = 1; // expected-warning {{Out of bound memory access (index is tainted)}}
+  Buffer[n] = 1; // expected-warning {{Potential out of bound access }}
 }

>From ffd3edb38149f2f6055a28c9d2b7372742c6b25e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 25 Oct 2023 10:46:40 +0200
Subject: [PATCH 2/4] Fix git-clang-format and rename some variables to
 CamelCase

---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 50 +++++++++----------
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 7d97f36e4cffe44..cba04b4d7077e09 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -286,8 +286,8 @@ static std::string getTaintMsg(std::string RegName) {
   return std::string(Buf);
 }
 
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
-                                        const Stmt* LoadS,
+void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
+                                        const Stmt *LoadS,
                                         CheckerContext &C) const {
 
   // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
@@ -308,11 +308,11 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
   if (isFromCtypeMacro(LoadS, C.getASTContext()))
     return;
 
-  ProgramStateRef state = C.getState();
-  SValBuilder &svalBuilder = C.getSValBuilder();
+  ProgramStateRef State = C.getState();
+  SValBuilder &SVB = C.getSValBuilder();
 
   const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
-      computeOffset(state, svalBuilder, location);
+      computeOffset(State, SVB, Location);
 
   if (!RawOffset)
     return;
@@ -329,54 +329,50 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
     // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
     // non-symbolic regions (e.g. a field subregion of a symbolic region) in
     // unknown space.
-    auto [state_precedesLowerBound, state_withinLowerBound] =
-        compareValueToThreshold(state, ByteOffset,
-                                svalBuilder.makeZeroArrayIndex(), svalBuilder);
+    auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
+        State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
 
-    if (state_precedesLowerBound && !state_withinLowerBound) {
+    if (PrecedesLowerBound && !WithinLowerBound) {
       // We know that the index definitely precedes the lower bound.
       std::string RegName = getRegionName(Reg);
       std::string Msg = getPrecedesMsg(RegName, ByteOffset);
-      reportOOB(C, state_precedesLowerBound, OOB_Precedes, ByteOffset, RegName,
-                Msg);
+      reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
       return;
     }
 
-    if (state_withinLowerBound)
-      state = state_withinLowerBound;
+    if (WithinLowerBound)
+      State = WithinLowerBound;
   }
 
   // CHECK UPPER BOUND
-  DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
+  DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
   if (auto KnownSize = Size.getAs<NonLoc>()) {
-    auto [state_withinUpperBound, state_exceedsUpperBound] =
-        compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
+    auto [WithinUpperBound, ExceedsUpperBound] =
+        compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
 
-    if (state_exceedsUpperBound) {
-      if (!state_withinUpperBound) {
+    if (ExceedsUpperBound) {
+      if (!WithinUpperBound) {
         // We know that the index definitely exceeds the upper bound.
         std::string RegName = getRegionName(Reg);
         std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset,
-                                        *KnownSize, location);
-        reportOOB(C, state_exceedsUpperBound, OOB_Exceeds, ByteOffset, RegName,
-                  Msg);
+                                        *KnownSize, Location);
+        reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
         return;
       }
-      if (isTainted(state, ByteOffset)) {
+      if (isTainted(State, ByteOffset)) {
         // Both cases are possible, but the index is tainted, so report.
         std::string RegName = getRegionName(Reg);
         std::string Msg = getTaintMsg(RegName);
-        reportOOB(C, state_exceedsUpperBound, OOB_Taint, ByteOffset, RegName,
-                  Msg);
+        reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
         return;
       }
     }
 
-    if (state_withinUpperBound)
-      state = state_withinUpperBound;
+    if (WithinUpperBound)
+      State = WithinUpperBound;
   }
 
-  C.addTransition(state);
+  C.addTransition(State);
 }
 
 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,

>From 64c5ffe04c2506d3650277b09e9a20081f0784dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Tue, 31 Oct 2023 15:35:21 +0100
Subject: [PATCH 3/4] Implement suggestions from @steakhal

---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index cba04b4d7077e09..ffc7236d1e2551a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -177,15 +177,13 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
 }
 
 static std::string getRegionName(const SubRegion *Region) {
-  std::string RegName = Region->getDescriptiveName();
-  if (!RegName.empty())
+  if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
     return RegName;
 
   // Field regions only have descriptive names when their parent has a
   // descriptive name; so we provide a fallback representation for them:
   if (const auto *FR = Region->getAs<FieldRegion>()) {
-    StringRef Name = FR->getDecl()->getName();
-    if (!Name.empty())
+    if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
       return formatv("the field '{0}'", Name);
     return "the unnamed field";
   }
@@ -205,18 +203,17 @@ static std::string getRegionName(const SubRegion *Region) {
 
 static std::optional<int64_t> getConcreteValue(NonLoc SV) {
   if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
-    const llvm::APSInt &IntVal = ConcreteVal->getValue();
-    return IntVal.tryExtValue();
+    return ConcreteVal->getValue().tryExtValue();
   }
   return std::nullopt;
 }
 
-static const char *ShortMsgTemplates[] = {
-    "Out of bound access to memory preceding {0}",
-    "Out of bound access to memory after the end of {0}",
-    "Potential out of bound access to {0} with tainted offset"};
-
 static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
+  static const char *ShortMsgTemplates[] = {
+      "Out of bound access to memory preceding {0}",
+      "Out of bound access to memory after the end of {0}",
+      "Potential out of bound access to {0} with tainted offset"};
+
   return formatv(ShortMsgTemplates[Kind], RegName);
 }
 

>From 979a38721cb3ebeda89b8a1bf6a39aaca2ed32e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Tue, 7 Nov 2023 13:13:40 +0100
Subject: [PATCH 4/4] Make the message for tainted offsets more general

---
 clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 2 +-
 clang/test/Analysis/out-of-bounds-diagnostics.c           | 2 +-
 clang/test/Analysis/taint-diagnostic-visitor.c            | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index ffc7236d1e2551a..7352df809f520bc 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -279,7 +279,7 @@ static std::string getTaintMsg(std::string RegName) {
   SmallString<128> Buf;
   llvm::raw_svector_ostream Out(Buf);
   Out << "Access of " << RegName
-      << " with a tainted offset that may be too large";
+      << " with a tainted offset that may be out of bounds";
   return std::string(Buf);
 }
 
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 4f5d0d0bc91c54a..ec2cce19a9e6f65 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -18,7 +18,7 @@ void taintedIndex(void) {
   // expected-note at -2 {{Taint propagated to the 2nd argument}}
   array[index] = 5;
   // expected-warning at -1 {{Potential out of bound access to 'array' with tainted offset}}
-  // expected-note at -2 {{Access of 'array' with a tainted offset that may be too large}}
+  // expected-note at -2 {{Access of 'array' with a tainted offset that may be out of bounds}}
 }
 
 void arrayOverflow(void) {
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c
index 67dc67e627b3b9c..c3dd769088274ad 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -30,7 +30,7 @@ int taintDiagnosticOutOfBound(void) {
   scanf("%d", &index); // expected-note {{Taint originated here}}
                        // expected-note at -1 {{Taint propagated to the 2nd argument}}
   return Array[index]; // expected-warning {{Potential out of bound access to 'Array' with tainted offset}}
-                       // expected-note at -1 {{Access of 'Array' with a tainted offset that may be too large}}
+                       // expected-note at -1 {{Access of 'Array' with a tainted offset that may be out of bounds}}
 }
 
 int taintDiagnosticDivZero(int operand) {



More information about the cfe-commits mailing list