[clang] [analyzer] Mention possibility of underflow in array overflow errors (PR #84201)

via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 11 02:39:07 PDT 2024


=?utf-8?q?Don=C3=A1t?= Nagy <donat.nagy at ericsson.com>,NagyDonat
 <donat.nagy at ericsson.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/84201 at github.com>


https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/84201

>From 74c7c04cb456c0a058ca36a2ca4ffa4d70d576e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 6 Mar 2024 18:09:02 +0100
Subject: [PATCH 1/3] [analyzer] Mention possibility of underflow in array
 overflow errors

The checker alpha.security.ArrayBoundV2 performs bounds checking in two
steps: first it checks for underflow, and if it isn't guaranteed then it
assumes that there is no underflow. After this, it checks for overflow,
and if it's guaranteed or the index is tainted then it reports it.

This meant that in situations where overflow and underflow are both
possible (but the index is either tainted or guaranteed to be invalid),
the checker was reporting an overflow error.

This commit modifies the messages printed in these cases to mention the
possibility of an underflow.
---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 37 +++++++-----
 .../test/Analysis/out-of-bounds-diagnostics.c | 56 ++++++++++++++++++-
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 3 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 29eb932584027d..664d26b00a3cab 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -83,6 +83,8 @@ class StateUpdateReporter {
     AssumedUpperBound = UpperBoundVal;
   }
 
+  bool assumedNonNegative() { return AssumedNonNegative; }
+
   const NoteTag *createNoteTag(CheckerContext &C) const;
 
 private:
@@ -402,7 +404,8 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
 }
 
 static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
-                               NonLoc Offset, NonLoc Extent, SVal Location) {
+                               NonLoc Offset, NonLoc Extent, SVal Location,
+                               bool AssumedNonNegative) {
   std::string RegName = getRegionName(Region);
   const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
   assert(EReg && "this checker only handles element access");
@@ -414,6 +417,7 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
   int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
 
   bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
+  const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
 
   SmallString<256> Buf;
   llvm::raw_svector_ostream Out(Buf);
@@ -421,10 +425,12 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
   if (!ExtentN && !UseByteOffsets)
     Out << "'" << ElemType.getAsString() << "' element in ";
   Out << RegName << " at ";
-  if (OffsetN) {
-    Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN;
+  if (AssumedNonNegative) {
+    Out << "a negative or overflowing " << OffsetOrIndex;
+  } else if (OffsetN) {
+    Out << OffsetOrIndex << " " << *OffsetN;
   } else {
-    Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index");
+    Out << "an overflowing " << OffsetOrIndex;
   }
   if (ExtentN) {
     Out << ", while it holds only ";
@@ -441,17 +447,19 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
       Out << "s";
   }
 
-  return {
-      formatv("Out of bound access to memory after the end of {0}", RegName),
-      std::string(Buf)};
+  return {formatv("Out of bound access to memory {0} {1}",
+                  AssumedNonNegative ? "around" : "after the end of", RegName),
+          std::string(Buf)};
 }
 
-static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) {
+static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,
+                             bool AssumedNonNegative) {
   std::string RegName = getRegionName(Region);
   return {formatv("Potential out of bound access to {0} with tainted {1}",
                   RegName, OffsetName),
-          formatv("Access of {0} with a tainted {1} that may be too large",
-                  RegName, OffsetName)};
+          formatv("Access of {0} with a tainted {1} that may be {2}too large",
+                  RegName, OffsetName,
+                  AssumedNonNegative ? "negative or " : "")};
 }
 
 const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
@@ -603,6 +611,8 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
     auto [WithinUpperBound, ExceedsUpperBound] =
         compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
 
+    bool AssumedNonNegative = SUR.assumedNonNegative();
+
     if (ExceedsUpperBound) {
       // The offset may be invalid (>= Size)...
       if (!WithinUpperBound) {
@@ -615,8 +625,9 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
           return;
         }
 
-        Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset,
-                                       *KnownSize, Location);
+        Messages Msgs =
+            getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
+                           Location, AssumedNonNegative);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
         return;
       }
@@ -632,7 +643,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
           if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
             OffsetName = "index";
 
-        Messages Msgs = getTaintMsgs(Reg, OffsetName);
+        Messages Msgs = getTaintMsgs(Reg, OffsetName, AssumedNonNegative);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
                   /*IsTaintBug=*/true);
         return;
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 0c3c67c6a546ad..db1bebe10a2f58 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -24,6 +24,33 @@ void taintedIndex(void) {
   scanf("%d", &index);
   // expected-note at -1 {{Taint originated here}}
   // expected-note at -2 {{Taint propagated to the 2nd argument}}
+  TenElements[index] = 5;
+  // expected-warning at -1 {{Potential out of bound access to 'TenElements' with tainted index}}
+  // expected-note at -2 {{Access of 'TenElements' with a tainted index that may be negative or too large}}
+}
+
+void taintedIndexNonneg(void) {
+  int index;
+  scanf("%d", &index);
+  // expected-note at -1 {{Taint originated here}}
+  // expected-note at -2 {{Taint propagated to the 2nd argument}}
+
+  // expected-note at +2 {{Assuming 'index' is >= 0}}
+  // expected-note at +1 {{Taking false branch}}
+  if (index < 0)
+    return;
+
+  TenElements[index] = 5;
+  // expected-warning at -1 {{Potential out of bound access to 'TenElements' with tainted index}}
+  // expected-note at -2 {{Access of 'TenElements' with a tainted index that may be too large}}
+}
+
+void taintedIndexUnsigned(void) {
+  unsigned index;
+  scanf("%u", &index);
+  // expected-note at -1 {{Taint originated here}}
+  // expected-note at -2 {{Taint propagated to the 2nd argument}}
+
   TenElements[index] = 5;
   // expected-warning at -1 {{Potential out of bound access to 'TenElements' with tainted index}}
   // expected-note at -2 {{Access of 'TenElements' with a tainted index that may be too large}}
@@ -59,7 +86,7 @@ void taintedOffset(void) {
   int *p = TenElements + index;
   p[0] = 5;
   // expected-warning at -1 {{Potential out of bound access to 'TenElements' with tainted offset}}
-  // expected-note at -2 {{Access of 'TenElements' with a tainted offset that may be too large}}
+  // expected-note at -2 {{Access of 'TenElements' with a tainted offset that may be negative or too large}}
 }
 
 void arrayOverflow(void) {
@@ -109,6 +136,33 @@ int *potentialAfterTheEndPtr(int idx) {
   // &TenElements[idx].
 }
 
+int overflowOrUnderflow(int arg) {
+  // expected-note at +2 {{Assuming 'arg' is < 0}}
+  // expected-note at +1 {{Taking false branch}}
+  if (arg >= 0)
+    return 0;
+
+  return TenElements[arg - 1];
+  // expected-warning at -1 {{Out of bound access to memory around 'TenElements'}}
+  // expected-note at -2 {{Access of 'TenElements' at a negative or overflowing index, while it holds only 10 'int' elements}}
+}
+
+char TwoElements[2] = {11, 22};
+char overflowOrUnderflowConcrete(int arg) {
+  // expected-note at +6 {{Assuming 'arg' is < 3}}
+  // expected-note at +5 {{Left side of '||' is false}}
+  // expected-note at +4 {{Assuming 'arg' is not equal to 0}}
+  // expected-note at +3 {{Left side of '||' is false}}
+  // expected-note at +2 {{Assuming 'arg' is not equal to 1}}
+  // expected-note at +1 {{Taking false branch}}
+  if (arg >= 3 || arg == 0 || arg == 1)
+    return 0;
+
+  return TwoElements[arg];
+  // expected-warning at -1 {{Out of bound access to memory around 'TwoElements'}}
+  // expected-note at -2 {{Access of 'TwoElements' at a negative or overflowing index, while it holds only 2 'char' elements}}
+}
+
 int scalar;
 int scalarOverflow(void) {
   return (&scalar)[1];
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c
index 020e9579ac535c..2ba7d9938fc3d8 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -30,7 +30,7 @@ int taintDiagnosticOutOfBound(void) {
   scanf("%d", &index); // expected-note {{Taint originated here}}
                        // expected-note at -1 {{Taint propagated to the 2nd argument}}
   return Array[index]; // expected-warning {{Potential out of bound access to 'Array' with tainted index}}
-                       // expected-note at -1 {{Access of 'Array' with a tainted index that may be too large}}
+                       // expected-note at -1 {{Access of 'Array' with a tainted index}}
 }
 
 int taintDiagnosticDivZero(int operand) {

>From 1d5662fe14e7231f3c105b2190d699dc40804757 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Thu, 7 Mar 2024 17:26:22 +0100
Subject: [PATCH 2/3] Fix a misleading variable name

---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 664d26b00a3cab..f82288f1099e88 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -405,7 +405,7 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
 
 static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
                                NonLoc Offset, NonLoc Extent, SVal Location,
-                               bool AssumedNonNegative) {
+                               bool AlsoMentionUnderflow) {
   std::string RegName = getRegionName(Region);
   const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
   assert(EReg && "this checker only handles element access");
@@ -425,7 +425,7 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
   if (!ExtentN && !UseByteOffsets)
     Out << "'" << ElemType.getAsString() << "' element in ";
   Out << RegName << " at ";
-  if (AssumedNonNegative) {
+  if (AlsoMentionUnderflow) {
     Out << "a negative or overflowing " << OffsetOrIndex;
   } else if (OffsetN) {
     Out << OffsetOrIndex << " " << *OffsetN;
@@ -448,18 +448,19 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
   }
 
   return {formatv("Out of bound access to memory {0} {1}",
-                  AssumedNonNegative ? "around" : "after the end of", RegName),
+                  AlsoMentionUnderflow ? "around" : "after the end of",
+                  RegName),
           std::string(Buf)};
 }
 
 static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,
-                             bool AssumedNonNegative) {
+                             bool AlsoMentionUnderflow) {
   std::string RegName = getRegionName(Region);
   return {formatv("Potential out of bound access to {0} with tainted {1}",
                   RegName, OffsetName),
           formatv("Access of {0} with a tainted {1} that may be {2}too large",
                   RegName, OffsetName,
-                  AssumedNonNegative ? "negative or " : "")};
+                  AlsoMentionUnderflow ? "negative or " : "")};
 }
 
 const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
@@ -608,11 +609,16 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
   // CHECK UPPER BOUND
   DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
   if (auto KnownSize = Size.getAs<NonLoc>()) {
+    // In a situation where both underflow and overflow are possible (but the
+    // index is either tainted or known to be invalid), the logic of this
+    // checker will first assume that the offset is non-negative, and then
+    // (with this additional assumption) it will detect an overflow error.
+    // In this situation the warning message should mention both possibilities.
+    bool AlsoMentionUnderflow = SUR.assumedNonNegative();
+
     auto [WithinUpperBound, ExceedsUpperBound] =
         compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
 
-    bool AssumedNonNegative = SUR.assumedNonNegative();
-
     if (ExceedsUpperBound) {
       // The offset may be invalid (>= Size)...
       if (!WithinUpperBound) {
@@ -627,7 +633,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
 
         Messages Msgs =
             getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
-                           Location, AssumedNonNegative);
+                           Location, AlsoMentionUnderflow);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
         return;
       }
@@ -643,7 +649,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
           if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
             OffsetName = "index";
 
-        Messages Msgs = getTaintMsgs(Reg, OffsetName, AssumedNonNegative);
+        Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
         reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
                   /*IsTaintBug=*/true);
         return;

>From 192cdcfcf1d75f48fb6cafeda81c6e45e11f98ff Mon Sep 17 00:00:00 2001
From: NagyDonat <donat.nagy at ericsson.com>
Date: Mon, 11 Mar 2024 10:39:00 +0100
Subject: [PATCH 3/3] Tweak expected-note handling in a test

Co-authored-by: Balazs Benics <benicsbalazs at gmail.com>
---
 clang/test/Analysis/out-of-bounds-diagnostics.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index db1bebe10a2f58..92f983d8b15612 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -149,13 +149,13 @@ int overflowOrUnderflow(int arg) {
 
 char TwoElements[2] = {11, 22};
 char overflowOrUnderflowConcrete(int arg) {
-  // expected-note at +6 {{Assuming 'arg' is < 3}}
-  // expected-note at +5 {{Left side of '||' is false}}
-  // expected-note at +4 {{Assuming 'arg' is not equal to 0}}
-  // expected-note at +3 {{Left side of '||' is false}}
-  // expected-note at +2 {{Assuming 'arg' is not equal to 1}}
-  // expected-note at +1 {{Taking false branch}}
-  if (arg >= 3 || arg == 0 || arg == 1)
+  // expected-note@#cond {{Assuming 'arg' is < 3}}
+  // expected-note@#cond {{Left side of '||' is false}}
+  // expected-note@#cond {{Assuming 'arg' is not equal to 0}}
+  // expected-note@#cond {{Left side of '||' is false}}
+  // expected-note@#cond {{Assuming 'arg' is not equal to 1}}
+  // expected-note@#cond {{Taking false branch}}
+  if (arg >= 3 || arg == 0 || arg == 1) // #cond
     return 0;
 
   return TwoElements[arg];



More information about the cfe-commits mailing list