[clang] 8c22cbe - [analyzer] ArrayBoundCheckerV2: suppress false positives from ctype macros

Donát Nagy via cfe-commits cfe-commits at lists.llvm.org
Wed May 3 09:55:12 PDT 2023


Author: Donát Nagy
Date: 2023-05-03T18:52:27+02:00
New Revision: 8c22cbea87beb74da3dc5891c40cdf574cd5fe56

URL: https://github.com/llvm/llvm-project/commit/8c22cbea87beb74da3dc5891c40cdf574cd5fe56
DIFF: https://github.com/llvm/llvm-project/commit/8c22cbea87beb74da3dc5891c40cdf574cd5fe56.diff

LOG: [analyzer] ArrayBoundCheckerV2: suppress false positives from ctype macros

The checker alpha.security.ArrayBoundV2 created bug reports in
situations when the (tainted) result of fgetc() or getchar() was passed
to one of the isXXXXX() macros from ctype.h.

This is a common input handling pattern (within the limited toolbox of
the C language) and several open source projects contained code where it
led to false positive reports; so this commit suppresses ArrayBoundV2
reports generated within the isXXXXX() macros.

Note that here even true positive reports would be difficult to
understand, as they'd refer to the implementation details of these
macros.

Differential Revision: https://reviews.llvm.org/D149460

Added: 
    

Modified: 
    clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
    clang/test/Analysis/taint-generic.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index a476613b6dcc7..6d0cc505756b8 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -42,8 +42,10 @@ class ArrayBoundCheckerV2 :
   void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
                       SVal TaintedSVal) const;
 
+  static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
+
 public:
-  void checkLocation(SVal l, bool isLoad, const Stmt*S,
+  void checkLocation(SVal l, bool isLoad, const Stmt *S,
                      CheckerContext &C) const;
 };
 
@@ -155,6 +157,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
   // memory access is within the extent of the base region.  Since we
   // have some flexibility in defining the base region, we can achieve
   // various levels of conservatism in our buffer overflow checking.
+
+  // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
+  //   #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
+  // and incomplete analysis of these leads to false positives. As even
+  // accurate reports would be confusing for the users, just disable reports
+  // from these macros:
+  if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
+    return;
+
   ProgramStateRef state = checkerContext.getState();
 
   SValBuilder &svalBuilder = checkerContext.getSValBuilder();
@@ -267,6 +278,25 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
   checkerContext.emitReport(std::move(BR));
 }
 
+bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
+  SourceLocation Loc = S->getBeginLoc();
+  if (!Loc.isMacroID())
+    return false;
+
+  StringRef MacroName = Lexer::getImmediateMacroName(
+      Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
+
+  if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
+    return false;
+
+  return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
+          (MacroName == "isblank") || (MacroName == "isdigit") ||
+          (MacroName == "isgraph") || (MacroName == "islower") ||
+          (MacroName == "isnctrl") || (MacroName == "isprint") ||
+          (MacroName == "ispunct") || (MacroName == "isspace") ||
+          (MacroName == "isupper") || (MacroName == "isxdigit"));
+}
+
 #ifndef NDEBUG
 LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
   dumpToStream(llvm::errs());

diff  --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index 626e01e39d158..62e1f570b6622 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -156,6 +156,28 @@ void bufferGetchar(int x) {
   Buffer[m] = 1;  //expected-warning {{Out of bound memory access (index is tainted)}}
 }
 
+extern const unsigned short int **__ctype_b_loc (void);
+enum { _ISdigit = 2048 };
+# define isdigit(c) ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit)
+
+int isdigitImplFalsePositive(void) {
+  // If this code no longer produces a bug report, then consider removing the
+  // special case that disables buffer overflow reports coming from the isXXXXX
+  // macros in ctypes.h.
+  int c = getchar();
+  return ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit);
+  //expected-warning at -1 {{Out of bound memory access (index is tainted)}}
+}
+
+int isdigitSuppressed(void) {
+  // Same code as above, but reports are suppressed based on macro name:
+  int c = getchar();
+  return isdigit(c); //no-warning
+}
+
+// Some later tests use isdigit as a function, so we need to undef it:
+#undef isdigit
+
 void testUncontrolledFormatString(char **p) {
   char s[80];
   fscanf(stdin, "%s", s);


        


More information about the cfe-commits mailing list