[clang] 8c22cbe - [analyzer] ArrayBoundCheckerV2: suppress false positives from ctype macros
Donát Nagy via cfe-commits
cfe-commits at lists.llvm.org
Wed May 3 09:55:12 PDT 2023
Author: Donát Nagy
Date: 2023-05-03T18:52:27+02:00
New Revision: 8c22cbea87beb74da3dc5891c40cdf574cd5fe56
URL: https://github.com/llvm/llvm-project/commit/8c22cbea87beb74da3dc5891c40cdf574cd5fe56
DIFF: https://github.com/llvm/llvm-project/commit/8c22cbea87beb74da3dc5891c40cdf574cd5fe56.diff
LOG: [analyzer] ArrayBoundCheckerV2: suppress false positives from ctype macros
The checker alpha.security.ArrayBoundV2 created bug reports in
situations when the (tainted) result of fgetc() or getchar() was passed
to one of the isXXXXX() macros from ctype.h.
This is a common input handling pattern (within the limited toolbox of
the C language) and several open source projects contained code where it
led to false positive reports; so this commit suppresses ArrayBoundV2
reports generated within the isXXXXX() macros.
Note that here even true positive reports would be difficult to
understand, as they'd refer to the implementation details of these
macros.
Differential Revision: https://reviews.llvm.org/D149460
Added:
Modified:
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
clang/test/Analysis/taint-generic.c
Removed:
################################################################################
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index a476613b6dcc7..6d0cc505756b8 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -42,8 +42,10 @@ class ArrayBoundCheckerV2 :
void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
SVal TaintedSVal) const;
+ static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
+
public:
- void checkLocation(SVal l, bool isLoad, const Stmt*S,
+ void checkLocation(SVal l, bool isLoad, const Stmt *S,
CheckerContext &C) const;
};
@@ -155,6 +157,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
// memory access is within the extent of the base region. Since we
// have some flexibility in defining the base region, we can achieve
// various levels of conservatism in our buffer overflow checking.
+
+ // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
+ // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
+ // and incomplete analysis of these leads to false positives. As even
+ // accurate reports would be confusing for the users, just disable reports
+ // from these macros:
+ if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
+ return;
+
ProgramStateRef state = checkerContext.getState();
SValBuilder &svalBuilder = checkerContext.getSValBuilder();
@@ -267,6 +278,25 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
checkerContext.emitReport(std::move(BR));
}
+bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
+ SourceLocation Loc = S->getBeginLoc();
+ if (!Loc.isMacroID())
+ return false;
+
+ StringRef MacroName = Lexer::getImmediateMacroName(
+ Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
+
+ if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
+ return false;
+
+ return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
+ (MacroName == "isblank") || (MacroName == "isdigit") ||
+ (MacroName == "isgraph") || (MacroName == "islower") ||
+ (MacroName == "isnctrl") || (MacroName == "isprint") ||
+ (MacroName == "ispunct") || (MacroName == "isspace") ||
+ (MacroName == "isupper") || (MacroName == "isxdigit"));
+}
+
#ifndef NDEBUG
LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
dumpToStream(llvm::errs());
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index 626e01e39d158..62e1f570b6622 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -156,6 +156,28 @@ void bufferGetchar(int x) {
Buffer[m] = 1; //expected-warning {{Out of bound memory access (index is tainted)}}
}
+extern const unsigned short int **__ctype_b_loc (void);
+enum { _ISdigit = 2048 };
+# define isdigit(c) ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit)
+
+int isdigitImplFalsePositive(void) {
+ // If this code no longer produces a bug report, then consider removing the
+ // special case that disables buffer overflow reports coming from the isXXXXX
+ // macros in ctypes.h.
+ int c = getchar();
+ return ((*__ctype_b_loc ())[(int) (c)] & (unsigned short int) _ISdigit);
+ //expected-warning at -1 {{Out of bound memory access (index is tainted)}}
+}
+
+int isdigitSuppressed(void) {
+ // Same code as above, but reports are suppressed based on macro name:
+ int c = getchar();
+ return isdigit(c); //no-warning
+}
+
+// Some later tests use isdigit as a function, so we need to undef it:
+#undef isdigit
+
void testUncontrolledFormatString(char **p) {
char s[80];
fscanf(stdin, "%s", s);
More information about the cfe-commits
mailing list