[clang] [clang][analyzer] Improve checker 'unix.cstring.NotNullTerminated' (PR #149106)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 16 07:20:54 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-static-analyzer-1
Author: Balázs Kéri (balazske)
<details>
<summary>Changes</summary>
Check for non-presence of terminating zero in simple cases.
---
Full diff: https://github.com/llvm/llvm-project/pull/149106.diff
4 Files Affected:
- (modified) clang/docs/analyzer/checkers.rst (+16-4)
- (modified) clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (+87-7)
- (modified) clang/test/Analysis/Inputs/system-header-simulator.h (+2)
- (added) clang/test/Analysis/cstring-notnullterm.c (+158)
``````````diff
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 26c5028e04955..3d6e0e7d73756 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2105,10 +2105,11 @@ unix.cstring.NotNullTerminated (C)
Check for arguments which are not null-terminated strings;
applies to the ``strlen``, ``strcpy``, ``strcat``, ``strcmp`` family of functions.
-Only very fundamental cases are detected where the passed memory block is
-absolutely different from a null-terminated string. This checker does not
-find if a memory buffer is passed where the terminating zero character
-is missing.
+The checker can detect if the passed memory block is not a string object at all,
+like address of a label or function. Additionally it can detect simple cases
+when the terminating zero is missing, for example if the data was initialized
+as array without terminating zero, or the terminating zero was overwritten by
+an assignment (with a value that is provably not zero).
.. code-block:: c
@@ -2121,6 +2122,17 @@ is missing.
int l = strlen((char *)&&label); // warn
}
+ int test3() {
+ char buf[4] = {1, 2, 3, 4};
+ return strlen(buf); // warn
+ }
+
+ int test4() {
+ char buf[] = "abcd";
+ buf[4] = 'e';
+ return strlen(buf); // warn
+ }
+
.. _unix-cstring-NullArg:
unix.cstring.NullArg (C)
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 31cb150892a5d..4f14853931c54 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -78,12 +78,9 @@ static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
: Ctx.WideCharTy);
}
-class CStringChecker : public Checker< eval::Call,
- check::PreStmt<DeclStmt>,
- check::LiveSymbols,
- check::DeadSymbols,
- check::RegionChanges
- > {
+class CStringChecker
+ : public Checker<eval::Call, check::PreStmt<DeclStmt>, check::LiveSymbols,
+ check::DeadSymbols, check::RegionChanges> {
mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
@@ -324,11 +321,14 @@ class CStringChecker : public Checker< eval::Call,
SizeArgExpr Size, AnyArgExpr First,
AnyArgExpr Second,
CharKind CK = CharKind::Regular) const;
+ // Check for presence of terminating zero in a string argument.
+ ProgramStateRef checkNullTerminated(CheckerContext &C, ProgramStateRef State,
+ AnyArgExpr Arg, SVal ArgVal) const;
+
void emitOverlapBug(CheckerContext &C,
ProgramStateRef state,
const Stmt *First,
const Stmt *Second) const;
-
void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
StringRef WarningMsg) const;
void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
@@ -959,6 +959,66 @@ ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
return state;
}
+ProgramStateRef CStringChecker::checkNullTerminated(CheckerContext &C,
+ ProgramStateRef State,
+ AnyArgExpr Arg,
+ SVal ArgVal) const {
+ if (!State)
+ return nullptr;
+
+ if (!Filter.CheckCStringNotNullTerm)
+ return State;
+
+ SValBuilder &SVB = C.getSValBuilder();
+
+ auto TryGetTypedValueR = [](const MemRegion *R) -> const TypedValueRegion * {
+ if (!R)
+ return nullptr;
+ return R->StripCasts()->getAs<TypedValueRegion>();
+ };
+
+ const TypedValueRegion *StrReg = TryGetTypedValueR(ArgVal.getAsRegion());
+ if (!StrReg)
+ return State;
+ int Offset = 0;
+ if (const auto *ElemReg = StrReg->getAs<ElementRegion>()) {
+ RegionRawOffset ROffset = ElemReg->getAsArrayOffset();
+ StrReg = TryGetTypedValueR(ROffset.getRegion());
+ if (!StrReg)
+ return State;
+ Offset = ROffset.getOffset().getQuantity();
+ }
+
+ DefinedOrUnknownSVal Extent = getDynamicExtent(State, StrReg, SVB);
+ if (Extent.isUnknown())
+ return State;
+ const llvm::APSInt *KnownExtent = SVB.getKnownValue(State, Extent);
+ if (!KnownExtent)
+ return State;
+ MemRegionManager &RegionM = SVB.getRegionManager();
+ int RegionExtent = KnownExtent->getExtValue();
+ if (Offset >= RegionExtent)
+ return State;
+ for (int I = Offset; I < RegionExtent; ++I) {
+ const ElementRegion *ElemR = RegionM.getElementRegion(C.getASTContext().CharTy, SVB.makeArrayIndex(I), StrReg, C.getASTContext());
+ SVal ElemVal = State->getSValAsScalarOrLoc(ElemR);
+ if (!State->isNonNull(ElemVal).isConstrainedTrue())
+ // We have here a lower bound for the string length.
+ // Try to update the CStringLength value?
+ return State;
+ }
+
+ SmallString<80> Buf;
+ llvm::raw_svector_ostream OS(Buf);
+ assert(CurrentFunctionDescription);
+ OS << "Terminating zero missing from string passed as "
+ << (Arg.ArgumentIndex + 1) << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1)
+ << " argument to " << CurrentFunctionDescription;
+
+ emitNotCStringBug(C, State, Arg.Expression, OS.str());
+ return nullptr;
+}
+
ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
const MemRegion *MR,
SVal strLength) {
@@ -1718,6 +1778,9 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C,
SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
state = checkNonNull(C, state, Arg, ArgVal);
+ if (!IsStrnlen)
+ state = checkNullTerminated(C, state, Arg, ArgVal);
+
if (!state)
return;
@@ -1882,6 +1945,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
SVal DstVal = state->getSVal(Dst.Expression, LCtx);
state = checkNonNull(C, state, Dst, DstVal);
+ // Look for terminating zero.
+ if (!IsBounded || appendK != ConcatFnKind::none)
+ state = checkNullTerminated(C, state, Dst, DstVal);
if (!state)
return;
@@ -1889,6 +1955,9 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
state = checkNonNull(C, state, srcExpr, srcVal);
+ // Look for terminating zero.
+ if (!IsBounded)
+ state = checkNullTerminated(C, state, srcExpr, srcVal);
if (!state)
return;
@@ -2340,6 +2409,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
AnyArgExpr Left = {Call.getArgExpr(0), 0};
SVal LeftVal = state->getSVal(Left.Expression, LCtx);
state = checkNonNull(C, state, Left, LeftVal);
+ // Look for terminating zero.
+ if (!IsBounded)
+ state = checkNullTerminated(C, state, Left, LeftVal);
if (!state)
return;
@@ -2347,6 +2419,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
AnyArgExpr Right = {Call.getArgExpr(1), 1};
SVal RightVal = state->getSVal(Right.Expression, LCtx);
state = checkNonNull(C, state, Right, RightVal);
+ // Look for terminating zero.
+ if (!IsBounded)
+ state = checkNullTerminated(C, state, Right, RightVal);
if (!state)
return;
@@ -2478,6 +2553,8 @@ void CStringChecker::evalStrsep(CheckerContext &C,
// a null string).
SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
+ // Look for terminating zero.
+ State = checkNullTerminated(C, State, SearchStrPtr, SearchStrVal);
if (!State)
return;
@@ -2485,6 +2562,8 @@ void CStringChecker::evalStrsep(CheckerContext &C,
AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
State = checkNonNull(C, State, DelimStr, DelimStrVal);
+ // Look for terminating zero.
+ State = checkNullTerminated(C, State, DelimStr, DelimStrVal);
if (!State)
return;
@@ -2675,6 +2754,7 @@ void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
// This is an invalid call, let's just ignore it.
return;
}
+ // FIXME: Why not check for null pointer (and null-terminated string)?
const auto AllArguments =
llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h
index fadc09f65d536..3a0cf75cfb9ff 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator.h
@@ -80,6 +80,8 @@ size_t strlen(const char *);
char *strcpy(char *restrict, const char *restrict);
char *strncpy(char *restrict dst, const char *restrict src, size_t n);
+char *strcat(char *restrict, const char *restrict);
+char *strncat(char *restrict, const char *restrict, size_t);
char *strsep(char **restrict stringp, const char *restrict delim);
void *memcpy(void *restrict dst, const void *restrict src, size_t n);
void *memset(void *s, int c, size_t n);
diff --git a/clang/test/Analysis/cstring-notnullterm.c b/clang/test/Analysis/cstring-notnullterm.c
new file mode 100644
index 0000000000000..8d586444fc835
--- /dev/null
+++ b/clang/test/Analysis/cstring-notnullterm.c
@@ -0,0 +1,158 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=unix.cstring.NotNullTerminated -verify %s
+
+#include "Inputs/system-header-simulator.h"
+
+extern void *malloc(size_t);
+
+size_t test_addr_fn() {
+ return strlen((char *)&malloc); // expected-warning{{Argument to string length function is the address of the function 'malloc', which is not a null-terminated string}}
+}
+
+size_t test_addr_label() {
+lab:
+ return strlen((char *)&&lab); // expected-warning{{Argument to string length function is the address of the label 'lab', which is not a null-terminated string}}
+}
+
+size_t test_init_compound(int i) {
+ char src1[6] = {1,2,3,4,5,6};
+ char src2[6] = {1,2,3,0,5,6};
+ switch (i) {
+ case 1:
+ return strlen(src1); // expected-warning{{Terminating zero missing from string passed as 1st argument to string length function}}
+ case 2:
+ return strlen(src1 + 1); // expected-warning{{Terminating zero missing from string}}
+ case 3:
+ return strlen(src2);
+ case 4:
+ return strlen(src2 + 4); // expected-warning{{Terminating zero missing from string}}
+ case 5:
+ return strlen(src2 + 3);
+ }
+ src1[5] = 0;
+ return strlen(src1);
+}
+
+size_t test_init_literal(int i) {
+ char src1[] = "abcdef";
+ int l = strlen(src1);
+ src1[6] = '.';
+ src1[3] = 0;
+ switch (i) {
+ case 1:
+ return strlen(src1);
+ case 2:
+ return strlen(src1 + 4); // expected-warning{{Terminating zero missing from string}}
+ }
+ return l;
+}
+
+size_t test_init_assign(int i, char a) {
+ char src[6];
+ src[1] = '1';
+ src[2] = '2';
+ src[4] = '4';
+ src[5] = '5';
+
+ switch (i) {
+ case 0:
+ return strlen(src);
+ case 1:
+ return strlen(src + 1);
+ case 2:
+ return strlen(src + 2);
+ case 3:
+ return strlen(src + 3);
+ case 4:
+ return strlen(src + 4); // expected-warning{{Terminating zero missing from string}}
+ }
+ src[5] = a;
+ size_t l = strlen(src + 4);
+ src[5] = 0;
+ l += strlen(src + 4);
+ src[5] = '5';
+ return l + strlen(src + 4); // expected-warning{{Terminating zero missing from string}}
+}
+
+size_t test_assign1() {
+ char str1[5] = {'0','1','2','3','4'};
+ char str2[5];
+ str2[0] = str1[0];
+ str2[1] = str1[1];
+ str2[4] = str1[4];
+ size_t l = strlen(str2);
+ return l + strlen(str2 + 4); // expected-warning{{Terminating zero missing from string}}
+}
+
+size_t test_assign2() {
+ char str1[5] = {1,2,3,4,5};
+ char str2[5];
+ str2[0] = str1[0];
+ str2[4] = str2[0];
+ return strlen(str2 + 4); // expected-warning{{Terminating zero missing from string}}
+}
+
+void test_ignore(char *dst) {
+ char str1[5] = {1,2,3,4,5};
+ strncpy(dst, str1, 5);
+ strcpy(dst, str1); // expected-warning{{Terminating zero missing from string}}
+}
+
+size_t test_malloc() {
+ char *buf = (char *)malloc(4);
+ if (!buf)
+ return 0;
+ buf[3] = 'a';
+ return strlen(buf);
+}
+
+extern void f_ext(char *);
+char *g_buf = 0;
+
+size_t test_escape1() {
+ char buf[4] = {1,2,3,4};
+ f_ext(buf);
+ return strlen(buf);
+}
+
+size_t test_escape2(char *x) {
+ char buf[4] = {1,2,3,4};
+ g_buf = buf;
+ f_ext(x);
+ return strlen(buf);
+}
+
+size_t test_escape3() {
+ char buf[4] = {1,2,3,4};
+ f_ext(buf + 3);
+ return strlen(buf);
+}
+
+void test_str_fn(int i, char *dst) {
+ char buf[] = {1, 2, 3};
+ switch (i) {
+ case 1:
+ strcpy(buf, "aa"); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 2:
+ strcpy(dst, buf); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 3:
+ strncpy(buf, "aa", 3);
+ break;
+ case 4:
+ strncpy(dst, buf, 3);
+ break;
+ case 5:
+ strcat(buf, "aa"); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 6:
+ strcat(dst, buf); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 7:
+ strncat(buf, "aa", 3); // expected-warning{{Terminating zero missing from string}}
+ break;
+ case 8:
+ strncat(dst, buf, 3);
+ break;
+ }
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/149106
More information about the cfe-commits
mailing list