[clang] [clang][analyzer] Support strlen with offset to string literal in CStringChecker (PR #159795)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 19 08:38:25 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-static-analyzer-1
Author: Balázs Kéri (balazske)
<details>
<summary>Changes</summary>
Handle cases like `strlen(string_constant + 3)` in `CStringChecker` by returning the original string's length minus the offset.
---
Full diff: https://github.com/llvm/llvm-project/pull/159795.diff
2 Files Affected:
- (modified) clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (+51-24)
- (modified) clang/test/Analysis/string.c (+39)
``````````diff
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 36f316df0c3ff..dd35173a6b109 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -251,6 +251,8 @@ class CStringChecker
const Expr *Ex,
const MemRegion *MR,
bool hypothetical);
+ static const StringLiteral *getStringLiteralFromRegion(const MemRegion *MR);
+
SVal getCStringLength(CheckerContext &C,
ProgramStateRef &state,
const Expr *Ex,
@@ -979,6 +981,22 @@ SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
return strLength;
}
+const StringLiteral *
+CStringChecker::getStringLiteralFromRegion(const MemRegion *MR) {
+ switch (MR->getKind()) {
+ case MemRegion::StringRegionKind:
+ return cast<StringRegion>(MR)->getStringLiteral();
+ case MemRegion::NonParamVarRegionKind:
+ if (const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
+ Decl->getType().isConstQualified() && Decl->hasGlobalStorage())
+ return dyn_cast_or_null<StringLiteral>(Decl->getInit());
+ return nullptr;
+ default:
+ return nullptr;
+ }
+ return nullptr;
+}
+
SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
const Expr *Ex, SVal Buf,
bool hypothetical) const {
@@ -1009,30 +1027,19 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
// its length. For anything we can't figure out, just return UnknownVal.
MR = MR->StripCasts();
- switch (MR->getKind()) {
- case MemRegion::StringRegionKind: {
- // Modifying the contents of string regions is undefined [C99 6.4.5p6],
- // so we can assume that the byte length is the correct C string length.
- SValBuilder &svalBuilder = C.getSValBuilder();
- QualType sizeTy = svalBuilder.getContext().getSizeType();
- const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
- return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
- }
- case MemRegion::NonParamVarRegionKind: {
+ if (const StringLiteral *StrLit = getStringLiteralFromRegion(MR)) {
// If we have a global constant with a string literal initializer,
// compute the initializer's length.
- const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
- if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
- if (const Expr *Init = Decl->getInit()) {
- if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
- SValBuilder &SvalBuilder = C.getSValBuilder();
- QualType SizeTy = SvalBuilder.getContext().getSizeType();
- return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
- }
- }
- }
- [[fallthrough]];
+ // Modifying the contents of string regions is undefined [C99 6.4.5p6],
+ // so we can assume that the byte length is the correct C string length.
+ // FIXME: Embedded null characters are not handled.
+ SValBuilder &SVB = C.getSValBuilder();
+ return SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType());
}
+
+ switch (MR->getKind()) {
+ case MemRegion::StringRegionKind:
+ case MemRegion::NonParamVarRegionKind:
case MemRegion::SymbolicRegionKind:
case MemRegion::AllocaRegionKind:
case MemRegion::ParamVarRegionKind:
@@ -1042,10 +1049,29 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
case MemRegion::CompoundLiteralRegionKind:
// FIXME: Can we track this? Is it necessary?
return UnknownVal();
- case MemRegion::ElementRegionKind:
- // FIXME: How can we handle this? It's not good enough to subtract the
- // offset from the base string length; consider "123\x00567" and &a[5].
+ case MemRegion::ElementRegionKind: {
+ // If an offset into the string literal is used, use the original length
+ // minus the offset.
+ // FIXME: Embedded null characters are not handled.
+ const ElementRegion *ER = cast<ElementRegion>(MR);
+ const SubRegion *SuperReg =
+ cast<SubRegion>(ER->getSuperRegion()->StripCasts());
+ const StringLiteral *StrLit = getStringLiteralFromRegion(SuperReg);
+ if (!StrLit)
+ return UnknownVal();
+ SValBuilder &SVB = C.getSValBuilder();
+ NonLoc Idx = ER->getIndex();
+ NonLoc LengthVal =
+ SVB.makeIntVal(StrLit->getLength(), SVB.getContext().getSizeType())
+ .castAs<NonLoc>();
+ if (state->assume(SVB.evalBinOpNN(state, BO_LE, Idx, LengthVal,
+ SVB.getConditionType())
+ .castAs<DefinedOrUnknownSVal>(),
+ true))
+ return SVB.evalBinOp(state, BO_Sub, LengthVal, Idx,
+ SVB.getContext().getSizeType());
return UnknownVal();
+ }
default:
// Other regions (mostly non-data) can't have a reliable C string length.
// In this case, an error is emitted and UndefinedVal is returned.
@@ -1070,6 +1096,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
ProgramStateRef &state, const Expr *expr, SVal val) const {
+ // FIXME: use getStringLiteralFromRegion (and remove unused parameters)?
// Get the memory region pointed to by the val.
const MemRegion *bufRegion = val.getAsRegion();
diff --git a/clang/test/Analysis/string.c b/clang/test/Analysis/string.c
index cdd36275568e3..c0216d758c377 100644
--- a/clang/test/Analysis/string.c
+++ b/clang/test/Analysis/string.c
@@ -82,16 +82,21 @@ size_t strlen(const char *s);
void strlen_constant0(void) {
clang_analyzer_eval(strlen("123") == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strlen(&("123"[1])) == 2); // expected-warning{{TRUE}}
}
void strlen_constant1(void) {
const char *a = "123";
clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strlen(a + 3) == 0); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strlen(a + 4)); // expected-warning{{UNKNOWN}}
}
void strlen_constant2(char x) {
char a[] = "123";
clang_analyzer_eval(strlen(a) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strlen(a + 1) == 2); // expected-warning{{UNKNOWN}}
a[0] = x;
clang_analyzer_eval(strlen(a) == 3); // expected-warning{{UNKNOWN}}
@@ -105,10 +110,12 @@ char global_non_const_arr[] = "op";
void strlen_global_constant_ptr(void) {
clang_analyzer_eval(strlen(global_str_ptr) == 4); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strlen(global_str_ptr + 1) == 3); // expected-warning{{TRUE}}
}
void strlen_global_constant_arr(void) {
clang_analyzer_eval(strlen(global_str_arr) == 4); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strlen(global_str_arr + 1) == 3); // expected-warning{{TRUE}}
}
void strlen_global_non_const_ptr(void) {
@@ -235,6 +242,19 @@ void testStrlenCallee(void) {
clang_analyzer_eval(lenBefore == lenAfter); // expected-warning{{UNKNOWN}}
}
+void strlen_symbolic_offset(unsigned x) {
+ const char *str = "abcd";
+ if (x > 3)
+ return;
+ // FIXME: these should be known
+ clang_analyzer_eval(strlen(str + x) > 1); // expected-warning{{UNKNOWN}}
+ clang_analyzer_eval(strlen(str + x) > 2); // expected-warning{{UNKNOWN}}
+ clang_analyzer_eval(strlen(str + x) <= 4); // expected-warning{{UNKNOWN}}
+ clang_analyzer_eval(strlen(str + x) <= 3); // expected-warning{{UNKNOWN}}
+ if (x != 1)
+ return;
+ clang_analyzer_eval(strlen(str + x) == 3); // expected-warning{{TRUE}}
+}
//===----------------------------------------------------------------------===
// strnlen()
@@ -244,32 +264,38 @@ size_t strnlen(const char *s, size_t maxlen);
void strnlen_constant0(void) {
clang_analyzer_eval(strnlen("123", 10) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strnlen(&("123"[1]), 10) == 2); // expected-warning{{TRUE}}
}
void strnlen_constant1(void) {
const char *a = "123";
clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{TRUE}}
}
void strnlen_constant2(char x) {
char a[] = "123";
clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strnlen(a + 1, 10) == 2); // expected-warning{{UNKNOWN}}
a[0] = x;
clang_analyzer_eval(strnlen(a, 10) == 3); // expected-warning{{UNKNOWN}}
}
void strnlen_constant4(void) {
clang_analyzer_eval(strnlen("123456", 3) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strnlen(&("123456"[1]), 3) == 3); // expected-warning{{TRUE}}
}
void strnlen_constant5(void) {
const char *a = "123456";
clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{TRUE}}
}
void strnlen_constant6(char x) {
char a[] = "123456";
clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{TRUE}}
+ clang_analyzer_eval(strnlen(a + 1, 3) == 3); // expected-warning{{UNKNOWN}}
a[0] = x;
clang_analyzer_eval(strnlen(a, 3) == 3); // expected-warning{{UNKNOWN}}
}
@@ -326,6 +352,19 @@ void strnlen_at_actual(size_t limit) {
}
}
+void strnlen_at_actual_1(size_t limit) {
+ const char *str = "abc";
+ size_t len = strnlen(str + 1, limit);
+ clang_analyzer_eval(len <= 2); // expected-warning{{TRUE}}
+ // This is due to eager assertion in strnlen.
+ if (limit == 0) {
+ clang_analyzer_eval(len == 0); // expected-warning{{TRUE}}
+ } else {
+ clang_analyzer_eval(len == 2); // expected-warning{{UNKNOWN}}
+ clang_analyzer_eval(len < 2); // expected-warning{{UNKNOWN}}
+ }
+}
+
//===----------------------------------------------------------------------===
// strcpy()
//===----------------------------------------------------------------------===
``````````
</details>
https://github.com/llvm/llvm-project/pull/159795
More information about the cfe-commits mailing list