[clang] e12e02d - [clang] Evaluate strlen of strcpy argument for -Wfortify-source.
George Burgess IV via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 28 13:53:05 PDT 2021
Author: Michael Benfield
Date: 2021-07-28T20:52:57Z
New Revision: e12e02df09a967f644cf28136a7361bce7a5bb91
URL: https://github.com/llvm/llvm-project/commit/e12e02df09a967f644cf28136a7361bce7a5bb91
DIFF: https://github.com/llvm/llvm-project/commit/e12e02df09a967f644cf28136a7361bce7a5bb91.diff
LOG: [clang] Evaluate strlen of strcpy argument for -Wfortify-source.
Also introduce Expr::tryEvaluateStrLen.
Differential Revision: https://reviews.llvm.org/D104887
Added:
Modified:
clang/include/clang/AST/Expr.h
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/lib/AST/ExprConstant.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/Analysis/security-syntax-checks.m
clang/test/Sema/warn-fortify-source.c
Removed:
################################################################################
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 06164411cc2d4..991abef733637 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -740,6 +740,12 @@ class Expr : public ValueStmt {
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
unsigned Type) const;
+ /// If the current Expr is a pointer, this will try to statically
+ /// determine the strlen of the string pointed to.
+ /// Returns true if all of the above holds and we were able to figure out the
+ /// strlen, false otherwise.
+ bool tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const;
+
/// Enumeration used to describe the kind of Null pointer constant
/// returned from \c isNullPointerConstant().
enum NullPointerConstantKind {
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 00b443b45f132..a777f08de8909 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -816,6 +816,11 @@ def warn_fortify_source_size_mismatch : Warning<
"'%0' size argument is too large; destination buffer has size %1,"
" but size argument is %2">, InGroup<FortifySource>;
+def warn_fortify_strlen_overflow: Warning<
+ "'%0' will always overflow; destination buffer has size %1,"
+ " but the source string has length %2 (including NUL byte)">,
+ InGroup<FortifySource>;
+
def warn_fortify_source_format_overflow : Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but format string expands to at least %2">,
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 01c0168d61a40..f49a144879b3e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -1826,6 +1826,8 @@ static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info);
static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
EvalInfo &Info);
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
+static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
+ EvalInfo &Info);
/// Evaluate an integer or fixed point expression into an APResult.
static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
@@ -11836,46 +11838,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_wcslen: {
// As an extension, we support __builtin_strlen() as a constant expression,
// and support folding strlen() to a constant.
- LValue String;
- if (!EvaluatePointer(E->getArg(0), String, Info))
- return false;
-
- QualType CharTy = E->getArg(0)->getType()->getPointeeType();
-
- // Fast path: if it's a string literal, search the string value.
- if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
- String.getLValueBase().dyn_cast<const Expr *>())) {
- // The string literal may have embedded null characters. Find the first
- // one and truncate there.
- StringRef Str = S->getBytes();
- int64_t Off = String.Offset.getQuantity();
- if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
- S->getCharByteWidth() == 1 &&
- // FIXME: Add fast-path for wchar_t too.
- Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
- Str = Str.substr(Off);
-
- StringRef::size_type Pos = Str.find(0);
- if (Pos != StringRef::npos)
- Str = Str.substr(0, Pos);
-
- return Success(Str.size(), E);
- }
-
- // Fall through to slow path to issue appropriate diagnostic.
- }
-
- // Slow path: scan the bytes of the string looking for the terminating 0.
- for (uint64_t Strlen = 0; /**/; ++Strlen) {
- APValue Char;
- if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
- !Char.isInt())
- return false;
- if (!Char.getInt())
- return Success(Strlen, E);
- if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
- return false;
- }
+ uint64_t StrLen;
+ if (EvaluateBuiltinStrLen(E->getArg(0), StrLen, Info))
+ return Success(StrLen, E);
+ return false;
}
case Builtin::BIstrcmp:
@@ -15736,3 +15702,58 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
return tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
}
+
+static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
+ EvalInfo &Info) {
+ if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
+ return false;
+
+ LValue String;
+
+ if (!EvaluatePointer(E, String, Info))
+ return false;
+
+ QualType CharTy = E->getType()->getPointeeType();
+
+ // Fast path: if it's a string literal, search the string value.
+ if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
+ String.getLValueBase().dyn_cast<const Expr *>())) {
+ StringRef Str = S->getBytes();
+ int64_t Off = String.Offset.getQuantity();
+ if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
+ S->getCharByteWidth() == 1 &&
+ // FIXME: Add fast-path for wchar_t too.
+ Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
+ Str = Str.substr(Off);
+
+ StringRef::size_type Pos = Str.find(0);
+ if (Pos != StringRef::npos)
+ Str = Str.substr(0, Pos);
+
+ Result = Str.size();
+ return true;
+ }
+
+ // Fall through to slow path.
+ }
+
+ // Slow path: scan the bytes of the string looking for the terminating 0.
+ for (uint64_t Strlen = 0; /**/; ++Strlen) {
+ APValue Char;
+ if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
+ !Char.isInt())
+ return false;
+ if (!Char.getInt()) {
+ Result = Strlen;
+ return true;
+ }
+ if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
+ return false;
+ }
+}
+
+bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const {
+ Expr::EvalStatus Status;
+ EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
+ return EvaluateBuiltinStrLen(this, Result, Info);
+}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index de75c10417e71..4c63781660fbe 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -588,14 +588,8 @@ class EstimateSizeFormatHandler
} // namespace
-/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
-/// __builtin_*_chk function, then use the object size argument specified in the
-/// source. Otherwise, infer the object size using __builtin_object_size.
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
CallExpr *TheCall) {
- // FIXME: There are some more useful checks we could be doing here:
- // - Evaluate strlen of strcpy arguments, use as object size.
-
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
isConstantEvaluated())
return;
@@ -607,13 +601,66 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
const TargetInfo &TI = getASTContext().getTargetInfo();
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
+ auto ComputeExplicitObjectSizeArgument =
+ [&](unsigned Index) -> Optional<llvm::APSInt> {
+ Expr::EvalResult Result;
+ Expr *SizeArg = TheCall->getArg(Index);
+ if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
+ return llvm::None;
+ return Result.Val.getInt();
+ };
+
+ auto ComputeSizeArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
+ // If the parameter has a pass_object_size attribute, then we should use its
+ // (potentially) more strict checking mode. Otherwise, conservatively assume
+ // type 0.
+ int BOSType = 0;
+ if (const auto *POS =
+ FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>())
+ BOSType = POS->getType();
+
+ const Expr *ObjArg = TheCall->getArg(Index);
+ uint64_t Result;
+ if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
+ return llvm::None;
+
+ // Get the object size in the target's size_t width.
+ return llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
+ };
+
+ auto ComputeStrLenArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
+ Expr *ObjArg = TheCall->getArg(Index);
+ uint64_t Result;
+ if (!ObjArg->tryEvaluateStrLen(Result, getASTContext()))
+ return llvm::None;
+ // Add 1 for null byte.
+ return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth);
+ };
+
+ Optional<llvm::APSInt> SourceSize;
+ Optional<llvm::APSInt> DestinationSize;
unsigned DiagID = 0;
bool IsChkVariant = false;
- Optional<llvm::APSInt> UsedSize;
- unsigned SizeIndex, ObjectIndex;
+
switch (BuiltinID) {
default:
return;
+ case Builtin::BI__builtin_strcpy:
+ case Builtin::BIstrcpy: {
+ DiagID = diag::warn_fortify_strlen_overflow;
+ SourceSize = ComputeStrLenArgument(1);
+ DestinationSize = ComputeSizeArgument(0);
+ break;
+ }
+
+ case Builtin::BI__builtin___strcpy_chk: {
+ DiagID = diag::warn_fortify_strlen_overflow;
+ SourceSize = ComputeStrLenArgument(1);
+ DestinationSize = ComputeExplicitObjectSizeArgument(2);
+ IsChkVariant = true;
+ break;
+ }
+
case Builtin::BIsprintf:
case Builtin::BI__builtin___sprintf_chk: {
size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
@@ -639,14 +686,13 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
Context.getTargetInfo(), false)) {
DiagID = diag::warn_fortify_source_format_overflow;
- UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
- .extOrTrunc(SizeTypeWidth);
+ SourceSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
+ .extOrTrunc(SizeTypeWidth);
if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
+ DestinationSize = ComputeExplicitObjectSizeArgument(2);
IsChkVariant = true;
- ObjectIndex = 2;
} else {
- IsChkVariant = false;
- ObjectIndex = 0;
+ DestinationSize = ComputeSizeArgument(0);
}
break;
}
@@ -664,18 +710,19 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BI__builtin___memccpy_chk:
case Builtin::BI__builtin___mempcpy_chk: {
DiagID = diag::warn_builtin_chk_overflow;
+ SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 2);
+ DestinationSize =
+ ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
IsChkVariant = true;
- SizeIndex = TheCall->getNumArgs() - 2;
- ObjectIndex = TheCall->getNumArgs() - 1;
break;
}
case Builtin::BI__builtin___snprintf_chk:
case Builtin::BI__builtin___vsnprintf_chk: {
DiagID = diag::warn_builtin_chk_overflow;
+ SourceSize = ComputeExplicitObjectSizeArgument(1);
+ DestinationSize = ComputeExplicitObjectSizeArgument(3);
IsChkVariant = true;
- SizeIndex = 1;
- ObjectIndex = 3;
break;
}
@@ -691,8 +738,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
// size larger than the destination buffer though; this is a runtime abort
// in _FORTIFY_SOURCE mode, and is quite suspicious otherwise.
DiagID = diag::warn_fortify_source_size_mismatch;
- SizeIndex = TheCall->getNumArgs() - 1;
- ObjectIndex = 0;
+ SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
+ DestinationSize = ComputeSizeArgument(0);
break;
}
@@ -705,8 +752,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BImempcpy:
case Builtin::BI__builtin_mempcpy: {
DiagID = diag::warn_fortify_source_overflow;
- SizeIndex = TheCall->getNumArgs() - 1;
- ObjectIndex = 0;
+ SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
+ DestinationSize = ComputeSizeArgument(0);
break;
}
case Builtin::BIsnprintf:
@@ -714,50 +761,14 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BIvsnprintf:
case Builtin::BI__builtin_vsnprintf: {
DiagID = diag::warn_fortify_source_size_mismatch;
- SizeIndex = 1;
- ObjectIndex = 0;
+ SourceSize = ComputeExplicitObjectSizeArgument(1);
+ DestinationSize = ComputeSizeArgument(0);
break;
}
}
- llvm::APSInt ObjectSize;
- // For __builtin___*_chk, the object size is explicitly provided by the caller
- // (usually using __builtin_object_size). Use that value to check this call.
- if (IsChkVariant) {
- Expr::EvalResult Result;
- Expr *SizeArg = TheCall->getArg(ObjectIndex);
- if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
- return;
- ObjectSize = Result.Val.getInt();
-
- // Otherwise, try to evaluate an imaginary call to __builtin_object_size.
- } else {
- // If the parameter has a pass_object_size attribute, then we should use its
- // (potentially) more strict checking mode. Otherwise, conservatively assume
- // type 0.
- int BOSType = 0;
- if (const auto *POS =
- FD->getParamDecl(ObjectIndex)->getAttr<PassObjectSizeAttr>())
- BOSType = POS->getType();
-
- Expr *ObjArg = TheCall->getArg(ObjectIndex);
- uint64_t Result;
- if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
- return;
- // Get the object size in the target's size_t width.
- ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
- }
-
- // Evaluate the number of bytes of the object that this call will use.
- if (!UsedSize) {
- Expr::EvalResult Result;
- Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
- if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
- return;
- UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
- }
-
- if (UsedSize.getValue().ule(ObjectSize))
+ if (!SourceSize || !DestinationSize ||
+ SourceSize.getValue().ule(DestinationSize.getValue()))
return;
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
@@ -770,10 +781,13 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
}
+ SmallString<16> DestinationStr;
+ SmallString<16> SourceStr;
+ DestinationSize->toString(DestinationStr, /*Radix=*/10);
+ SourceSize->toString(SourceStr, /*Radix=*/10);
DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
PDiag(DiagID)
- << FunctionName << toString(ObjectSize, /*Radix=*/10)
- << toString(UsedSize.getValue(), /*Radix=*/10));
+ << FunctionName << DestinationStr << SourceStr);
}
static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
diff --git a/clang/test/Analysis/security-syntax-checks.m b/clang/test/Analysis/security-syntax-checks.m
index 5c63f0686eec7..d14a3d2fddee3 100644
--- a/clang/test/Analysis/security-syntax-checks.m
+++ b/clang/test/Analysis/security-syntax-checks.m
@@ -1,37 +1,37 @@
-// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
+// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
-// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
+// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
-// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
+// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
-// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify \
+// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
-// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
-// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
-// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
-// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify \
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-cloudabi %s -verify -Wno-fortify-source \
// RUN: -DUSE_BUILTINS -DVARIANT \
// RUN: -analyzer-checker=security.insecureAPI \
// RUN: -analyzer-checker=security.FloatLoopCounter
diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index 5ad2979bc29c6..6ec9c6e036ce8 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -58,6 +58,19 @@ void call_stpncpy() {
__builtin_stpncpy(s1, s2, 20); // expected-warning {{'stpncpy' size argument is too large; destination buffer has size 10, but size argument is 20}}
}
+void call_strcpy() {
+ const char *const src = "abcd";
+ char dst[4];
+ __builtin_strcpy(dst, src); // expected-warning {{'strcpy' will always overflow; destination buffer has size 4, but the source string has length 5 (including NUL byte)}}
+}
+
+void call_strcpy_nowarn() {
+ const char *const src = "abcd";
+ char dst[5];
+ // We should not get a warning here.
+ __builtin_strcpy(dst, src);
+}
+
void call_memmove() {
char s1[10], s2[20];
__builtin_memmove(s2, s1, 20);
More information about the cfe-commits
mailing list