[clang] [clang] Constant-evaluate format strings as last resort (PR #135864)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 15 14:46:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (apple-fcloutier)
<details>
<summary>Changes</summary>
I [asked on the forums](https://discourse.llvm.org/t/should-attribute-format-checking-try-to-const-evaluate-strings/85854/4) and people were generally supportive of the idea, so:
Clang's -Wformat checker can see through an inconsistent set of operations. We can fall back to the recently-updated constant string evaluation infrastructure when Clang's initial evaluation fails for a second chance at figuring out what the format string is intended to be. This enables analyzing format strings that were built at compile-time with std::string and other constexpr-capable types in C++, as long as all pieces are also constexpr-visible, and a number of other patterns.
As a side effect, it also enables `tryEvaluateString` on char arrays (rather than only char pointers).
Radar-ID: rdar://99940060
---
Full diff: https://github.com/llvm/llvm-project/pull/135864.diff
7 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+3)
- (modified) clang/include/clang/AST/Expr.h (+8-1)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2)
- (modified) clang/lib/AST/ExprConstant.cpp (+32-8)
- (modified) clang/lib/Sema/SemaChecking.cpp (+94-50)
- (modified) clang/test/Sema/format-strings.c (+14)
- (modified) clang/test/SemaCXX/format-strings.cpp (+74)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 77bf3355af9da..05566d66a65d2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,9 @@ related warnings within the method body.
``format_matches`` accepts an example valid format string as its third
argument. For more information, see the Clang attributes documentation.
+- Format string checking now supports the compile-time evaluation of format
+ strings as a fallback mechanism.
+
- Introduced a new statement attribute ``[[clang::atomic]]`` that enables
fine-grained control over atomic code generation on a per-statement basis.
Supported options include ``[no_]remote_memory``,
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 20f70863a05b3..78eda8bc3c43e 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -791,7 +791,14 @@ class Expr : public ValueStmt {
const Expr *PtrExpression, ASTContext &Ctx,
EvalResult &Status) const;
- /// If the current Expr can be evaluated to a pointer to a null-terminated
+ /// Fill \c Into with the first characters that can be constant-evaluated
+ /// from this \c Expr . When encountering a null character, stop and return
+ /// \c true (the null is not returned in \c Into ). Return \c false if
+ /// evaluation runs off the end of the constant-evaluated string before it
+ /// encounters a null character.
+ bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
+
+ /// If the current \c Expr can be evaluated to a pointer to a null-terminated
/// constant string, return the constant string (without the terminating
/// null).
std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 3cb2731488fab..4139ff2737c76 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10170,6 +10170,8 @@ def warn_format_bool_as_character : Warning<
"using '%0' format specifier, but argument has boolean value">,
InGroup<Format>;
def note_format_string_defined : Note<"format string is defined here">;
+def note_format_string_evaluated_to : Note<
+ "format string was constant-evaluated">;
def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
def note_format_security_fixit: Note<
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 80ece3c4ed7e1..fec92edf49096 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -17945,15 +17945,36 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
EvalInfo &Info, std::string *StringResult) {
- if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
+ QualType Ty = E->getType();
+ if (!E->isPRValue())
return false;
LValue String;
-
- if (!EvaluatePointer(E, String, Info))
+ QualType CharTy;
+ if (Ty->canDecayToPointerType()) {
+ if (E->isGLValue()) {
+ if (!EvaluateLValue(E, String, Info))
+ return false;
+ } else {
+ APValue &Value = Info.CurrentCall->createTemporary(
+ E, Ty, ScopeKind::FullExpression, String);
+ if (!EvaluateInPlace(Value, Info, String, E))
+ return false;
+ }
+ // The result is a pointer to the first element of the array.
+ auto *AT = Info.Ctx.getAsArrayType(Ty);
+ CharTy = AT->getElementType();
+ if (auto *CAT = dyn_cast<ConstantArrayType>(AT))
+ String.addArray(Info, E, CAT);
+ else
+ String.addUnsizedArray(Info, E, CharTy);
+ } else if (Ty->hasPointerRepresentation()) {
+ if (!EvaluatePointer(E, String, Info))
+ return false;
+ CharTy = Ty->getPointeeType();
+ } else {
return false;
-
- QualType CharTy = E->getType()->getPointeeType();
+ }
// Fast path: if it's a string literal, search the string value.
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
@@ -17995,13 +18016,16 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
}
}
-std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const {
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
uint64_t Result;
- std::string StringResult;
+ return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
+}
- if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult))
+std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+ std::string StringResult;
+ if (tryEvaluateString(Ctx, StringResult))
return StringResult;
return {};
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bffd0dd461d3d..017be929ca18e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -98,6 +98,7 @@
#include "llvm/Support/Locale.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include "llvm/TargetParser/Triple.h"
@@ -5935,8 +5936,14 @@ static void CheckFormatString(
llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
bool IgnoreStringsWithoutSpecifiers);
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
- const Expr *E);
+enum StringLiteralConstEvalResult {
+ SLCER_NotEvaluated,
+ SLCER_NotNullTerminated,
+ SLCER_Evaluated,
+};
+
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
// Determine if an expression is a string literal or constant string.
// If this function returns false on the arguments to a function expecting a
@@ -5968,14 +5975,9 @@ static StringLiteralCheckType checkFormatStringExpr(
switch (E->getStmtClass()) {
case Stmt::InitListExprClass:
- // Handle expressions like {"foobar"}.
- if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
- return checkFormatStringExpr(
- S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
- UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
- }
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
+
case Stmt::BinaryConditionalOperatorClass:
case Stmt::ConditionalOperatorClass: {
// The expression is a literal if both sub-expressions were, and it was
@@ -6066,10 +6068,9 @@ static StringLiteralCheckType checkFormatStringExpr(
if (InitList->isStringLiteralInit())
Init = InitList->getInit(0)->IgnoreParenImpCasts();
}
- return checkFormatStringExpr(
- S, ReferenceFormatString, Init, Args, APK, format_idx,
- firstDataArg, Type, CallType,
- /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
+ InFunctionCall = false;
+ E = Init;
+ goto tryAgain;
}
}
@@ -6142,11 +6143,9 @@ static StringLiteralCheckType checkFormatStringExpr(
}
return SLCT_UncheckedLiteral;
}
- return checkFormatStringExpr(
- S, ReferenceFormatString, PVFormatMatches->getFormatString(),
- Args, APK, format_idx, firstDataArg, Type, CallType,
- /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
- Offset, IgnoreStringsWithoutSpecifiers);
+ E = PVFormatMatches->getFormatString();
+ InFunctionCall = false;
+ goto tryAgain;
}
}
@@ -6214,20 +6213,13 @@ static StringLiteralCheckType checkFormatStringExpr(
unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
- const Expr *Arg = CE->getArg(0);
- return checkFormatStringExpr(
- S, ReferenceFormatString, Arg, Args, APK, format_idx,
- firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
- UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
+ E = CE->getArg(0);
+ goto tryAgain;
}
}
}
- if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
- return checkFormatStringExpr(
- S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
- UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
}
case Stmt::ObjCMessageExprClass: {
const auto *ME = cast<ObjCMessageExpr>(E);
@@ -6248,11 +6240,8 @@ static StringLiteralCheckType checkFormatStringExpr(
IgnoreStringsWithoutSpecifiers = true;
}
- const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
- return checkFormatStringExpr(
- S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
- Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
- Offset, IgnoreStringsWithoutSpecifiers);
+ E = ME->getArg(FA->getFormatIdx().getASTIndex());
+ goto tryAgain;
}
}
@@ -6314,7 +6303,8 @@ static StringLiteralCheckType checkFormatStringExpr(
}
}
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
}
case Stmt::UnaryOperatorClass: {
const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
@@ -6331,26 +6321,79 @@ static StringLiteralCheckType checkFormatStringExpr(
}
}
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
}
default:
+ // try to constant-evaluate the string
+ break;
+ }
+
+ const StringLiteral *FakeLiteral = nullptr;
+ switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
+ case SLCER_NotEvaluated:
return SLCT_NotALiteral;
+
+ case SLCER_NotNullTerminated:
+ S.Diag(Args[format_idx]->getBeginLoc(),
+ diag::warn_printf_format_string_not_null_terminated)
+ << Args[format_idx]->getSourceRange();
+ if (!InFunctionCall)
+ S.Diag(E->getBeginLoc(), diag::note_format_string_defined);
+ // Stop checking, as this might just mean we're missing a chunk of the
+ // format string and there would be other spurious format issues.
+ return SLCT_UncheckedLiteral;
+
+ case SLCER_Evaluated:
+ InFunctionCall = false;
+ E = FakeLiteral;
+ goto tryAgain;
}
}
-// If this expression can be evaluated at compile-time,
-// check if the result is a StringLiteral and return it
-// otherwise return nullptr
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
- const Expr *E) {
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
+ // As a last resort, try to constant-evaluate the format string. If it
+ // evaluates to a string literal in the first place, we can point to that
+ // string literal in source and use that.
Expr::EvalResult Result;
- if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) {
+ if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
- if (isa_and_nonnull<StringLiteral>(LVE))
- return LVE;
+ if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
+ SL = BaseSL;
+ return SLCER_Evaluated;
+ }
}
- return nullptr;
+
+ // Otherwise, try to evaluate the expression as a string constant.
+ std::string FormatString;
+ if (!E->tryEvaluateString(S.Context, FormatString)) {
+ return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
+ }
+
+ std::unique_ptr<llvm::MemoryBuffer> MemBuf;
+ {
+ llvm::SmallString<80> EscapedString;
+ {
+ llvm::raw_svector_ostream OS(EscapedString);
+ OS << '"';
+ OS.write_escaped(FormatString);
+ OS << '"';
+ }
+ MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
+ "<scratch space>", true));
+ }
+
+ // Plop that string into a scratch buffer, create a string literal and then
+ // go with that.
+ auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
+ SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile);
+ QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
+ FormatString.length());
+ SL = StringLiteral::Create(S.Context, FormatString,
+ StringLiteralKind::Ordinary, false, SLType, Begin);
+ return SLCER_Evaluated;
}
StringRef Sema::GetFormatStringTypeName(Sema::FormatStringType FST) {
@@ -6973,10 +7016,11 @@ void CheckFormatHandler::EmitFormatDiagnostic(
S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag)
<< ArgumentExpr->getSourceRange();
- const Sema::SemaDiagnosticBuilder &Note =
- S.Diag(IsStringLocation ? Loc : StringRange.getBegin(),
- diag::note_format_string_defined);
-
+ SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin();
+ unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc)
+ ? diag::note_format_string_evaluated_to
+ : diag::note_format_string_defined;
+ const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID);
Note << StringRange;
Note << FixIt;
}
diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index af30ad5d15fe2..a94e0619ce843 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -3,6 +3,11 @@
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-linux-android %s
+// expected-note at -5{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_consteval_init_array test, that -verify treats as if it showed up at
+// line 1 of this file.
+
#include <stdarg.h>
#include <stddef.h>
#define __need_wint_t
@@ -900,3 +905,12 @@ void test_promotion(void) {
// pointers
printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
}
+
+void test_consteval_init_array(void) {
+ const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // expected-note{{format string is defined here}}
+ printf(buf_not_terminated, "hello"); // expected-warning{{format string is not null-terminated}}
+
+ const char buf[] = {'%', 55 * 2 + 5, '\n', 0};
+ printf(buf, "hello"); // no-warning
+ printf(buf, 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
+}
diff --git a/clang/test/SemaCXX/format-strings.cpp b/clang/test/SemaCXX/format-strings.cpp
index 48cf23999a94f..7b04ea7d8bc75 100644
--- a/clang/test/SemaCXX/format-strings.cpp
+++ b/clang/test/SemaCXX/format-strings.cpp
@@ -1,6 +1,14 @@
// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks %s
// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -fblocks -std=c++98 %s
// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++20 %s
+
+#if __cplusplus >= 202000l
+// expected-note at -6{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_constexpr_string test, that -verify treats as if it showed up at
+// line 1 of this file.
+#endif
#include <stdarg.h>
@@ -238,3 +246,69 @@ void f(Scoped1 S1, Scoped2 S2) {
}
#endif
+
+#if __cplusplus >= 202000L
+class my_string {
+ char *data;
+ unsigned size;
+
+public:
+ template<unsigned N>
+ constexpr my_string(const char (&literal)[N]) {
+ data = new char[N+1];
+ for (size = 0; size < N; ++size) {
+ data[size] = literal[size];
+ if (data[size] == 0)
+ break;
+ }
+ data[size] = 0;
+ }
+
+ my_string(const my_string &) = delete;
+
+ constexpr my_string(my_string &&that) {
+ data = that.data;
+ size = that.size;
+ that.data = nullptr;
+ that.size = 0;
+ }
+
+ constexpr ~my_string() {
+ delete[] data;
+ }
+
+ template<unsigned N>
+ constexpr void append(const char (&literal)[N]) {
+ char *cat = new char[size + N + 1];
+ char *tmp = cat;
+ for (unsigned i = 0; i < size; ++i) {
+ *tmp++ = data[i];
+ }
+ for (unsigned i = 0; i < N; ++i) {
+ *tmp = literal[i];
+ if (*tmp == 0)
+ break;
+ ++tmp;
+ }
+ *tmp = 0;
+ delete[] data;
+ size = tmp - cat;
+ data = cat;
+ }
+
+ constexpr const char *c_str() const {
+ return data;
+ }
+};
+
+constexpr my_string const_string() {
+ my_string str("hello %s");
+ str.append(", %d");
+ return str;
+}
+
+void test_constexpr_string() {
+ printf(const_string().c_str(), "hello", 123); // no-warning
+ printf(const_string().c_str(), 123, 456); // expected-warning {{format specifies type 'char *' but the argument has type 'int'}}
+}
+#endif
``````````
</details>
https://github.com/llvm/llvm-project/pull/135864
More information about the cfe-commits
mailing list