[clang] Constant-evaluate format strings as last resort (PR #135864)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 22 14:13:17 PDT 2025
Félix Cloutier <fcloutier at apple.com>,
Félix Cloutier <fcloutier at apple.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/135864 at github.com>
https://github.com/apple-fcloutier updated https://github.com/llvm/llvm-project/pull/135864
>From 7e15ec24e6de96b9828f13f21099446e55831591 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Tue, 15 Apr 2025 14:28:02 -0700
Subject: [PATCH 1/3] [clang] Constant-evaluate format strings as last resort
Clang's -Wformat checker can see through an inconsistent set of
operations. We can fall back to the recently-updated constant string
evaluation infrastructure when Clang's initial evaluation fails for
a second chance at figuring out what the format string is intended
to be. This enables analyzing format strings that were built at
compile-time with std::string and other constexpr-capable types in
C++, as long as all pieces are also constexpr-visible, and a number
of other patterns.
Radar-ID: rdar://99940060
---
clang/docs/ReleaseNotes.rst | 3 +
clang/include/clang/AST/Expr.h | 9 +-
.../clang/Basic/DiagnosticSemaKinds.td | 2 +
clang/lib/AST/ExprConstant.cpp | 40 ++++-
clang/lib/Sema/SemaChecking.cpp | 144 ++++++++++++------
clang/test/Sema/format-strings.c | 14 ++
clang/test/SemaCXX/format-strings.cpp | 74 +++++++++
7 files changed, 227 insertions(+), 59 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 77bf3355af9da..05566d66a65d2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,9 @@ related warnings within the method body.
``format_matches`` accepts an example valid format string as its third
argument. For more information, see the Clang attributes documentation.
+- Format string checking now supports the compile-time evaluation of format
+ strings as a fallback mechanism.
+
- Introduced a new statement attribute ``[[clang::atomic]]`` that enables
fine-grained control over atomic code generation on a per-statement basis.
Supported options include ``[no_]remote_memory``,
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 20f70863a05b3..78eda8bc3c43e 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -791,7 +791,14 @@ class Expr : public ValueStmt {
const Expr *PtrExpression, ASTContext &Ctx,
EvalResult &Status) const;
- /// If the current Expr can be evaluated to a pointer to a null-terminated
+ /// Fill \c Into with the first characters that can be constant-evaluated
+ /// from this \c Expr . When encountering a null character, stop and return
+ /// \c true (the null is not returned in \c Into ). Return \c false if
+ /// evaluation runs off the end of the constant-evaluated string before it
+ /// encounters a null character.
+ bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
+
+ /// If the current \c Expr can be evaluated to a pointer to a null-terminated
/// constant string, return the constant string (without the terminating
/// null).
std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 3cb2731488fab..4139ff2737c76 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10170,6 +10170,8 @@ def warn_format_bool_as_character : Warning<
"using '%0' format specifier, but argument has boolean value">,
InGroup<Format>;
def note_format_string_defined : Note<"format string is defined here">;
+def note_format_string_evaluated_to : Note<
+ "format string was constant-evaluated">;
def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
def note_format_security_fixit: Note<
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 80ece3c4ed7e1..fec92edf49096 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -17945,15 +17945,36 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
EvalInfo &Info, std::string *StringResult) {
- if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
+ QualType Ty = E->getType();
+ if (!E->isPRValue())
return false;
LValue String;
-
- if (!EvaluatePointer(E, String, Info))
+ QualType CharTy;
+ if (Ty->canDecayToPointerType()) {
+ if (E->isGLValue()) {
+ if (!EvaluateLValue(E, String, Info))
+ return false;
+ } else {
+ APValue &Value = Info.CurrentCall->createTemporary(
+ E, Ty, ScopeKind::FullExpression, String);
+ if (!EvaluateInPlace(Value, Info, String, E))
+ return false;
+ }
+ // The result is a pointer to the first element of the array.
+ auto *AT = Info.Ctx.getAsArrayType(Ty);
+ CharTy = AT->getElementType();
+ if (auto *CAT = dyn_cast<ConstantArrayType>(AT))
+ String.addArray(Info, E, CAT);
+ else
+ String.addUnsizedArray(Info, E, CharTy);
+ } else if (Ty->hasPointerRepresentation()) {
+ if (!EvaluatePointer(E, String, Info))
+ return false;
+ CharTy = Ty->getPointeeType();
+ } else {
return false;
-
- QualType CharTy = E->getType()->getPointeeType();
+ }
// Fast path: if it's a string literal, search the string value.
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
@@ -17995,13 +18016,16 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
}
}
-std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const {
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
uint64_t Result;
- std::string StringResult;
+ return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
+}
- if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult))
+std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+ std::string StringResult;
+ if (tryEvaluateString(Ctx, StringResult))
return StringResult;
return {};
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bffd0dd461d3d..017be929ca18e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -98,6 +98,7 @@
#include "llvm/Support/Locale.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include "llvm/TargetParser/Triple.h"
@@ -5935,8 +5936,14 @@ static void CheckFormatString(
llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
bool IgnoreStringsWithoutSpecifiers);
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
- const Expr *E);
+enum StringLiteralConstEvalResult {
+ SLCER_NotEvaluated,
+ SLCER_NotNullTerminated,
+ SLCER_Evaluated,
+};
+
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
// Determine if an expression is a string literal or constant string.
// If this function returns false on the arguments to a function expecting a
@@ -5968,14 +5975,9 @@ static StringLiteralCheckType checkFormatStringExpr(
switch (E->getStmtClass()) {
case Stmt::InitListExprClass:
- // Handle expressions like {"foobar"}.
- if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) {
- return checkFormatStringExpr(
- S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
- UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
- }
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
+
case Stmt::BinaryConditionalOperatorClass:
case Stmt::ConditionalOperatorClass: {
// The expression is a literal if both sub-expressions were, and it was
@@ -6066,10 +6068,9 @@ static StringLiteralCheckType checkFormatStringExpr(
if (InitList->isStringLiteralInit())
Init = InitList->getInit(0)->IgnoreParenImpCasts();
}
- return checkFormatStringExpr(
- S, ReferenceFormatString, Init, Args, APK, format_idx,
- firstDataArg, Type, CallType,
- /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset);
+ InFunctionCall = false;
+ E = Init;
+ goto tryAgain;
}
}
@@ -6142,11 +6143,9 @@ static StringLiteralCheckType checkFormatStringExpr(
}
return SLCT_UncheckedLiteral;
}
- return checkFormatStringExpr(
- S, ReferenceFormatString, PVFormatMatches->getFormatString(),
- Args, APK, format_idx, firstDataArg, Type, CallType,
- /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg,
- Offset, IgnoreStringsWithoutSpecifiers);
+ E = PVFormatMatches->getFormatString();
+ InFunctionCall = false;
+ goto tryAgain;
}
}
@@ -6214,20 +6213,13 @@ static StringLiteralCheckType checkFormatStringExpr(
unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) {
- const Expr *Arg = CE->getArg(0);
- return checkFormatStringExpr(
- S, ReferenceFormatString, Arg, Args, APK, format_idx,
- firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs,
- UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
+ E = CE->getArg(0);
+ goto tryAgain;
}
}
}
- if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E))
- return checkFormatStringExpr(
- S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg,
- Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs,
- UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers);
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
}
case Stmt::ObjCMessageExprClass: {
const auto *ME = cast<ObjCMessageExpr>(E);
@@ -6248,11 +6240,8 @@ static StringLiteralCheckType checkFormatStringExpr(
IgnoreStringsWithoutSpecifiers = true;
}
- const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex());
- return checkFormatStringExpr(
- S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg,
- Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg,
- Offset, IgnoreStringsWithoutSpecifiers);
+ E = ME->getArg(FA->getFormatIdx().getASTIndex());
+ goto tryAgain;
}
}
@@ -6314,7 +6303,8 @@ static StringLiteralCheckType checkFormatStringExpr(
}
}
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
}
case Stmt::UnaryOperatorClass: {
const UnaryOperator *UnaOp = cast<UnaryOperator>(E);
@@ -6331,26 +6321,79 @@ static StringLiteralCheckType checkFormatStringExpr(
}
}
- return SLCT_NotALiteral;
+ // try to constant-evaluate the string
+ break;
}
default:
+ // try to constant-evaluate the string
+ break;
+ }
+
+ const StringLiteral *FakeLiteral = nullptr;
+ switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
+ case SLCER_NotEvaluated:
return SLCT_NotALiteral;
+
+ case SLCER_NotNullTerminated:
+ S.Diag(Args[format_idx]->getBeginLoc(),
+ diag::warn_printf_format_string_not_null_terminated)
+ << Args[format_idx]->getSourceRange();
+ if (!InFunctionCall)
+ S.Diag(E->getBeginLoc(), diag::note_format_string_defined);
+ // Stop checking, as this might just mean we're missing a chunk of the
+ // format string and there would be other spurious format issues.
+ return SLCT_UncheckedLiteral;
+
+ case SLCER_Evaluated:
+ InFunctionCall = false;
+ E = FakeLiteral;
+ goto tryAgain;
}
}
-// If this expression can be evaluated at compile-time,
-// check if the result is a StringLiteral and return it
-// otherwise return nullptr
-static const Expr *maybeConstEvalStringLiteral(ASTContext &Context,
- const Expr *E) {
+static StringLiteralConstEvalResult
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
+ // As a last resort, try to constant-evaluate the format string. If it
+ // evaluates to a string literal in the first place, we can point to that
+ // string literal in source and use that.
Expr::EvalResult Result;
- if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) {
+ if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
- if (isa_and_nonnull<StringLiteral>(LVE))
- return LVE;
+ if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
+ SL = BaseSL;
+ return SLCER_Evaluated;
+ }
}
- return nullptr;
+
+ // Otherwise, try to evaluate the expression as a string constant.
+ std::string FormatString;
+ if (!E->tryEvaluateString(S.Context, FormatString)) {
+ return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
+ }
+
+ std::unique_ptr<llvm::MemoryBuffer> MemBuf;
+ {
+ llvm::SmallString<80> EscapedString;
+ {
+ llvm::raw_svector_ostream OS(EscapedString);
+ OS << '"';
+ OS.write_escaped(FormatString);
+ OS << '"';
+ }
+ MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
+ "<scratch space>", true));
+ }
+
+ // Plop that string into a scratch buffer, create a string literal and then
+ // go with that.
+ auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
+ SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile);
+ QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
+ FormatString.length());
+ SL = StringLiteral::Create(S.Context, FormatString,
+ StringLiteralKind::Ordinary, false, SLType, Begin);
+ return SLCER_Evaluated;
}
StringRef Sema::GetFormatStringTypeName(Sema::FormatStringType FST) {
@@ -6973,10 +7016,11 @@ void CheckFormatHandler::EmitFormatDiagnostic(
S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag)
<< ArgumentExpr->getSourceRange();
- const Sema::SemaDiagnosticBuilder &Note =
- S.Diag(IsStringLocation ? Loc : StringRange.getBegin(),
- diag::note_format_string_defined);
-
+ SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin();
+ unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc)
+ ? diag::note_format_string_evaluated_to
+ : diag::note_format_string_defined;
+ const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID);
Note << StringRange;
Note << FixIt;
}
diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index af30ad5d15fe2..a94e0619ce843 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -3,6 +3,11 @@
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s
// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-linux-android %s
+// expected-note at -5{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_consteval_init_array test, that -verify treats as if it showed up at
+// line 1 of this file.
+
#include <stdarg.h>
#include <stddef.h>
#define __need_wint_t
@@ -900,3 +905,12 @@ void test_promotion(void) {
// pointers
printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
}
+
+void test_consteval_init_array(void) {
+ const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // expected-note{{format string is defined here}}
+ printf(buf_not_terminated, "hello"); // expected-warning{{format string is not null-terminated}}
+
+ const char buf[] = {'%', 55 * 2 + 5, '\n', 0};
+ printf(buf, "hello"); // no-warning
+ printf(buf, 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
+}
diff --git a/clang/test/SemaCXX/format-strings.cpp b/clang/test/SemaCXX/format-strings.cpp
index 48cf23999a94f..7b04ea7d8bc75 100644
--- a/clang/test/SemaCXX/format-strings.cpp
+++ b/clang/test/SemaCXX/format-strings.cpp
@@ -1,6 +1,14 @@
// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks %s
// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -fblocks -std=c++98 %s
// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++20 %s
+
+#if __cplusplus >= 202000l
+// expected-note at -6{{format string was constant-evaluated}}
+// ^^^ there will be a <scratch space> SourceLocation caused by the
+// test_constexpr_string test, that -verify treats as if it showed up at
+// line 1 of this file.
+#endif
#include <stdarg.h>
@@ -238,3 +246,69 @@ void f(Scoped1 S1, Scoped2 S2) {
}
#endif
+
+#if __cplusplus >= 202000L
+class my_string {
+ char *data;
+ unsigned size;
+
+public:
+ template<unsigned N>
+ constexpr my_string(const char (&literal)[N]) {
+ data = new char[N+1];
+ for (size = 0; size < N; ++size) {
+ data[size] = literal[size];
+ if (data[size] == 0)
+ break;
+ }
+ data[size] = 0;
+ }
+
+ my_string(const my_string &) = delete;
+
+ constexpr my_string(my_string &&that) {
+ data = that.data;
+ size = that.size;
+ that.data = nullptr;
+ that.size = 0;
+ }
+
+ constexpr ~my_string() {
+ delete[] data;
+ }
+
+ template<unsigned N>
+ constexpr void append(const char (&literal)[N]) {
+ char *cat = new char[size + N + 1];
+ char *tmp = cat;
+ for (unsigned i = 0; i < size; ++i) {
+ *tmp++ = data[i];
+ }
+ for (unsigned i = 0; i < N; ++i) {
+ *tmp = literal[i];
+ if (*tmp == 0)
+ break;
+ ++tmp;
+ }
+ *tmp = 0;
+ delete[] data;
+ size = tmp - cat;
+ data = cat;
+ }
+
+ constexpr const char *c_str() const {
+ return data;
+ }
+};
+
+constexpr my_string const_string() {
+ my_string str("hello %s");
+ str.append(", %d");
+ return str;
+}
+
+void test_constexpr_string() {
+ printf(const_string().c_str(), "hello", 123); // no-warning
+ printf(const_string().c_str(), 123, 456); // expected-warning {{format specifies type 'char *' but the argument has type 'int'}}
+}
+#endif
>From 3632a83d3af5ab3958226ebf6e416643d7bfb3f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Tue, 22 Apr 2025 13:01:40 -0700
Subject: [PATCH 2/3] Create StringEvalResult to hold result of compile-time
string evaluation
Several users of compile-time string evaluation can meaningfully use the
special case that compile-time string evaluation resolves to a string
literal in source (for instance, to improve diagnostics). This changes
Expr::tryEvaluateString to return a StringEvalResult, which can hold
either a string literal and an offset or a std::string of evaluated
characters.
---
clang/include/clang/AST/Expr.h | 30 ++--
.../clang/Basic/DiagnosticSemaKinds.td | 2 +-
clang/lib/AST/ExprConstant.cpp | 141 +++++++++++++-----
clang/lib/Analysis/UnsafeBufferUsage.cpp | 5 +-
clang/lib/CodeGen/CGBuiltin.cpp | 5 +-
clang/lib/Sema/SemaChecking.cpp | 80 +++++-----
clang/test/SemaCXX/verbose-trap.cpp | 10 +-
7 files changed, 182 insertions(+), 91 deletions(-)
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 78eda8bc3c43e..54f0ac28ac082 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -791,17 +791,25 @@ class Expr : public ValueStmt {
const Expr *PtrExpression, ASTContext &Ctx,
EvalResult &Status) const;
- /// Fill \c Into with the first characters that can be constant-evaluated
- /// from this \c Expr . When encountering a null character, stop and return
- /// \c true (the null is not returned in \c Into ). Return \c false if
- /// evaluation runs off the end of the constant-evaluated string before it
- /// encounters a null character.
- bool tryEvaluateString(ASTContext &Ctx, std::string &Into) const;
-
- /// If the current \c Expr can be evaluated to a pointer to a null-terminated
- /// constant string, return the constant string (without the terminating
- /// null).
- std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;
+ class StringEvalResult {
+ std::string Storage;
+ const StringLiteral *SL;
+ uint64_t Offset;
+
+ public:
+ StringEvalResult(std::string Contents);
+ StringEvalResult(const StringLiteral *SL, uint64_t Offset);
+
+ llvm::StringRef getString() const;
+ bool getStringLiteral(const StringLiteral *&SL, uint64_t &Offset) const;
+ };
+
+ /// If the current \c Expr can be evaluated to a pointer to a constant string,
+ /// return the constant string. The string might not be NUL-terminated. If
+ /// \c NullTerminated is supplied, it is set to whether there is at least one
+ /// NUL character in the string.
+ std::optional<StringEvalResult>
+ tryEvaluateString(ASTContext &Ctx, bool *NullTerminated = nullptr) const;
/// Enumeration used to describe the kind of Null pointer constant
/// returned from \c isNullPointerConstant().
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4139ff2737c76..e3b73c149b3d5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9105,7 +9105,7 @@ def err_expected_callable_argument : Error<
def note_building_builtin_dump_struct_call : Note<
"in call to printing function with arguments '(%0)' while dumping struct">;
def err_builtin_verbose_trap_arg : Error<
- "argument to __builtin_verbose_trap must %select{be a pointer to a constant string|not contain $}0">;
+ "argument to __builtin_verbose_trap must %select{be a pointer to a constant NUL-terminated string|not contain $}0">;
def err_atomic_load_store_uses_lib : Error<
"atomic %select{load|store}0 requires runtime support that is not "
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index fec92edf49096..6c5e8260131bb 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -1922,9 +1922,17 @@ static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info);
static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
EvalInfo &Info);
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
+static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E,
+ QualType &CharTy, LValue &String);
+static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info,
+ LValue &String,
+ QualType CharTy,
+ uint64_t &Offset);
+template <typename CharAction>
+static bool IterateStringLValue(EvalInfo &Info, const Expr *E, QualType CharTy,
+ LValue &String, CharAction &&Action);
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
- EvalInfo &Info,
- std::string *StringResult = nullptr);
+ EvalInfo &Info);
/// Evaluate an integer or fixed point expression into an APResult.
static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
@@ -17943,14 +17951,12 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
return tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
}
-static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
- EvalInfo &Info, std::string *StringResult) {
+static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E,
+ QualType &CharTy, LValue &String) {
QualType Ty = E->getType();
if (!E->isPRValue())
return false;
- LValue String;
- QualType CharTy;
if (Ty->canDecayToPointerType()) {
if (E->isGLValue()) {
if (!EvaluateLValue(E, String, Info))
@@ -17975,8 +17981,13 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
} else {
return false;
}
+ return true;
+}
- // Fast path: if it's a string literal, search the string value.
+static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info,
+ LValue &String,
+ QualType CharTy,
+ uint64_t &Offset) {
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
String.getLValueBase().dyn_cast<const Expr *>())) {
StringRef Str = S->getBytes();
@@ -17985,49 +17996,111 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
S->getCharByteWidth() == 1 &&
// FIXME: Add fast-path for wchar_t too.
Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
- Str = Str.substr(Off);
-
- StringRef::size_type Pos = Str.find(0);
- if (Pos != StringRef::npos)
- Str = Str.substr(0, Pos);
-
- Result = Str.size();
- if (StringResult)
- *StringResult = Str;
- return true;
+ Offset = static_cast<uint64_t>(Off);
+ return S;
}
-
- // Fall through to slow path.
}
+ return nullptr;
+}
- // Slow path: scan the bytes of the string looking for the terminating 0.
- for (uint64_t Strlen = 0; /**/; ++Strlen) {
+template <typename CharAction>
+static bool IterateStringLValue(EvalInfo &Info, const Expr *E, QualType CharTy,
+ LValue &String, CharAction &&Action) {
+ while (true) {
APValue Char;
if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
!Char.isInt())
return false;
- if (!Char.getInt()) {
- Result = Strlen;
+ if (!Action(Char.getInt().getExtValue()))
return true;
- } else if (StringResult)
- StringResult->push_back(Char.getInt().getExtValue());
if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
return false;
}
}
-bool Expr::tryEvaluateString(ASTContext &Ctx, std::string &StringResult) const {
+static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
+ EvalInfo &Info) {
+ LValue String;
+ QualType CharTy;
+ if (!EvaluateStringAsLValue(Info, E, CharTy, String))
+ return false;
+
+ // Fast path: if it's a string literal, search the string value.
+ uint64_t Off;
+ if (const auto *S = StringLValueIsLiteral(Info, String, CharTy, Off)) {
+ StringRef Str = S->getBytes().substr(Off);
+
+ StringRef::size_type Pos = Str.find(0);
+ if (Pos != StringRef::npos)
+ Str = Str.substr(0, Pos);
+
+ Result = Str.size();
+ return true;
+ }
+
+ // Slow path: scan the bytes of the string looking for the terminating 0.
+ Result = 0;
+ return IterateStringLValue(Info, E, CharTy, String, [&](int Char) {
+ if (Char) {
+ Result++;
+ return true;
+ } else
+ return false;
+ });
+}
+
+Expr::StringEvalResult::StringEvalResult(const StringLiteral *SL,
+ uint64_t Offset)
+ : SL(SL), Offset(Offset) {}
+
+Expr::StringEvalResult::StringEvalResult(std::string Contents)
+ : Storage(std::move(Contents)), SL(nullptr), Offset(0) {}
+
+llvm::StringRef Expr::StringEvalResult::getString() const {
+ return SL ? SL->getBytes().substr(Offset) : Storage;
+}
+
+bool Expr::StringEvalResult::getStringLiteral(const StringLiteral *&SL,
+ uint64_t &Offset) const {
+ if (this->SL) {
+ SL = this->SL;
+ Offset = this->Offset;
+ return true;
+ }
+ return false;
+}
+
+std::optional<Expr::StringEvalResult>
+Expr::tryEvaluateString(ASTContext &Ctx, bool *NullTerminated) const {
+ if (NullTerminated)
+ *NullTerminated = false;
+
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
- uint64_t Result;
- return EvaluateBuiltinStrLen(this, Result, Info, &StringResult);
-}
+ LValue String;
+ QualType CharTy;
+ if (!EvaluateStringAsLValue(Info, this, CharTy, String))
+ return {};
+
+ uint64_t Off;
+ if (const auto *S = StringLValueIsLiteral(Info, String, CharTy, Off)) {
+ if (NullTerminated)
+ *NullTerminated = true;
+ return StringEvalResult(S, Off);
+ }
+
+ std::string Result;
+ bool NTFound = IterateStringLValue(Info, this, CharTy, String, [&](int Char) {
+ if (Char) {
+ Result.push_back(Char);
+ return true;
+ } else
+ return false;
+ });
-std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
- std::string StringResult;
- if (tryEvaluateString(Ctx, StringResult))
- return StringResult;
- return {};
+ if (NullTerminated)
+ *NullTerminated = NTFound;
+ return StringEvalResult(Result);
}
template <typename T>
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index fbe753de9ef1f..10d4c63a1da05 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -725,12 +725,13 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
const Expr *Fmt = Call->getArg(FmtArgIdx);
if (auto *SL = dyn_cast<clang::StringLiteral>(Fmt->IgnoreParenImpCasts())) {
+ std::optional<Expr::StringEvalResult> SER;
StringRef FmtStr;
if (SL->getCharByteWidth() == 1)
FmtStr = SL->getString();
- else if (auto EvaledFmtStr = SL->tryEvaluateString(Ctx))
- FmtStr = *EvaledFmtStr;
+ else if ((SER = SL->tryEvaluateString(Ctx)))
+ FmtStr = SER->getString();
else
goto CHECK_UNSAFE_PTR;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index fe55dfffc1cbe..d92bb977c7546 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3478,8 +3478,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
if (getDebugInfo()) {
TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
- TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
- *E->getArg(1)->tryEvaluateString(getContext()));
+ TrapLocation,
+ E->getArg(0)->tryEvaluateString(getContext())->getString(),
+ E->getArg(1)->tryEvaluateString(getContext())->getString());
}
ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
// Currently no attempt is made to prevent traps from being merged.
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 017be929ca18e..0ca41c9758af9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -180,12 +180,14 @@ static bool checkBuiltinVerboseTrap(CallExpr *Call, Sema &S) {
if (Arg->isValueDependent())
continue;
- std::optional<std::string> ArgString = Arg->tryEvaluateString(S.Context);
+ // Arguments must be pointers to constant strings, must be NUL-terminated,
+ // and cannot contain '$'.
+ bool HasNulTerminator;
+ auto ArgString = Arg->tryEvaluateString(S.Context, &HasNulTerminator);
int DiagMsgKind = -1;
- // Arguments must be pointers to constant strings and cannot use '$'.
- if (!ArgString.has_value())
+ if (!(ArgString && HasNulTerminator))
DiagMsgKind = 0;
- else if (ArgString->find('$') != std::string::npos)
+ else if (ArgString->getString().find('$') != llvm::StringRef::npos)
DiagMsgKind = 1;
if (DiagMsgKind >= 0) {
@@ -5943,7 +5945,8 @@ enum StringLiteralConstEvalResult {
};
static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL);
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
+ uint64_t &Offset);
// Determine if an expression is a string literal or constant string.
// If this function returns false on the arguments to a function expecting a
@@ -6330,8 +6333,9 @@ static StringLiteralCheckType checkFormatStringExpr(
break;
}
+ uint64_t EvalOffset = 0;
const StringLiteral *FakeLiteral = nullptr;
- switch (constEvalStringAsLiteral(S, E, FakeLiteral)) {
+ switch (constEvalStringAsLiteral(S, E, FakeLiteral, EvalOffset)) {
case SLCER_NotEvaluated:
return SLCT_NotALiteral;
@@ -6348,51 +6352,49 @@ static StringLiteralCheckType checkFormatStringExpr(
case SLCER_Evaluated:
InFunctionCall = false;
E = FakeLiteral;
+ Offset = EvalOffset;
goto tryAgain;
}
}
static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL) {
- // As a last resort, try to constant-evaluate the format string. If it
- // evaluates to a string literal in the first place, we can point to that
- // string literal in source and use that.
- Expr::EvalResult Result;
- if (E->EvaluateAsRValue(Result, S.Context) && Result.Val.isLValue()) {
- const auto *LVE = Result.Val.getLValueBase().dyn_cast<const Expr *>();
- if (auto *BaseSL = dyn_cast_or_null<StringLiteral>(LVE)) {
- SL = BaseSL;
- return SLCER_Evaluated;
- }
- }
-
- // Otherwise, try to evaluate the expression as a string constant.
- std::string FormatString;
- if (!E->tryEvaluateString(S.Context, FormatString)) {
- return FormatString.empty() ? SLCER_NotEvaluated : SLCER_NotNullTerminated;
- }
-
+constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
+ uint64_t &Offset) {
+ // As a last resort, try to constant-evaluate the format string.
+ bool HasNul;
+ auto SER = E->tryEvaluateString(S.Context, &HasNul);
+ if (!SER)
+ return SLCER_NotEvaluated;
+ if (!HasNul)
+ return SLCER_NotNullTerminated;
+
+ // If it evaluates to a string literal in the first place, we can point to
+ // that string literal in source and use that.
+ if (SER->getStringLiteral(SL, Offset))
+ return SLCER_Evaluated;
+
+ // Otherwise, plop that string into a scratch buffer, create a string literal
+ // and then go with that.
std::unique_ptr<llvm::MemoryBuffer> MemBuf;
{
llvm::SmallString<80> EscapedString;
{
llvm::raw_svector_ostream OS(EscapedString);
OS << '"';
- OS.write_escaped(FormatString);
+ OS.write_escaped(SER->getString());
OS << '"';
}
MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString),
"<scratch space>", true));
}
- // Plop that string into a scratch buffer, create a string literal and then
- // go with that.
auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile);
- QualType SLType = S.Context.getStringLiteralArrayType(S.Context.CharTy,
- FormatString.length());
- SL = StringLiteral::Create(S.Context, FormatString,
+ QualType SLType = S.Context.getStringLiteralArrayType(
+ S.Context.CharTy, SER->getString().size());
+ SL = StringLiteral::Create(S.Context, SER->getString(),
StringLiteralKind::Ordinary, false, SLType, Begin);
+ Offset = 0;
return SLCER_Evaluated;
}
@@ -6481,6 +6483,11 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
VariadicCallType CallType, SourceLocation Loc,
SourceRange Range,
llvm::SmallBitVector &CheckedVarArgs) {
+ // As a last resort, Clang attempts to evaluate the format string as a
+ // constant, which is expensive. Before we go down that route, check that
+ // the warnings are at least enabled at Loc, which in the common case points
+ // at the opening parenthesis of the function call.
+
// CHECK: printf/scanf-like function is called with no format string.
if (format_idx >= Args.size()) {
Diag(Loc, diag::warn_missing_format_string) << Range;
@@ -6493,14 +6500,9 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
//
// Dynamically generated format strings are difficult to
// automatically vet at compile time. Requiring that format strings
- // are string literals: (1) permits the checking of format strings by
- // the compiler and thereby (2) can practically remove the source of
- // many format string exploits.
-
- // Format string can be either ObjC string (e.g. @"%d") or
- // C string (e.g. "%d")
- // ObjC string uses the same format specifiers as C string, so we can use
- // the same format string checking logic for both ObjC and C strings.
+ // can evaluate to constant strings: (1) permits the checking of format
+ // strings by the compiler and thereby (2) can practically remove the source
+ // of many format string exploits.
UncoveredArgHandler UncoveredArg;
StringLiteralCheckType CT = checkFormatStringExpr(
*this, ReferenceFormatString, OrigFormatExpr, Args, APK, format_idx,
diff --git a/clang/test/SemaCXX/verbose-trap.cpp b/clang/test/SemaCXX/verbose-trap.cpp
index 2503f9860d9c3..5562fb453948c 100644
--- a/clang/test/SemaCXX/verbose-trap.cpp
+++ b/clang/test/SemaCXX/verbose-trap.cpp
@@ -15,6 +15,9 @@ char const constMsg3[] = "hello";
template <const char * const category, const char * const reason>
void f(const char * arg) {
+ const char buf[] = {'a', 'b', 'c'};
+ const char buf_nt1[] = {'a', 'b', 'c', 0};
+ const char buf_nt2[] = {'a', 'b', 0, 'c'};
__builtin_verbose_trap("cat1", "Arbitrary string literals can be used!");
__builtin_verbose_trap(" cat1 ", "Argument_must_not_be_null");
__builtin_verbose_trap("cat" "egory1", "hello" "world");
@@ -24,9 +27,12 @@ void f(const char * arg) {
__builtin_verbose_trap(); // expected-error {{too few arguments}}
__builtin_verbose_trap(""); // expected-error {{too few arguments}}
__builtin_verbose_trap("", "", ""); // expected-error {{too many arguments}}
- __builtin_verbose_trap("", 0); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant string}}
+ __builtin_verbose_trap("", 0); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant NUL-terminated string}}
__builtin_verbose_trap(1, ""); // expected-error {{cannot initialize a parameter of type 'const char *' with an rvalue of type 'int'}}
- __builtin_verbose_trap(arg, ""); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant string}}
+ __builtin_verbose_trap(arg, ""); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant NUL-terminated string}}
+ __builtin_verbose_trap(buf, ""); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant NUL-terminated string}}
+ __builtin_verbose_trap(buf_nt1, "");
+ __builtin_verbose_trap(buf_nt2, "");
__builtin_verbose_trap("cat$1", "hel$lo"); // expected-error 2 {{argument to __builtin_verbose_trap must not contain $}}
__builtin_verbose_trap(category, reason);
__builtin_verbose_trap(u8"cat1", u8"hello");
>From 504cdb609d50414c6825a0ced8afb3a91146d4c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= <fcloutier at apple.com>
Date: Tue, 22 Apr 2025 14:12:17 -0700
Subject: [PATCH 3/3] Address naming/documentation-level review feedback
---
clang/docs/ReleaseNotes.rst | 35 +++++++++++++++++--
.../clang/Basic/DiagnosticSemaKinds.td | 2 +-
clang/lib/AST/ExprConstant.cpp | 11 +++---
clang/lib/Sema/SemaChecking.cpp | 25 ++++++++-----
4 files changed, 57 insertions(+), 16 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 05566d66a65d2..8529e5b991cba 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,8 +265,39 @@ related warnings within the method body.
``format_matches`` accepts an example valid format string as its third
argument. For more information, see the Clang attributes documentation.
-- Format string checking now supports the compile-time evaluation of format
- strings as a fallback mechanism.
+- Clang can now verify format strings that can be constant-folded even if they
+ do not resolve to a string literal. For instance, all of these can now be
+ verified:
+
+ .. code-block:: c++
+
+ const char format[] = {'h', 'e', 'l', 'l', 'o', ' ', '%', 's', 0};
+ printf(format, "world");
+ // no warning
+
+ printf(format, 123);
+ // warning: format specifies type 'char *' but the argument has type 'int'
+
+ printf(("%"s + "i"s).c_str(), "world");
+ // warning: format specifies type 'int' but the argument has type 'char *'
+
+ When the format expression does not evaluate to a string literal, Clang
+ points diagnostics into a pseudo-file called ``<scratch space>`` that contains
+ the format string literal as it evaluated, like so:
+
+ .. code-block:: text
+
+ example.c:6:17: warning: format specifies type 'char *' but the argument has type 'int' [-Wformat]
+ 6 | printf(format, 123);
+ | ~~~~~~ ^~~
+ <scratch space>:1:4: note: format string resolved to a constant string
+ 1 | "hello %s"
+ | ^~
+ | %d
+
+ This may mean that format strings which were previously unverified (or which
+ triggered ``-Wformat-nonliteral``) are now verified by ``-Wformat`` and its
+ allies.
- Introduced a new statement attribute ``[[clang::atomic]]`` that enables
fine-grained control over atomic code generation on a per-statement basis.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e3b73c149b3d5..57287bb8e0115 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10171,7 +10171,7 @@ def warn_format_bool_as_character : Warning<
InGroup<Format>;
def note_format_string_defined : Note<"format string is defined here">;
def note_format_string_evaluated_to : Note<
- "format string was constant-evaluated">;
+ "format string resolved to a constant string">;
def note_format_fix_specifier : Note<"did you mean to use '%0'?">;
def note_printf_c_str: Note<"did you mean to call the %0 method?">;
def note_format_security_fixit: Note<
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 6c5e8260131bb..e5f41a9f9b294 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -17974,14 +17974,17 @@ static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E,
String.addArray(Info, E, CAT);
else
String.addUnsizedArray(Info, E, CharTy);
- } else if (Ty->hasPointerRepresentation()) {
+ return true;
+ }
+
+ if (Ty->hasPointerRepresentation()) {
if (!EvaluatePointer(E, String, Info))
return false;
CharTy = Ty->getPointeeType();
- } else {
- return false;
+ return true;
}
- return true;
+
+ return false;
}
static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0ca41c9758af9..6184fe384fee5 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5938,15 +5938,22 @@ static void CheckFormatString(
llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg,
bool IgnoreStringsWithoutSpecifiers);
-enum StringLiteralConstEvalResult {
+enum StringLiteralConstantEvaluationResult {
SLCER_NotEvaluated,
SLCER_NotNullTerminated,
SLCER_Evaluated,
};
-static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
- uint64_t &Offset);
+/// Attempt to fold \c E into a constant string that \c checkFormatStringExpr
+/// can use. If \c E folds to a string literal, that string literal will be used
+/// for diagnostics. If \c E has a constant string value but it does not fold to
+/// a literal (for instance, ("%"s + "i"s).c_str() constant-folds to "%i"), a
+/// <scratch space> pseudo-source file will be allocated, containing a string
+/// literal representation of the constant string, and format diagnostics will
+/// point to it.
+static StringLiteralConstantEvaluationResult
+EvaluateStringAndCreateLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
+ uint64_t &Offset);
// Determine if an expression is a string literal or constant string.
// If this function returns false on the arguments to a function expecting a
@@ -6335,7 +6342,7 @@ static StringLiteralCheckType checkFormatStringExpr(
uint64_t EvalOffset = 0;
const StringLiteral *FakeLiteral = nullptr;
- switch (constEvalStringAsLiteral(S, E, FakeLiteral, EvalOffset)) {
+ switch (EvaluateStringAndCreateLiteral(S, E, FakeLiteral, EvalOffset)) {
case SLCER_NotEvaluated:
return SLCT_NotALiteral;
@@ -6357,9 +6364,9 @@ static StringLiteralCheckType checkFormatStringExpr(
}
}
-static StringLiteralConstEvalResult
-constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
- uint64_t &Offset) {
+static StringLiteralConstantEvaluationResult
+EvaluateStringAndCreateLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
+ uint64_t &Offset) {
// As a last resort, try to constant-evaluate the format string.
bool HasNul;
auto SER = E->tryEvaluateString(S.Context, &HasNul);
@@ -6388,7 +6395,7 @@ constEvalStringAsLiteral(Sema &S, const Expr *E, const StringLiteral *&SL,
"<scratch space>", true));
}
- auto ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
+ FileID ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf));
SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile);
QualType SLType = S.Context.getStringLiteralArrayType(
S.Context.CharTy, SER->getString().size());
More information about the cfe-commits
mailing list