[clang] Fix incorrect array initialization with string literal (fixes #112189) (PR #156846)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 5 22:51:39 PDT 2025
https://github.com/awson updated https://github.com/llvm/llvm-project/pull/156846
>From 5585fdea6e545aebc3ee9919c5da74e2e7397a35 Mon Sep 17 00:00:00 2001
From: awson <kyrab at mail.ru>
Date: Fri, 5 Sep 2025 06:45:53 +0300
Subject: [PATCH] `updateStringLiteralType` recursively clones subexpressions
which types it modifies.
Done as a separate static function and not inlined directly into the `updateStringLiteralType` function for maintainability.
---
clang/lib/Sema/SemaInit.cpp | 121 +++++++++++++++++++++++++++-----
clang/test/SemaCXX/GH112189.cpp | 41 +++++++++++
2 files changed, 143 insertions(+), 19 deletions(-)
create mode 100644 clang/test/SemaCXX/GH112189.cpp
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index c97129336736b..162ed8d54093e 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -168,9 +168,87 @@ bool Sema::IsStringInit(Expr *Init, const ArrayType *AT) {
return ::IsStringInit(Init, AT, Context) == SIF_None;
}
+static StringLiteral *CloneStringLiteral(const StringLiteral *SL,
+ ASTContext &C) {
+ SourceLocation *SLocs = new (C) SourceLocation[SL->getNumConcatenated()];
+ std::copy(SL->tokloc_begin(), SL->tokloc_end(), SLocs);
+ return StringLiteral::Create(
+ C, SL->getBytes(), SL->getKind(), SL->isPascal(), SL->getType(),
+ ArrayRef<SourceLocation>(SLocs, SL->getNumConcatenated()));
+}
+
+// Exactly follow `IgnoreParensSingleStep` (`AST/IgnoreExpr.h`)
+// We only clone those subexpressions which `IgnoreParensSingleStep` drills down
+// to.
+static Expr *CloneDrilled(Expr *E, ASTContext &C) {
+ if (auto *SL = dyn_cast<StringLiteral>(E)) {
+ return CloneStringLiteral(SL, C);
+ }
+
+ if (auto *PE = dyn_cast<ParenExpr>(E)) {
+ return new (C) ParenExpr(PE->getBeginLoc(), PE->getEndLoc(),
+ CloneDrilled(PE->getSubExpr(), C));
+ }
+
+ if (auto *UO = dyn_cast<UnaryOperator>(E)) {
+ if (UO->getOpcode() == UO_Extension) {
+ return UnaryOperator::Create(
+ C, CloneDrilled(UO->getSubExpr(), C), UO_Extension, UO->getType(),
+ UO->getValueKind(), UO->getObjectKind(), UO->getBeginLoc(),
+ UO->canOverflow(), UO->getFPOptionsOverride());
+ }
+ }
+
+ else if (auto *GSE = dyn_cast<GenericSelectionExpr>(E)) {
+ if (!GSE->isResultDependent()) {
+ ArrayRef<Expr *> GSEAEs = GSE->getAssocExprs();
+ Expr **NewGSEAEs = new (C) Expr *[GSEAEs.size()];
+ std::copy(GSEAEs.begin(), GSEAEs.end(), NewGSEAEs);
+ NewGSEAEs[GSE->getResultIndex()] = CloneDrilled(GSE->getResultExpr(), C);
+
+#define CREATE_GSE(ctrl) \
+ GenericSelectionExpr::Create( \
+ C, GSE->getGenericLoc(), ctrl, GSE->getAssocTypeSourceInfos(), \
+ ArrayRef<Expr *>(NewGSEAEs, GSEAEs.size()), GSE->getDefaultLoc(), \
+ GSE->getRParenLoc(), GSE->containsUnexpandedParameterPack(), \
+ GSE->getResultIndex())
+
+ return GSE->isExprPredicate() ? CREATE_GSE(GSE->getControllingExpr())
+ : CREATE_GSE(GSE->getControllingType());
+#undef CREATE_GSE
+ }
+ }
+
+ else if (auto *CE = dyn_cast<ChooseExpr>(E)) {
+ if (!CE->isConditionDependent()) {
+ // Drills to `CE->getChosenSubExpr()`
+ const bool isCondTrue = CE->isConditionTrue();
+ return new (C)
+ ChooseExpr(CE->getBeginLoc(), CE->getCond(),
+ isCondTrue ? CloneDrilled(CE->getLHS(), C) : CE->getLHS(),
+ isCondTrue ? CE->getRHS() : CloneDrilled(CE->getRHS(), C),
+ CE->getType(), CE->getValueKind(), CE->getObjectKind(),
+ CE->getRParenLoc(), CE->isConditionTrue());
+ }
+ }
+
+ else if (auto *PE = dyn_cast<PredefinedExpr>(E)) {
+ if (PE->isTransparent() && PE->getFunctionName()) {
+ return PredefinedExpr::Create(
+ C, PE->getLocation(), PE->getType(), PE->getIdentKind(),
+ PE->isTransparent(), CloneStringLiteral(PE->getFunctionName(), C));
+ }
+ }
+
+ return E;
+}
+
/// Update the type of a string literal, including any surrounding parentheses,
/// to match the type of the object which it is initializing.
-static void updateStringLiteralType(Expr *E, QualType Ty) {
+static Expr *updateStringLiteralType(Expr *E, QualType Ty, Sema &S) {
+ Expr *ENew = CloneDrilled(E, S.Context);
+ E = ENew;
+
while (true) {
E->setType(Ty);
E->setValueKind(VK_PRValue);
@@ -178,6 +256,7 @@ static void updateStringLiteralType(Expr *E, QualType Ty) {
break;
E = IgnoreParensSingleStep(E);
}
+ return ENew;
}
/// Fix a compound literal initializing an array so it's correctly marked
@@ -209,9 +288,9 @@ static bool initializingConstexprVariable(const InitializedEntity &Entity) {
static void CheckC23ConstexprInitStringLiteral(const StringLiteral *SE,
Sema &SemaRef, QualType &TT);
-static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
- Sema &S, const InitializedEntity &Entity,
- bool CheckC23ConstexprInit = false) {
+static Expr *CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
+ Sema &S, const InitializedEntity &Entity,
+ bool CheckC23ConstexprInit = false) {
// Get the length of the string as parsed.
auto *ConstantArrayTy =
cast<ConstantArrayType>(Str->getType()->getAsArrayTypeUnsafe());
@@ -228,8 +307,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
// Return a new array type (C99 6.7.8p22).
DeclT = S.Context.getConstantArrayType(
IAT->getElementType(), ConstVal, nullptr, ArraySizeModifier::Normal, 0);
- updateStringLiteralType(Str, DeclT);
- return;
+ return updateStringLiteralType(Str, DeclT, S);
}
const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
@@ -302,7 +380,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
// something like:
// char x[1] = "foo";
// then this will set the string literal's type to char[1].
- updateStringLiteralType(Str, DeclT);
+ return updateStringLiteralType(Str, DeclT, S);
}
void emitUninitializedExplicitInitFields(Sema &S, const RecordDecl *R) {
@@ -1608,10 +1686,12 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) {
// FIXME: Should we do this checking in verify-only mode?
- if (!VerifyOnly)
- CheckStringInit(expr, ElemType, arrayType, SemaRef, Entity,
- SemaRef.getLangOpts().C23 &&
- initializingConstexprVariable(Entity));
+ if (!VerifyOnly) {
+ expr = CheckStringInit(expr, ElemType, arrayType, SemaRef, Entity,
+ SemaRef.getLangOpts().C23 &&
+ initializingConstexprVariable(Entity));
+ IList->setInit(Index, expr);
+ }
if (StructuredList)
UpdateStructuredListElement(StructuredList, StructuredIndex, expr);
++Index;
@@ -2121,10 +2201,13 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity,
// because doing so would involve allocating one character
// constant for each string.
// FIXME: Should we do these checks in verify-only mode too?
- if (!VerifyOnly)
- CheckStringInit(
- IList->getInit(Index), DeclType, arrayType, SemaRef, Entity,
- SemaRef.getLangOpts().C23 && initializingConstexprVariable(Entity));
+ if (!VerifyOnly) {
+ IList->setInit(
+ Index, CheckStringInit(IList->getInit(Index), DeclType, arrayType,
+ SemaRef, Entity,
+ SemaRef.getLangOpts().C23 &&
+ initializingConstexprVariable(Entity)));
+ }
if (StructuredList) {
UpdateStructuredListElement(StructuredList, StructuredIndex,
IList->getInit(Index));
@@ -8421,10 +8504,10 @@ ExprResult InitializationSequence::Perform(Sema &S,
case SK_StringInit: {
QualType Ty = Step->Type;
bool UpdateType = ResultType && Entity.getType()->isIncompleteArrayType();
- CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty,
- S.Context.getAsArrayType(Ty), S, Entity,
- S.getLangOpts().C23 &&
- initializingConstexprVariable(Entity));
+ CurInit = CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty,
+ S.Context.getAsArrayType(Ty), S, Entity,
+ S.getLangOpts().C23 &&
+ initializingConstexprVariable(Entity));
break;
}
diff --git a/clang/test/SemaCXX/GH112189.cpp b/clang/test/SemaCXX/GH112189.cpp
new file mode 100644
index 0000000000000..036fd8ea83c0e
--- /dev/null
+++ b/clang/test/SemaCXX/GH112189.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -verify %s
+// expected-no-diagnostics
+
+template<unsigned int SPACE>
+char foo_choose() {
+ char buffer[SPACE] {__builtin_choose_expr(6, "foo", "boo")};
+ return buffer[0];
+}
+
+int boro_choose()
+{
+ int r = foo_choose<10>();
+ r += foo_choose<100>();
+ return r + foo_choose<4>();
+}
+
+template<unsigned int SPACE>
+char foo_gen_ext() {
+ char buffer[SPACE] {__extension__ (_Generic(0, int: (__extension__ "foo" )))};
+ return buffer[0];
+}
+
+int boro_gen_ext()
+{
+ int r = foo_gen_ext<10>();
+ r += foo_gen_ext<100>();
+ return r + foo_gen_ext<4>();
+}
+
+template<unsigned int SPACE>
+char foo_paren_predef() {
+ char buffer[SPACE] {(((__FILE__)))};
+ return buffer[0];
+}
+
+int boro_paren_predef()
+{
+ int r = foo_paren_predef<200000>();
+ r += foo_paren_predef<300000>();
+ return r + foo_paren_predef<100000>();
+}
More information about the cfe-commits
mailing list