[clang] [Clang] improve -Wstring-concatenation to warn on every missing comma in initializer lists (PR #154018)
Oleksandr T. via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 18 04:07:06 PDT 2025
https://github.com/a-tarasyuk updated https://github.com/llvm/llvm-project/pull/154018
>From d9cd8679c7594da4dc31ca736a439b113a1239d6 Mon Sep 17 00:00:00 2001
From: Oleksandr Tarasiuk <oleksandr.tarasiuk at outlook.com>
Date: Sun, 17 Aug 2025 16:00:48 +0300
Subject: [PATCH 1/2] [Clang] improve -Wstring-concatenation to warn on every
missing comma in initializer lists
---
clang/docs/ReleaseNotes.rst | 2 ++
clang/lib/Sema/SemaDecl.cpp | 45 +++++++++++++++++++--------------
clang/test/Sema/string-concat.c | 14 ++++++++++
3 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b35f4ea42818a..48575b6e821f0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -162,6 +162,8 @@ Improvements to Clang's diagnostics
an override of a virtual method.
- Fixed fix-it hint for fold expressions. Clang now correctly places the suggested right
parenthesis when diagnosing malformed fold expressions. (#GH151787)
+- ``-Wstring-concatenation`` now diagnoses every missing comma in an initializer list,
+ rather than stopping after the first. (#GH153745)
- Fixed an issue where emitted format-signedness diagnostics were not associated with an appropriate
diagnostic id. Besides being incorrect from an API standpoint, this was user visible, e.g.:
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 8ddbaf34a7f47..b63c157d81dc8 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14708,7 +14708,16 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
isa<InitListExpr>(var->getInit())) {
const auto *ILE = cast<InitListExpr>(var->getInit());
unsigned NumInits = ILE->getNumInits();
- if (NumInits > 2)
+ if (NumInits > 2) {
+ auto concatenatedPartsAt = [&](unsigned Index) -> unsigned {
+ const Expr *E = ILE->getInit(Index);
+ if (E) {
+ if (const auto *S = dyn_cast<StringLiteral>(E->IgnoreImpCasts()))
+ return S->getNumConcatenated();
+ }
+ return 0;
+ };
+
for (unsigned I = 0; I < NumInits; ++I) {
const auto *Init = ILE->getInit(I);
if (!Init)
@@ -14721,24 +14730,23 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
// Diagnose missing comma in string array initialization.
// Do not warn when all the elements in the initializer are concatenated
// together. Do not warn for macros too.
- if (NumConcat == 2 && !SL->getBeginLoc().isMacroID()) {
- bool OnlyOneMissingComma = true;
- for (unsigned J = I + 1; J < NumInits; ++J) {
- const auto *Init = ILE->getInit(J);
- if (!Init)
- break;
- const auto *SLJ = dyn_cast<StringLiteral>(Init->IgnoreImpCasts());
- if (!SLJ || SLJ->getNumConcatenated() > 1) {
- OnlyOneMissingComma = false;
- break;
- }
- }
+ if (NumConcat == 2) {
+ if (SL->getBeginLoc().isMacroID())
+ continue;
+
+ unsigned L = I > 0 ? concatenatedPartsAt(I - 1) : 0;
+ unsigned R = I + 1 < NumInits ? concatenatedPartsAt(I + 1) : 0;
+
+ // Skip neighbors with multi-part concatenations.
+ if (L > 1 || R > 1)
+ continue;
- if (OnlyOneMissingComma) {
+ // Diagnose when at least one neighbor is a single literal.
+ if (L || R) {
SmallVector<FixItHint, 1> Hints;
- for (unsigned i = 0; i < NumConcat - 1; ++i)
- Hints.push_back(FixItHint::CreateInsertion(
- PP.getLocForEndOfToken(SL->getStrTokenLoc(i)), ","));
+ // Insert a comma between the two tokens of this element.
+ Hints.push_back(FixItHint::CreateInsertion(
+ PP.getLocForEndOfToken(SL->getStrTokenLoc(0)), ", "));
Diag(SL->getStrTokenLoc(1),
diag::warn_concatenated_literal_array_init)
@@ -14746,10 +14754,9 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
Diag(SL->getBeginLoc(),
diag::note_concatenated_string_literal_silence);
}
- // In any case, stop now.
- break;
}
}
+ }
}
diff --git a/clang/test/Sema/string-concat.c b/clang/test/Sema/string-concat.c
index 63abf100c020f..ae6c0897878a7 100644
--- a/clang/test/Sema/string-concat.c
+++ b/clang/test/Sema/string-concat.c
@@ -168,3 +168,17 @@ const char *extra_parens_to_suppress_warning[] = {
"promise"),
"shared_future"
};
+
+const char *multiple_missing_commas[] = {
+ "1",
+ "2" // expected-note {{place parentheses around the string literal to silence warning}}
+ "3", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}}
+ "4",
+ "5",
+ "6" // expected-note {{place parentheses around the string literal to silence warning}}
+ "7", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}}
+ "8",
+ "9",
+ "10",
+ "11",
+};
>From b62c1448055b380e2ca1a2d1895f23e3a7aa6988 Mon Sep 17 00:00:00 2001
From: Oleksandr Tarasiuk <oleksandr.tarasiuk at outlook.com>
Date: Mon, 18 Aug 2025 14:06:24 +0300
Subject: [PATCH 2/2] add test for multiple consecutive missing commas
---
clang/lib/Sema/SemaDecl.cpp | 8 +++-----
clang/test/Sema/string-concat.c | 16 +++++++++++++++-
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index b63c157d81dc8..98485cf9e72be 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14710,11 +14710,9 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
unsigned NumInits = ILE->getNumInits();
if (NumInits > 2) {
auto concatenatedPartsAt = [&](unsigned Index) -> unsigned {
- const Expr *E = ILE->getInit(Index);
- if (E) {
+ if (const Expr *E = ILE->getInit(Index))
if (const auto *S = dyn_cast<StringLiteral>(E->IgnoreImpCasts()))
return S->getNumConcatenated();
- }
return 0;
};
@@ -14738,11 +14736,11 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
unsigned R = I + 1 < NumInits ? concatenatedPartsAt(I + 1) : 0;
// Skip neighbors with multi-part concatenations.
- if (L > 1 || R > 1)
+ if (R > 1)
continue;
// Diagnose when at least one neighbor is a single literal.
- if (L || R) {
+ if (R == 1 || L == 1) {
SmallVector<FixItHint, 1> Hints;
// Insert a comma between the two tokens of this element.
Hints.push_back(FixItHint::CreateInsertion(
diff --git a/clang/test/Sema/string-concat.c b/clang/test/Sema/string-concat.c
index ae6c0897878a7..4b52a74116b49 100644
--- a/clang/test/Sema/string-concat.c
+++ b/clang/test/Sema/string-concat.c
@@ -169,7 +169,7 @@ const char *extra_parens_to_suppress_warning[] = {
"shared_future"
};
-const char *multiple_missing_commas[] = {
+const char *multiple_missing_commas1[] = {
"1",
"2" // expected-note {{place parentheses around the string literal to silence warning}}
"3", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}}
@@ -182,3 +182,17 @@ const char *multiple_missing_commas[] = {
"10",
"11",
};
+
+const char *multiple_missing_commas2[] = {
+ "1",
+ "2"
+ "3"
+ "4"
+ "5",
+ "6" // expected-note {{place parentheses around the string literal to silence warning}}
+ "7", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}}
+ "8",
+ "9",
+ "10",
+ "11",
+};
More information about the cfe-commits mailing list