[clang] [-Wunterminated-string-initialization] Handle C string literals ending with explicit '\0' (PR #143487)
Ziqing Luo via cfe-commits
cfe-commits at lists.llvm.org
Sun Jun 22 22:14:12 PDT 2025
https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/143487
>From 5825b324e47c78a939d8e62d1101e1574fd04638 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Tue, 10 Jun 2025 15:50:10 +0800
Subject: [PATCH 1/7] [-Wunterminated-string-initialization] Handle C string
literals ending with explicit '\0'
In C, a char array does not need the "nonstring" attribute if its
initializer is a string literal that 1) explicitly ends with '\0' and
2) fits in the array, possibly after the implicit terminating null
character is dropped.
For example:
`char a[4] = "ABC\0"; // fine, needs no "nonstring" attr`
rdar://152506883
---
clang/lib/Sema/SemaInit.cpp | 5 +++++
clang/test/Sema/attr-nonstring_safe.c | 28 +++++++++++++++++++++++++++
2 files changed, 33 insertions(+)
create mode 100644 clang/test/Sema/attr-nonstring_safe.c
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index da56225b2f926..f7592688e0327 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -260,6 +260,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
diag::ext_initializer_string_for_char_array_too_long)
<< Str->getSourceRange();
else if (StrLength - 1 == ArrayLen) {
+ // If the string literal is null-terminated explicitly, e.g., `char a[4] =
+ // "ABC\0"`, there should be no warn:
+ if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()))
+ if (SL->isOrdinary() && SL->getBytes().back() == 0)
+ return;
// If the entity being initialized has the nonstring attribute, then
// silence the "missing nonstring" diagnostic. If there's no entity,
// check whether we're initializing an array of arrays; if so, walk the
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
new file mode 100644
index 0000000000000..3ea441e033dba
--- /dev/null
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c
+// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++
+
+
+// In C, the following examples are fine:
+#if __cplusplus
+char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+
+struct S {
+ char buf[3];
+ char fub[3];
+} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+
+signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+
+#else
+//expected-no-diagnostics
+char foo[3] = "fo\0";
+
+struct S {
+ char buf[3];
+ char fub[3];
+} s = { "ba\0", "bo\0" };
+
+signed char scfoo[3] = "fo\0";
+unsigned char ucfoo[3] = "fo\0";
+#endif
>From afb909341d4c5152f1d5ac6f2deef5385901bb61 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 11 Jun 2025 14:47:30 +0800
Subject: [PATCH 2/7] address comments
---
clang/lib/Sema/SemaInit.cpp | 9 +++---
clang/test/Sema/attr-nonstring_safe.c | 45 +++++++++++++++++++++------
2 files changed, 40 insertions(+), 14 deletions(-)
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index f7592688e0327..ac611aed6d581 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -261,10 +261,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
<< Str->getSourceRange();
else if (StrLength - 1 == ArrayLen) {
// If the string literal is null-terminated explicitly, e.g., `char a[4] =
- // "ABC\0"`, there should be no warn:
- if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()))
- if (SL->isOrdinary() && SL->getBytes().back() == 0)
- return;
+ // "ABC\0"`, there should be no warning:
+ if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens());
+ SL && SL->getLength() > 0 &&
+ SL->getCodeUnit(SL->getLength() - 1) == 0)
+ return;
// If the entity being initialized has the nonstring attribute, then
// silence the "missing nonstring" diagnostic. If there's no entity,
// check whether we're initializing an array of arrays; if so, walk the
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
index 3ea441e033dba..93715d18db5a8 100644
--- a/clang/test/Sema/attr-nonstring_safe.c
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -1,28 +1,53 @@
// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c
-// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++
+// RUN: %clang_cc1 -fsyntax-only -verify=cxx,expected -Wunterminated-string-initialization %s -x c++
-// In C, the following examples are fine:
-#if __cplusplus
-char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+#ifdef __cplusplus
+// C++ is stricter so the following cases should be warned about:
+
+char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+char foo1[1] = "\0"; // cxx-error {{initializer-string for char array is too long, array size is 1 but initializer has size 2 (including the null terminating character)}}
struct S {
char buf[3];
char fub[3];
-} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-
-signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+} s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
#else
-//expected-no-diagnostics
-char foo[3] = "fo\0";
+
+// In C, the following examples are fine:
+#include <stddef.h>
+typedef unsigned short char16_t;
+typedef unsigned int char32_t;
+
+char foo3[3] = "fo\0";
+char foo1[1] = "\0";
struct S {
char buf[3];
char fub[3];
} s = { "ba\0", "bo\0" };
+// Test different encodings:
signed char scfoo[3] = "fo\0";
unsigned char ucfoo[3] = "fo\0";
+wchar_t wcfoo[3] = L"fo\0";
+char16_t c16foo[3] = u"fo\0";
+char32_t c32foo[3] = U"fo\0";
+
+// Test list initializer:
+signed char scfoo_lst[3] = {'f', 'o', '\0'};
+unsigned char ucfoo_lst[3] = {'f', 'o', '\0'};
+wchar_t wcfoo_lst[3] = {L'f', L'o', L'\0'};
+char16_t c16foo_lst[3] = {u'f', u'o', u'\0'};
+char32_t c32foo_lst[3] = {U'f', U'o', U'\0'};
+
+// Declaring an array of size 0 is invalid by C standard but compilers
+// may allow it:
+char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}}
#endif
>From fca602a4c18fdfe1ada285ea096e3436d7fa8253 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 11 Jun 2025 14:53:19 +0800
Subject: [PATCH 3/7] add release notes
---
clang/docs/ReleaseNotes.rst | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index beed0da6883d6..13e4db89e5dc8 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -220,6 +220,7 @@ C Language Changes
char buf1[3] = "foo"; // -Wunterminated-string-initialization
char buf2[3] = "flarp"; // -Wexcess-initializers
+ char buf3[3] = "fo\0"; // This is fine, no warning.
This diagnostic can be suppressed by adding the new ``nonstring`` attribute
to the field or variable being initialized. #GH137705
>From 6cfbbb36b0b79ab72f7d2772723a429fa996156a Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Thu, 12 Jun 2025 11:43:03 +0800
Subject: [PATCH 4/7] remove '#include <stddef.h>'
---
clang/test/Sema/attr-nonstring_safe.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
index 93715d18db5a8..68e153d90cb42 100644
--- a/clang/test/Sema/attr-nonstring_safe.c
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -21,9 +21,9 @@ char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array i
#else
// In C, the following examples are fine:
-#include <stddef.h>
typedef unsigned short char16_t;
typedef unsigned int char32_t;
+typedef int wchar_t;
char foo3[3] = "fo\0";
char foo1[1] = "\0";
>From ebb85743af08f4547a7416bb6e1098a899ca1b5b Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Fri, 13 Jun 2025 10:12:19 +0800
Subject: [PATCH 5/7] fix test
---
clang/test/Sema/attr-nonstring_safe.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
index 68e153d90cb42..f3f3b3e0758d7 100644
--- a/clang/test/Sema/attr-nonstring_safe.c
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -23,7 +23,7 @@ char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array i
// In C, the following examples are fine:
typedef unsigned short char16_t;
typedef unsigned int char32_t;
-typedef int wchar_t;
+typedef unsigned int wchar_t;
char foo3[3] = "fo\0";
char foo1[1] = "\0";
>From 40d654385da4157cff8dd666e5d9ca7b721d5702 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Fri, 13 Jun 2025 15:49:08 +0800
Subject: [PATCH 6/7] fix test
---
clang/test/Sema/attr-nonstring_safe.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
index f3f3b3e0758d7..86e5441520456 100644
--- a/clang/test/Sema/attr-nonstring_safe.c
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -23,7 +23,7 @@ char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array i
// In C, the following examples are fine:
typedef unsigned short char16_t;
typedef unsigned int char32_t;
-typedef unsigned int wchar_t;
+typedef __WCHAR_TYPE__ wchar_t;
char foo3[3] = "fo\0";
char foo1[1] = "\0";
>From 5239320b1f2ad25ba22306e434898126f6dafbd2 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Mon, 23 Jun 2025 13:13:07 +0800
Subject: [PATCH 7/7] address comments
---
clang/lib/Sema/SemaInit.cpp | 58 +++++++++++++-------------
clang/test/Sema/attr-nonstring_safe.c | 59 +++++++++++++--------------
2 files changed, 58 insertions(+), 59 deletions(-)
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index ac611aed6d581..0844cb4d6c3cd 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -260,35 +260,37 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
diag::ext_initializer_string_for_char_array_too_long)
<< Str->getSourceRange();
else if (StrLength - 1 == ArrayLen) {
- // If the string literal is null-terminated explicitly, e.g., `char a[4] =
- // "ABC\0"`, there should be no warning:
- if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens());
- SL && SL->getLength() > 0 &&
- SL->getCodeUnit(SL->getLength() - 1) == 0)
- return;
- // If the entity being initialized has the nonstring attribute, then
- // silence the "missing nonstring" diagnostic. If there's no entity,
- // check whether we're initializing an array of arrays; if so, walk the
- // parents to find an entity.
- auto FindCorrectEntity =
- [](const InitializedEntity *Entity) -> const ValueDecl * {
- while (Entity) {
- if (const ValueDecl *VD = Entity->getDecl())
- return VD;
- if (!Entity->getType()->isArrayType())
- return nullptr;
- Entity = Entity->getParent();
- }
-
- return nullptr;
- };
- if (const ValueDecl *D = FindCorrectEntity(&Entity);
- !D || !D->hasAttr<NonStringAttr>())
- S.Diag(
- Str->getBeginLoc(),
- diag::warn_initializer_string_for_char_array_too_long_no_nonstring)
- << ArrayLen << StrLength << Str->getSourceRange();
+ // In C, if the string literal is null-terminated explicitly, e.g., `char
+ // a[4] = "ABC\0"`, there should be no warning:
+ const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens());
+ bool IsSLSafe = SL && SL->getLength() > 0 &&
+ SL->getCodeUnit(SL->getLength() - 1) == 0;
+
+ if (!IsSLSafe) {
+ // If the entity being initialized has the nonstring attribute, then
+ // silence the "missing nonstring" diagnostic. If there's no entity,
+ // check whether we're initializing an array of arrays; if so, walk the
+ // parents to find an entity.
+ auto FindCorrectEntity =
+ [](const InitializedEntity *Entity) -> const ValueDecl * {
+ while (Entity) {
+ if (const ValueDecl *VD = Entity->getDecl())
+ return VD;
+ if (!Entity->getType()->isArrayType())
+ return nullptr;
+ Entity = Entity->getParent();
+ }
+ return nullptr;
+ };
+ if (const ValueDecl *D = FindCorrectEntity(&Entity);
+ !D || !D->hasAttr<NonStringAttr>())
+ S.Diag(
+ Str->getBeginLoc(),
+ diag::
+ warn_initializer_string_for_char_array_too_long_no_nonstring)
+ << ArrayLen << StrLength << Str->getSourceRange();
+ }
// Always emit the C++ compatibility diagnostic.
S.Diag(Str->getBeginLoc(),
diag::warn_initializer_string_for_char_array_too_long_for_cpp)
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
index 86e5441520456..b59e2bfc7f691 100644
--- a/clang/test/Sema/attr-nonstring_safe.c
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -1,9 +1,14 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c
-// RUN: %clang_cc1 -fsyntax-only -verify=cxx,expected -Wunterminated-string-initialization %s -x c++
+// RUN: %clang_cc1 -fsyntax-only -verify=compat,expected -Wunterminated-string-initialization %s -x c
+// RUN: %clang_cc1 -fsyntax-only -verify=cxx -Wunterminated-string-initialization %s -x c++
+#ifndef __cplusplus
+typedef unsigned short char16_t;
+typedef unsigned int char32_t;
+typedef __WCHAR_TYPE__ wchar_t;
+#endif
-#ifdef __cplusplus
-// C++ is stricter so the following cases should be warned about:
+// C++ is stricter so the following cases should be warned about. In
+// C, the following examples are fine.
char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
char foo1[1] = "\0"; // cxx-error {{initializer-string for char array is too long, array size is 1 but initializer has size 2 (including the null terminating character)}}
@@ -13,32 +18,23 @@ struct S {
char fub[3];
} s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-#else
-
-// In C, the following examples are fine:
-typedef unsigned short char16_t;
-typedef unsigned int char32_t;
-typedef __WCHAR_TYPE__ wchar_t;
-
-char foo3[3] = "fo\0";
-char foo1[1] = "\0";
-
-struct S {
- char buf[3];
- char fub[3];
-} s = { "ba\0", "bo\0" };
-
+#pragma clang diagnostic push
+#pragma clang diagnostic warning "-Wc++-compat"
// Test different encodings:
-signed char scfoo[3] = "fo\0";
-unsigned char ucfoo[3] = "fo\0";
-wchar_t wcfoo[3] = L"fo\0";
-char16_t c16foo[3] = u"fo\0";
-char32_t c32foo[3] = U"fo\0";
+signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}}
+unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}}
+wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{identifier 'wchar_t' conflicts with a C++ keyword}}
+char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{identifier 'char16_t' conflicts with a C++ keyword}}
+char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
+ compat-warning {{identifier 'char32_t' conflicts with a C++ keyword}}
+#pragma clang diagnostic pop
// Test list initializer:
signed char scfoo_lst[3] = {'f', 'o', '\0'};
@@ -49,5 +45,6 @@ char32_t c32foo_lst[3] = {U'f', U'o', U'\0'};
// Declaring an array of size 0 is invalid by C standard but compilers
// may allow it:
-char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}}
-#endif
+char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \
+ cxx-error {{initializer-string for char array is too long, array size is 0 but initializer has size 1 (including the null terminating character)}}
+char b[1] = ""; // no warn
More information about the cfe-commits
mailing list