[clang] Fix/interp init list unnamed bitfields (PR #87799)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 5 09:05:17 PDT 2024
https://github.com/sethp created https://github.com/llvm/llvm-project/pull/87799
Previously, types like this one would be mis-initialized:
```c++
struct S {
unsigned : 12;
unsigned f : 8;
}
constexpr S s = { 1 };
```
Because, although the unnamed bit-field has an index in the Record, it
does not actually take any initializer values from an expression such
as the above.
This one's for @tbaederr: it's broken out from the work I was doing experimenting with the bit-field bit-casting tests, since it seems mostly self-contained.
While separating it out, I had the question of "will iterating the Inits in order always work?": it will for C++, I think, but I'm not so sure about C. I wrote a test to find out (63587aee7d6bfde09fdd8e4d0699a6c485589274), and I manually-fuzzed my way into a failure for an unrelated reason that seems C-specific (a similar construct in `records.cpp` worked just fine).
I was curious what you'd like to do: hold here awaiting a fix for the underlying issue? Pull the failing `records.c` into a different change? Merge the failing test as a future pending task?
Thanks!
>From fbc06ba18b298c410c601ff3aeb27391cd6d9f95 Mon Sep 17 00:00:00 2001
From: Seth Pellegrino <seth at codecopse.net>
Date: Sun, 31 Mar 2024 10:50:27 -0700
Subject: [PATCH 1/4] [Interp] fix: InitList for unnamed bit-fields
Previously, types like this one would be mis-initialized:
```c++
struct S {
unsigned : 12;
unsigned f : 8;
}
constexpr S s = { 1 };
```
Because, although the unnamed bit-field has an index in the Record, it
does not actually take any initializer values from an expression such
as the above.
---
clang/lib/AST/Interp/ByteCodeExprGen.cpp | 63 ++++++++++-------
clang/test/AST/Interp/records.cpp | 89 ++++++++++++++++++++++++
2 files changed, 126 insertions(+), 26 deletions(-)
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 46182809810bcf..76e4a5a30932be 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -884,8 +884,36 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
if (!this->emitDupPtr(E))
return false;
+ // guard relatively expensive base check behind an almost-always-false
+ // branch. this works because all bases must be initialized first before we
+ // initialize any direct fields
+ if (InitIndex == 0) {
+ // Initializer for a direct base class?
+ if (const Record::Base *B = R->getBase(Init->getType())) {
+ if (!this->emitGetPtrBasePop(B->Offset, Init))
+ return false;
+
+ if (!this->visitInitializer(Init))
+ return false;
+
+ if (!this->emitFinishInitPop(E))
+ return false;
+ // Base initializers don't increase InitIndex, since they don't count
+ // into the Record's fields.
+ continue;
+ }
+ } else {
+ assert(!R->getBase(Init->getType()));
+ }
+
+ // skip over padding-only fields
+ while (R->getField(InitIndex)->Decl->isUnnamedBitfield())
+ ++InitIndex;
+
+ const Record::Field *FieldToInit = R->getField(InitIndex++);
if (std::optional<PrimType> T = classify(Init)) {
- const Record::Field *FieldToInit = R->getField(InitIndex);
+ assert(classifyPrim(FieldToInit->Decl->getType()) == *T);
+
if (!this->visit(Init))
return false;
@@ -899,34 +927,17 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
if (!this->emitPopPtr(E))
return false;
- ++InitIndex;
} else {
- // Initializer for a direct base class.
- if (const Record::Base *B = R->getBase(Init->getType())) {
- if (!this->emitGetPtrBasePop(B->Offset, Init))
- return false;
-
- if (!this->visitInitializer(Init))
- return false;
-
- if (!this->emitFinishInitPop(E))
- return false;
- // Base initializers don't increase InitIndex, since they don't count
- // into the Record's fields.
- } else {
- const Record::Field *FieldToInit = R->getField(InitIndex);
- // Non-primitive case. Get a pointer to the field-to-initialize
- // on the stack and recurse into visitInitializer().
- if (!this->emitGetPtrField(FieldToInit->Offset, Init))
- return false;
+ // Non-primitive case. Get a pointer to the field-to-initialize
+ // on the stack and recurse into visitInitializer().
+ if (!this->emitGetPtrField(FieldToInit->Offset, Init))
+ return false;
- if (!this->visitInitializer(Init))
- return false;
+ if (!this->visitInitializer(Init))
+ return false;
- if (!this->emitPopPtr(E))
- return false;
- ++InitIndex;
- }
+ if (!this->emitPopPtr(E))
+ return false;
}
}
return true;
diff --git a/clang/test/AST/Interp/records.cpp b/clang/test/AST/Interp/records.cpp
index 0f76e0cfe99277..d95413757f43e0 100644
--- a/clang/test/AST/Interp/records.cpp
+++ b/clang/test/AST/Interp/records.cpp
@@ -1285,3 +1285,92 @@ namespace {
}
}
#endif
+
+#if __cplusplus >= 201703L
+namespace InitLists {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-braces"
+#pragma clang diagnostic ignored "-Wc99-designator"
+#pragma clang diagnostic ignored "-Wc++20-extensions"
+
+ struct EmptyBase {};
+
+ struct A { char x; };
+ struct B { short y; unsigned z; };
+
+ struct S : A, B, EmptyBase {
+ constexpr bool operator==(const S& rhs) const {
+ return this->x == rhs.x && this->y == rhs.y && this->z == rhs.z;
+ };
+ };
+
+ constexpr S s0 = {};
+ static_assert(s0.x == 0 && s0.y == 0 && s0.z == 0);
+
+ static_assert(S{1} == S{A{1}, B{0, 0}}, "");
+ static_assert(S{1, 2} == S{A{1}, B{2, 0}}, "");
+ static_assert(S{1, 2, 3} == S{A{1}, B{2, 3}}, "");
+
+ static_assert(S{1, {2, 3}} == S{A{1}, B{2, 3}}, "");
+ static_assert(S{{1}, {2, 3}} == S{A{1}, B{2, 3}}, "");
+
+ struct BF : S {
+ unsigned : 12;
+ unsigned f1 : 3;
+ unsigned : 34;
+ unsigned : 34;
+ unsigned f2 : 3;
+ unsigned : 0;
+
+ bool operator==(const BF&) const = default;
+ };
+
+ static_assert(BF{} == BF{{s0}, 0, 0}, "");
+ static_assert(BF{1, 2, 3} == BF{S{A{1}, B{2, 3}, {}}, 0, 0}, "");
+
+ static_assert(BF{{}, 4} == BF{s0, .f1 = 4});
+ static_assert(BF{{}, 4, 5} == BF{s0, .f1 = 4, .f2 = 5});
+ static_assert(BF{{1, 2, 3}, 4} == BF{S{A{1}, B{2, 3}, {}}, .f1 = 4}, "");
+ static_assert(BF{1, 2, 3, {}, 4} == BF{S{A{1}, B{2, 3}, {}}, .f1 = 4}, "");
+ static_assert(BF{1, 2, 3, {}, 4, 5}
+ == BF{S{A{1}, B{2, 3}, {}}, .f1 = 4, .f2 = 5}, "");
+
+
+ struct R : BF {
+ unsigned ff : 3;
+ struct {
+ char cc[2];
+ } rr[2];
+
+ constexpr bool operator==(const R& rhs) const {
+ return static_cast<BF>(*this) == static_cast<BF>(rhs)
+ && this->ff == rhs.ff
+ && this->rr[0].cc[0] == rhs.rr[0].cc[0]
+ && this->rr[0].cc[1] == rhs.rr[0].cc[1]
+ && this->rr[1].cc[0] == rhs.rr[1].cc[0]
+ && this->rr[1].cc[1] == rhs.rr[1].cc[1]
+ ;
+ };
+ };
+
+ static_assert(R{} == R{{s0}, 0}, "");
+ static_assert(R{{}, 6} == R{{s0}, .ff = 6, .rr = {}}, "");
+ static_assert(R{{}, 6, 7} ==
+ R{{s0}, .ff = 6, .rr = {[0]={.cc = {[0]=7, [1]=0}}, [1]={.cc = {[0]=0, [1]=0}}}}, "");
+ static_assert(R{{}, 6, 7, 8} == R{{s0}, 6, {{7, 8}}}, "");
+
+ constexpr R r = {{{1, 2, 3}, 4, 5}, 6, 7, 8, 9, 10};
+ static_assert(r.x == 1, "");
+ static_assert(r.y == 2, "");
+ static_assert(r.z == 3, "");
+ static_assert(r.f1 == 4, "");
+ static_assert(r.f2 == 5, "");
+ static_assert(r.ff == 6, "");
+ static_assert(r.rr[0].cc[0] == 7, "");
+ static_assert(r.rr[0].cc[1] == 8, "");
+ static_assert(r.rr[1].cc[0] == 9, "");
+ static_assert(r.rr[1].cc[1] == 10, "");
+
+#pragma clang diagnostic pop
+} // namespace InitLists
+#endif
>From d6239b30e8dd58b4124323ae338688002d6ddf62 Mon Sep 17 00:00:00 2001
From: Seth Pellegrino <seth at codecopse.net>
Date: Sun, 31 Mar 2024 10:54:22 -0700
Subject: [PATCH 2/4] [Interp] fmt: remove trailing whitespace from test
---
clang/test/AST/Interp/records.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/AST/Interp/records.cpp b/clang/test/AST/Interp/records.cpp
index d95413757f43e0..89ac51620fc4cc 100644
--- a/clang/test/AST/Interp/records.cpp
+++ b/clang/test/AST/Interp/records.cpp
@@ -415,7 +415,7 @@ namespace DeriveFailures {
constexpr Derived(int i) : OtherVal(i) {} // ref-error {{never produces a constant expression}} \
// both-note {{non-constexpr constructor 'Base' cannot be used in a constant expression}} \
- // ref-note {{non-constexpr constructor 'Base' cannot be used in a constant expression}}
+ // ref-note {{non-constexpr constructor 'Base' cannot be used in a constant expression}}
};
constexpr Derived D(12); // both-error {{must be initialized by a constant expression}} \
>From 63587aee7d6bfde09fdd8e4d0699a6c485589274 Mon Sep 17 00:00:00 2001
From: Seth Pellegrino <seth at codecopse.net>
Date: Fri, 5 Apr 2024 08:51:30 -0700
Subject: [PATCH 3/4] [Interp][test] Add currently-failing test
Checks some of the different init expressions permitted in C-land (but
not C++).
---
clang/test/AST/Interp/records.c | 114 ++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)
create mode 100644 clang/test/AST/Interp/records.c
diff --git a/clang/test/AST/Interp/records.c b/clang/test/AST/Interp/records.c
new file mode 100644
index 00000000000000..1492fafae2eba7
--- /dev/null
+++ b/clang/test/AST/Interp/records.c
@@ -0,0 +1,114 @@
+// UNSUPPORTED: asserts
+// REQUIRES: asserts
+// ^ this attempts to say "don't actually run this test", because it's broken
+//
+// The point of this test is to demonstrate something that ExprConstant accepts,
+// but Interp rejects. I had hoped to express that as the same file with two
+// sets of RUNs: one for the classic evaluator, which would be expected to
+// succeed, and one for the new interpreter which would be expected to fail (so
+// the overall test passes just in case the new interpreter rejects something
+// that the evaluator accepts).
+//
+// Using `XFAIL ... *` with `not` on the expected-to-pass lines isn't appropriate,
+// it seems, because that will cause the test to pass when _any_ of the RUNs
+// fail.
+//
+// We could use a RUN that groups all four commands into a single shell
+// invocation that expresses the desired logical properties, possibly negating
+// and using an `XFAIL` for clarity (?), but I suspect the long-term future
+// of this test file is to get out of this situation and back into the "both
+// match" category anyway.
+//
+// RUN: %clang_cc1 -verify=ref,both -std=c99 %s
+// RUN: %clang_cc1 -verify=ref,both -std=c11 %s
+// RUN: %clang_cc1 -verify=ref,both -std=c2x %s
+//
+// RUN: %clang_cc1 -verify=expected,both -std=c99 -fexperimental-new-constant-interpreter %s
+// RUN: %clang_cc1 -verify=expected,both -std=c11 -fexperimental-new-constant-interpreter %s
+// RUN: %clang_cc1 -verify=expected,both -std=c2x -fexperimental-new-constant-interpreter %s
+
+#pragma clang diagnostic ignored "-Wgnu-folding-constant"
+#pragma clang diagnostic ignored "-Wempty-translation-unit"
+
+#if __STDC_VERSION__ >= 201112L
+#define CHECK(cond) _Static_assert((cond), "")
+#else
+#pragma clang diagnostic ignored "-Wextra-semi"
+#define CHECK(cond)
+#endif
+
+typedef struct {
+ unsigned a, b;
+ char cc[2];
+} s_t;
+
+// out-of-order designated initialization
+// array designated initialization
+const s_t s1 = { .a = 2, .b = 4, .cc[0] = 8, .cc[1] = 16 } ;
+const s_t s2 = { .b = 4, .a = 2, .cc[1] = 16, .cc[0] = 8 } ;
+
+CHECK(s1.a == s2.a && s1.b == s2.b);
+CHECK(s1.cc[0] == s2.cc[0] && s1.cc[1] == s2.cc[1]);
+
+// nested designated initialization
+const struct {
+ struct { unsigned v; } inner;
+} nested_designated = { .inner.v = 3 };
+CHECK(nested_designated.inner.v == 3);
+
+// mixing of designated initializers and regular initializers
+// both-warning at +1 {{excess elements in array initializer}}
+const s_t s3 = { {}, .b = 4, {[1]=16, 8}};
+const s_t s4 = { .b = 4, {[1]=16}};
+
+CHECK(s3.a == 0);
+CHECK(s3.b == 4);
+CHECK(s3.cc[0] == 0);
+CHECK(s3.cc[1] == 16);
+
+CHECK(s3.a == s4.a && s3.b == s4.b);
+CHECK(s3.cc[0] == s4.cc[0] && s3.cc[1] == s4.cc[1]);
+
+const unsigned fw = 2;
+typedef struct {
+ struct {
+ unsigned : 4;
+ unsigned ff : fw;
+ unsigned : 12;
+ unsigned : 12;
+ } in[2];
+
+ unsigned of : 4;
+ unsigned : 0;
+} bf_t;
+
+const bf_t bf0 = { };
+CHECK(bf0.in[0].ff == 0);
+CHECK(bf0.in[1].ff == 0);
+CHECK(bf0.of == 0);
+
+CHECK(((bf_t){{{}, {}}, {}}).of == 0);
+CHECK(((bf_t){{{}, {}}, {}}).of == 0);
+CHECK(((bf_t){{{}, {1}}, {}}).in[1].ff == 1);
+
+// out-of-order designated initialization
+// array designated initialization
+// nested designated initialization
+// mixing of designated initializers and regular initializers
+// + skipped fields (unnamed bit fields)
+const bf_t bf1 = { 1, 2, 3, };
+const bf_t bf2 = { { [1]=2, [0]={ 1 }}, 3, };
+
+CHECK(bf1.in[0].ff == 1);
+CHECK(bf1.in[1].ff == 2);
+CHECK(bf1.of == 3);
+
+CHECK(
+ bf1.in[0].ff == bf2.in[0].ff &&
+ bf1.in[1].ff == bf2.in[1].ff &&
+ bf1.of == bf2.of
+);
+
+unsigned func() {
+ return s1.a + s2.b + bf1.of + bf2.of;
+}
>From 5c70aaedb9b891d6ecbb65d22b0d6d4d498119fd Mon Sep 17 00:00:00 2001
From: Seth Pellegrino <seth at codecopse.net>
Date: Fri, 5 Apr 2024 08:53:19 -0700
Subject: [PATCH 4/4] [clang][Interp] Add some complex type asserts
Is it ever possible to sneak mismatched types past Sema far enough to
get to this point? These assert that it is not.
---
clang/lib/AST/Interp/ByteCodeExprGen.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 76e4a5a30932be..bf7752d78ae579 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -928,6 +928,14 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
if (!this->emitPopPtr(E))
return false;
} else {
+ assert(!Init->getType()->isRecordType() ||
+ getRecord(FieldToInit->Decl->getType()) ==
+ getRecord(Init->getType()));
+ assert(
+ !Init->getType()->isArrayType() ||
+ (FieldToInit->Decl->getType()->isArrayType() &&
+ Init->getType()->getArrayElementTypeNoTypeQual() ==
+ FieldToInit->Decl->getType()->getArrayElementTypeNoTypeQual()));
// Non-primitive case. Get a pointer to the field-to-initialize
// on the stack and recurse into visitInitializer().
if (!this->emitGetPtrField(FieldToInit->Offset, Init))
More information about the cfe-commits
mailing list