[clang] [Clang][C++20] Implement constexpr std::bit_cast for bit-fields (PR #74775)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 4 18:00:04 PST 2024


================
@@ -404,29 +691,126 @@ constexpr unsigned char identity3b = __builtin_bit_cast(unsigned char, identity3
 
 namespace test_bool {
 
-constexpr bool test_bad_bool = bit_cast<bool>('A'); // expected-error {{must be initialized by a constant expression}} expected-note{{in call}}
+// expected-note at +1 {{cannot be represented in type 'bool'}}
+constexpr bool test_bad_bool = __builtin_bit_cast(bool, 'A'); // expected-error {{must be initialized by a constant expression}}
+
+static_assert(round_trip<signed char>(true));
+static_assert(round_trip<unsigned char>(true));
+static_assert(round_trip<bool>(false) == false);
 
-static_assert(round_trip<signed char>(true), "");
-static_assert(round_trip<unsigned char>(false), "");
-static_assert(round_trip<bool>(false), "");
+static_assert(static_cast<uint8_t>(false) == 0x0);
+static_assert(bit_cast<uint8_t>(false) == 0x0);
+static_assert(static_cast<uint8_t>(true) == 0x1);
+static_assert(bit_cast<uint8_t>(true) == 0x1);
 
-static_assert(round_trip<bool>((char)0), "");
-static_assert(round_trip<bool>((char)1), "");
+static_assert(round_trip<bool, uint8_t>(0x01) == 0x1);
+static_assert(round_trip<bool, uint8_t>(0x00) == 0x0);
+// expected-note at +2 {{cannot be represented in type 'bool'}}
+// expected-error at +1 {{constant expression}}
+constexpr auto test_bad_bool2 = __builtin_bit_cast(bool, (uint8_t)0x02);
+
+#if LITTLE_END == 1
+constexpr auto okbits = bit_cast<bits<1>>(true);
+#else
+constexpr auto okbits = bit_cast<bits<1, uint8_t, 7>>(true);
+#endif
+static_assert(okbits == 0x1);
+// expected-note at +3 {{bit [1-7]}}
+// expected-note at +2 {{or 'std::byte'; 'bool' is invalid}}
+// expected-error at +1 {{constant expression}}
+constexpr auto _weird_bool = __builtin_bit_cast(bool, okbits);
+
+// these don't work because we're trying to read the whole 8 bits to ensure
+// the value is representable, as above
+// static_assert(round_trip<bool, bits<1>>({0x1}) == 0x1);
+// static_assert(round_trip<bool, bits<1>>({0x0}) == 0x0);
+
+// these work because we're only reading 1 bit of "bool" to ensure
+// "representability"
+static_assert(round_trip<bits<1, bool>, bits<1>>({0x1}) == 0x1);
+static_assert(round_trip<bits<1, bool>, bits<1>>({0x0}) == 0x0);
+
+template <const int P, class B = bool>
+constexpr bool extract_bit(unsigned char v) {
+  return static_cast<bool>(bit_cast<bits<1, B, P>>(v).bits);
 }
+// 0xA5 is a palindrome, so endianness doesn't matter
+// (counting LSB->MSB is the same as MSB->LSB)
+static_assert(extract_bit<0>(0xA5) == 0x1);
+static_assert(extract_bit<2>(0xA5) == 0x1);
+static_assert(extract_bit<5>(0xA5) == 0x1);
+static_assert(extract_bit<7>(0xA5) == 0x1);
+
+static_assert(extract_bit<1>(0xA5) == 0x0);
+static_assert(extract_bit<3>(0xA5) == 0x0);
+static_assert(extract_bit<4>(0xA5) == 0x0);
+static_assert(extract_bit<6>(0xA5) == 0x0);
+
+enum byte : unsigned char {}; // not std::byte or unsigned char
+
+static_assert(extract_bit<5, byte>('\xa5') == 0x1);
+
+struct pad {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  bool : 5; // push field down to the LSB
+#endif
+  bool b : 3;
+};
+
+static_assert(bit_cast<pad, uint8_t>(0b001).b == true);
+static_assert(bit_cast<pad, uint8_t>(0b000).b == false);
+
+// expected-note at +1 {{cannot be represented in type 'bool'}}
+constexpr auto _bad_bool3 = __builtin_bit_cast(pad, (uint8_t)0b110); // expected-error {{must be initialized by a constant expression}}
+
+struct S {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  byte : 7;
+#endif
+  byte z : 1;
+};
+
+constexpr auto s = bit_cast<S>(pad{1});
+static_assert(s.z == 0x1);
+
+// expected-note at +3 {{bit [1-2]}}
+// expected-note at +2 {{or 'std::byte'; 'bool' is invalid}}
+// expected-error at +1 {{constant expression}}
+constexpr auto _bad_bool4 = __builtin_bit_cast(pad, s);
+
+
+// `bool` includes padding bits, but *which* single bit stores the
+// value is under-specified. These tests not-so-secretly assert that
+// it's in fact the LSB that the compiler "sees" as the value.
+struct pack {
+  bool a : 1;
+  bool b : 1;
+
+  // 1 bit of value, 5 bits of padding
+  bool c : 6;
+};
+
+constexpr auto packed = bit_cast<pack, uint8_t>(LITTLE_END ? 0x07 : 0xc1);
+static_assert(packed.a && packed.b && packed.c);
----------------
sethp wrote:

Taken together, these tests make a very strong assertion about the ABI of `bool`; that seems to match the existing behavior, both in how the evaluator handled 8-bit values and in the output from codegen: https://godbolt.org/z/xsjE7aG3e

Probably there's no good reason to make a different decision in the future, but it does seem worth calling out: this is far from the only place where the evaluator rests on decisions made elsewhere in clang/llvm-land, but it is (partially) a new such dependency.

https://github.com/llvm/llvm-project/pull/74775


More information about the cfe-commits mailing list