[clang] [Clang][C++20] Implement constexpr std::bit_cast for bit-fields (PR #74775)

Thu Jan 4 17:19:12 PST 2024

https://github.com/sethp updated https://github.com/llvm/llvm-project/pull/74775

>From f281d34a51f662c934f158e4770774b0dc3588a2 Mon Sep 17 00:00:00 2001
From: Seth Pellegrino <seth at codecopse.net>
Date: Thu, 7 Dec 2023 08:45:51 -0800
Subject: [PATCH 1/3] [Clang][Sema] Print more static_assert exprs

This change introspects more values involved in a static_assert, and
extends the supported set of operators for introspection to include
binary operator method calls.

It's intended to address the use-case where a small static_assert helper
looks something like this (via `constexpr-builtin-bit-cast.cpp`):

```c++
struct int_splicer {
  unsigned x;
  unsigned y;

  constexpr bool operator==(const int_splicer &other) const {
    return other.x == x && other.y == y;
  }
};
```

When used like so:

```c++
constexpr int_splicer got{1, 2};
constexpr int_splicer want{3, 4};
static_assert(got == want);
```

Then we'd expect to get the error:

```
static assertion failed due to requirement 'got == want'
```

And this change adds the helpful note:

```
expression evaluates to '{1, 2} == {3, 4}'
```
---
 clang/lib/Sema/SemaDeclCXX.cpp                | 31 ++++++++++++++-----
 .../CXX/class/class.compare/class.eq/p3.cpp   | 20 ++++++------
 .../CXX/class/class.compare/class.rel/p2.cpp  | 10 +++---
 .../over.match.oper/p9-2a.cpp                 |  2 +-
 clang/test/SemaCXX/static-assert-cxx17.cpp    |  2 +-
 5 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index c6218a491aecec..e3d46c3140741b 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17219,6 +17219,13 @@ static bool ConvertAPValueToString(const APValue &V, QualType T,
     OS << "i)";
   } break;
 
+  case APValue::ValueKind::Array:
+  case APValue::ValueKind::Vector:
+  case APValue::ValueKind::Struct: {
+    llvm::raw_svector_ostream OS(Str);
+    V.printPretty(OS, Context, T);
+  } break;
+
   default:
     return false;
   }
@@ -17256,11 +17263,10 @@ static bool UsefulToPrintExpr(const Expr *E) {
 /// Try to print more useful information about a failed static_assert
 /// with expression \E
 void Sema::DiagnoseStaticAssertDetails(const Expr *E) {
-  if (const auto *Op = dyn_cast<BinaryOperator>(E);
-      Op && Op->getOpcode() != BO_LOr) {
-    const Expr *LHS = Op->getLHS()->IgnoreParenImpCasts();
-    const Expr *RHS = Op->getRHS()->IgnoreParenImpCasts();
-
+  const auto Diagnose = [&](const Expr *LHS, const Expr *RHS,
+                            const llvm::StringRef &OpStr) {
+    LHS = LHS->IgnoreParenImpCasts();
+    RHS = RHS->IgnoreParenImpCasts();
     // Ignore comparisons of boolean expressions with a boolean literal.
     if ((isa<CXXBoolLiteralExpr>(LHS) && RHS->getType()->isBooleanType()) ||
         (isa<CXXBoolLiteralExpr>(RHS) && LHS->getType()->isBooleanType()))
@@ -17287,10 +17293,19 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) {
                                  DiagSide[I].ValueString, Context);
     }
     if (DiagSide[0].Print && DiagSide[1].Print) {
-      Diag(Op->getExprLoc(), diag::note_expr_evaluates_to)
-          << DiagSide[0].ValueString << Op->getOpcodeStr()
-          << DiagSide[1].ValueString << Op->getSourceRange();
+      Diag(E->getExprLoc(), diag::note_expr_evaluates_to)
+          << DiagSide[0].ValueString << OpStr << DiagSide[1].ValueString
+          << E->getSourceRange();
     }
+  };
+
+  if (const auto *Op = dyn_cast<BinaryOperator>(E);
+      Op && Op->getOpcode() != BO_LOr) {
+    Diagnose(Op->getLHS(), Op->getRHS(), Op->getOpcodeStr());
+  } else if (const auto *Op = dyn_cast<CXXOperatorCallExpr>(E);
+             Op && Op->isInfixBinaryOp()) {
+    Diagnose(Op->getArg(0), Op->getArg(1),
+             getOperatorSpelling(Op->getOperator()));
   }
 }
 
diff --git a/clang/test/CXX/class/class.compare/class.eq/p3.cpp b/clang/test/CXX/class/class.compare/class.eq/p3.cpp
index 04db022fe73021..53c4dda133301b 100644
--- a/clang/test/CXX/class/class.compare/class.eq/p3.cpp
+++ b/clang/test/CXX/class/class.compare/class.eq/p3.cpp
@@ -6,11 +6,11 @@ struct A {
 };
 
 static_assert(A{1, 2, 3, 4, 5} == A{1, 2, 3, 4, 5});
-static_assert(A{1, 2, 3, 4, 5} == A{0, 2, 3, 4, 5}); // expected-error {{failed}}
-static_assert(A{1, 2, 3, 4, 5} == A{1, 0, 3, 4, 5}); // expected-error {{failed}}
-static_assert(A{1, 2, 3, 4, 5} == A{1, 2, 0, 4, 5}); // expected-error {{failed}}
-static_assert(A{1, 2, 3, 4, 5} == A{1, 2, 3, 0, 5}); // expected-error {{failed}}
-static_assert(A{1, 2, 3, 4, 5} == A{1, 2, 3, 4, 0}); // expected-error {{failed}}
+static_assert(A{1, 2, 3, 4, 5} == A{0, 2, 3, 4, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(A{1, 2, 3, 4, 5} == A{1, 0, 3, 4, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(A{1, 2, 3, 4, 5} == A{1, 2, 0, 4, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(A{1, 2, 3, 4, 5} == A{1, 2, 3, 0, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(A{1, 2, 3, 4, 5} == A{1, 2, 3, 4, 0}); // expected-error {{failed}} expected-note {{evaluates to}}
 
 struct B {
   int a, b[3], c;
@@ -18,8 +18,8 @@ struct B {
 };
 
 static_assert(B{1, 2, 3, 4, 5} == B{1, 2, 3, 4, 5});
-static_assert(B{1, 2, 3, 4, 5} == B{0, 2, 3, 4, 5}); // expected-error {{failed}}
-static_assert(B{1, 2, 3, 4, 5} == B{1, 0, 3, 4, 5}); // expected-error {{failed}}
-static_assert(B{1, 2, 3, 4, 5} == B{1, 2, 0, 4, 5}); // expected-error {{failed}}
-static_assert(B{1, 2, 3, 4, 5} == B{1, 2, 3, 0, 5}); // expected-error {{failed}}
-static_assert(B{1, 2, 3, 4, 5} == B{1, 2, 3, 4, 0}); // expected-error {{failed}}
+static_assert(B{1, 2, 3, 4, 5} == B{0, 2, 3, 4, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(B{1, 2, 3, 4, 5} == B{1, 0, 3, 4, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(B{1, 2, 3, 4, 5} == B{1, 2, 0, 4, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(B{1, 2, 3, 4, 5} == B{1, 2, 3, 0, 5}); // expected-error {{failed}} expected-note {{evaluates to}}
+static_assert(B{1, 2, 3, 4, 5} == B{1, 2, 3, 4, 0}); // expected-error {{failed}} expected-note {{evaluates to}}
diff --git a/clang/test/CXX/class/class.compare/class.rel/p2.cpp b/clang/test/CXX/class/class.compare/class.rel/p2.cpp
index 90115284d2bd02..07501c6a081841 100644
--- a/clang/test/CXX/class/class.compare/class.rel/p2.cpp
+++ b/clang/test/CXX/class/class.compare/class.rel/p2.cpp
@@ -10,15 +10,15 @@ namespace Rel {
     friend bool operator>=(const A&, const A&) = default;
   };
   static_assert(A{0} < A{1});
-  static_assert(A{1} < A{1}); // expected-error {{failed}}
+  static_assert(A{1} < A{1}); // expected-error {{failed}} expected-note {{'{1} < {1}'}}
   static_assert(A{0} <= A{1});
   static_assert(A{1} <= A{1});
-  static_assert(A{2} <= A{1}); // expected-error {{failed}}
+  static_assert(A{2} <= A{1}); // expected-error {{failed}} expected-note {{'{2} <= {1}'}}
   static_assert(A{1} > A{0});
-  static_assert(A{1} > A{1}); // expected-error {{failed}}
+  static_assert(A{1} > A{1}); // expected-error {{failed}} expected-note {{'{1} > {1}'}}
   static_assert(A{1} >= A{0});
   static_assert(A{1} >= A{1});
-  static_assert(A{1} >= A{2}); // expected-error {{failed}}
+  static_assert(A{1} >= A{2}); // expected-error {{failed}} expected-note {{'{1} >= {2}'}}
 
   struct B {
     bool operator<=>(B) const = delete; // expected-note 4{{deleted here}} expected-note-re 8{{candidate {{.*}} deleted}}
@@ -49,7 +49,7 @@ namespace NotEqual {
     friend bool operator!=(const A&, const A&) = default;
   };
   static_assert(A{1} != A{2});
-  static_assert(A{1} != A{1}); // expected-error {{failed}}
+  static_assert(A{1} != A{1}); // expected-error {{failed}} expected-note {{'{1} != {1}'}}
 
   struct B {
     bool operator==(B) const = delete; // expected-note {{deleted here}} expected-note-re 2{{candidate {{.*}} deleted}}
diff --git a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p9-2a.cpp b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p9-2a.cpp
index 95d6a55aee66a1..8f31e8947a768c 100644
--- a/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p9-2a.cpp
+++ b/clang/test/CXX/over/over.match/over.match.funcs/over.match.oper/p9-2a.cpp
@@ -33,7 +33,7 @@ struct Y {};
 constexpr bool operator==(X x, Y) { return x.equal; }
 
 static_assert(X{true} == Y{});
-static_assert(X{false} == Y{}); // expected-error {{failed}}
+static_assert(X{false} == Y{}); // expected-error {{failed}} expected-note{{'{false} == {}'}}
 
 // x == y -> y == x
 static_assert(Y{} == X{true});
diff --git a/clang/test/SemaCXX/static-assert-cxx17.cpp b/clang/test/SemaCXX/static-assert-cxx17.cpp
index 41a7b025d0eb75..1d78915aa13e18 100644
--- a/clang/test/SemaCXX/static-assert-cxx17.cpp
+++ b/clang/test/SemaCXX/static-assert-cxx17.cpp
@@ -94,7 +94,7 @@ void foo6() {
   // expected-error@-1{{static assertion failed due to requirement '(const X<int> *)nullptr'}}
   static_assert(static_cast<const X<typename T::T> *>(nullptr));
   // expected-error@-1{{static assertion failed due to requirement 'static_cast<const X<int> *>(nullptr)'}}
-  static_assert((const X<typename T::T>[]){} == nullptr);
+  static_assert((const X<typename T::T>[]){} == nullptr); // expected-note{{expression evaluates to '{} == nullptr'}}
   // expected-error@-1{{static assertion failed due to requirement '(const X<int>[0]){} == nullptr'}}
   static_assert(sizeof(X<decltype(X<typename T::T>().X<typename T::T>::~X())>) == 0);
   // expected-error@-1{{static assertion failed due to requirement 'sizeof(X<void>) == 0'}} \

>From f9785e47a5f9954690d8a30a4296cc833d5185b9 Mon Sep 17 00:00:00 2001
From: Seth Pellegrino <seth at codecopse.net>
Date: Thu, 7 Dec 2023 09:29:13 -0800
Subject: [PATCH 2/3] [Clang][C++20] Implement constexpr std::bit_cast for
 bit-fields

After this commit, clang permits constructions like:
```c++
struct bits {
    unsigned char : 7;
    unsigned char flag : 1;
};

static_assert(std::bit_cast<bits>(static_cast<unsigned char>(0x80)).flag); // succeeds on little-endian systems
```

This change builds on the prior work in https://reviews.llvm.org/D62825
---
 .../include/clang/Basic/DiagnosticASTKinds.td |   8 +-
 clang/lib/AST/ExprConstant.cpp                | 403 +++++++++++++++---
 .../SemaCXX/constexpr-builtin-bit-cast.cpp    | 384 ++++++++++++++---
 .../SemaTemplate/temp_arg_nontype_cxx20.cpp   |  12 +-
 4 files changed, 690 insertions(+), 117 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index c81d17ed641084..7020f70f7c1b07 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -316,10 +316,14 @@ def note_constexpr_memcpy_unsupported : Note<
   "size to copy (%4) is not a multiple of size of element type %3 (%5)|"
   "source is not a contiguous array of at least %4 elements of type %3|"
   "destination is not a contiguous array of at least %4 elements of type %3}2">;
+def note_constexpr_bit_cast_bad_bits : Note<
+  "bit_cast source expression (type %5) does not produce a constant value for "
+  "%select{bit|byte}0 [%1] (of {%2%plural{0:|:..0}2}) which are required by "
+  "target type %4 %select{|(subobject %3)}6">;
 def note_constexpr_bit_cast_unsupported_type : Note<
   "constexpr bit cast involving type %0 is not yet supported">;
-def note_constexpr_bit_cast_unsupported_bitfield : Note<
-  "constexpr bit_cast involving bit-field is not yet supported">;
+def note_constexpr_bit_cast_invalid_decl : Note<
+  "bit_cast here %select{from|to}0 invalid declaration %1">;
 def note_constexpr_bit_cast_invalid_type : Note<
   "bit_cast %select{from|to}0 a %select{|type with a }1"
   "%select{union|pointer|member pointer|volatile|reference}2 "
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 986302e1fd225f..356cef552d3544 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -38,7 +38,6 @@
 #include "Interp/State.h"
 #include "clang/AST/APValue.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/ASTDiagnostic.h"
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/CXXInheritance.h"
@@ -49,19 +48,33 @@
 #include "clang/AST/OptionalDiagnostic.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/Builtins.h"
-#include "clang/Basic/DiagnosticSema.h"
+#include "clang/Basic/DiagnosticAST.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/APFixedPoint.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/SwapByteOrder.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
 #include <cstring>
 #include <functional>
+#include <iomanip>
+#include <iterator>
 #include <optional>
 
 #define DEBUG_TYPE "exprconstant"
@@ -6901,51 +6914,113 @@ bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) {
 //===----------------------------------------------------------------------===//
 namespace {
 
-class BitCastBuffer {
-  // FIXME: We're going to need bit-level granularity when we support
-  // bit-fields.
+struct BitCastBuffer {
   // FIXME: Its possible under the C++ standard for 'char' to not be 8 bits, but
   // we don't support a host or target where that is the case. Still, we should
   // use a more generic type in case we ever do.
-  SmallVector<std::optional<unsigned char>, 32> Bytes;
-
-  static_assert(std::numeric_limits<unsigned char>::digits >= 8,
+  using byte_t = unsigned char;
+  static_assert(std::numeric_limits<byte_t>::digits >= 8,
                 "Need at least 8 bit unsigned char");
 
+  SmallVector<byte_t, 32> Bytes;
+  SmallVector<byte_t, 32> Valid;
+
   bool TargetIsLittleEndian;
 
-public:
+  static SmallVector<byte_t> MaskAllSet(size_t Width) {
+    SmallVector<byte_t> M;
+    M.resize(Width);
+    std::fill(M.begin(), M.end(), ~0);
+    return M;
+  }
+
   BitCastBuffer(CharUnits Width, bool TargetIsLittleEndian)
-      : Bytes(Width.getQuantity()),
+      : Bytes(Width.getQuantity()), Valid(Width.getQuantity()),
         TargetIsLittleEndian(TargetIsLittleEndian) {}
 
   [[nodiscard]] bool readObject(CharUnits Offset, CharUnits Width,
-                                SmallVectorImpl<unsigned char> &Output) const {
-    for (CharUnits I = Offset, E = Offset + Width; I != E; ++I) {
-      // If a byte of an integer is uninitialized, then the whole integer is
-      // uninitialized.
-      if (!Bytes[I.getQuantity()])
+                                SmallVectorImpl<byte_t> &Output,
+                                SmallVectorImpl<byte_t> const &Mask) const {
+    assert(Mask.size() >= static_cast<unsigned>(Width.getQuantity()));
+    assert(Output.size() >= static_cast<unsigned>(Width.getQuantity()));
+    assert(Bytes.size() >=
+           static_cast<unsigned>((Offset + Width).getQuantity()));
+
+    SmallVector<byte_t, 8> RevMask;
+    const SmallVectorImpl<byte_t> &M =
+        (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian)
+        ? [&]() -> const SmallVectorImpl<byte_t> & {
+      auto W = Width.getQuantity();
+      RevMask.resize_for_overwrite(W);
+      std::reverse_copy(Mask.begin(), Mask.begin() + W, RevMask.begin());
+      return RevMask;
+    }()
+        : Mask;
+
+    size_t Index = 0;
+    for (CharUnits I = Offset, E = Offset + Width; I != E; ++I, ++Index) {
+      const auto BufIdx = I.getQuantity();
+      const auto mask = M[Index];
+      // are there any bits in Mask[Index] that are not set in
+      // Valid[BufIdx]? (NB: more bits can be set, that's just
+      // fine)
+      if ((Valid[BufIdx] & M[Index]) != M[Index])
+        // If any bit of an integer is uninitialized, then the
+        // whole integer is uninitialized.
         return false;
-      Output.push_back(*Bytes[I.getQuantity()]);
+
+      Output[Index] = (Output[Index] & ~mask) | (Bytes[BufIdx] & mask);
     }
+
     if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian)
       std::reverse(Output.begin(), Output.end());
     return true;
   }
 
-  void writeObject(CharUnits Offset, SmallVectorImpl<unsigned char> &Input) {
-    if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian)
+  void writeObject(CharUnits Offset, SmallVectorImpl<byte_t> &Input,
+                   SmallVectorImpl<byte_t> &Mask) {
+    assert(Mask.size() >= Input.size());
+    assert(Bytes.size() >=
+           static_cast<unsigned>(Offset.getQuantity()) + Input.size());
+
+    // we could promise Input and Mask were `const`, except for this
+    if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian) {
       std::reverse(Input.begin(), Input.end());
+      // we might (will) have more mask bits than input bits
+      std::reverse(Mask.begin(), Mask.begin() + Input.size());
+    }
 
     size_t Index = 0;
-    for (unsigned char Byte : Input) {
-      assert(!Bytes[Offset.getQuantity() + Index] && "overwriting a byte?");
-      Bytes[Offset.getQuantity() + Index] = Byte;
+    size_t BufIdx = Offset.getQuantity();
+    for (byte_t &Byte : Input) {
+      assert((Valid[BufIdx] & Mask[Index]) == 0 && "overwriting data?");
+      Bytes[BufIdx] |= Byte & Mask[Index];
+      Valid[BufIdx] |= Mask[Index];
+      ++BufIdx;
       ++Index;
     }
   }
 
   size_t size() { return Bytes.size(); }
+
+  LLVM_DUMP_METHOD void dump() {
+    auto pp = [](std::stringstream &SS, llvm::SmallVectorImpl<byte_t> &V) {
+      bool first = true;
+      for (byte_t v : V) {
+        if (first)
+          first = false;
+        else
+          SS << " ";
+        SS << "0x" << std::hex << std::setw(2) << std::setfill('0')
+           << static_cast<unsigned>(v);
+      }
+    };
+    std::stringstream SS[2];
+    pp(SS[0], Bytes);
+    pp(SS[1], Valid);
+    llvm::dbgs() << "BitCastBuffer{Bytes: [" << SS[0].str() << "], Valid: ["
+                 << SS[1].str() << "]}\n";
+  }
 };
 
 /// Traverse an APValue to produce an BitCastBuffer, emulating how the current
@@ -6973,7 +7048,7 @@ class APValueToBufferConverter {
     if (Ty->isNullPtrType())
       return true;
 
-    // Dig through Src to find the byte at SrcOffset.
+    // Dig through Val to find the byte at Offset.
     switch (Val.getKind()) {
     case APValue::Indeterminate:
     case APValue::None:
@@ -7012,6 +7087,9 @@ class APValueToBufferConverter {
 
   bool visitRecord(const APValue &Val, QualType Ty, CharUnits Offset) {
     const RecordDecl *RD = Ty->getAsRecordDecl();
+    if (RD->isInvalidDecl()) {
+      return invalidDecl(Ty);
+    }
     const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD);
 
     // Visit the base classes.
@@ -7028,12 +7106,11 @@ class APValueToBufferConverter {
 
     // Visit the fields.
     unsigned FieldIdx = 0;
-    for (FieldDecl *FD : RD->fields()) {
-      if (FD->isBitField()) {
-        Info.FFDiag(BCE->getBeginLoc(),
-                    diag::note_constexpr_bit_cast_unsupported_bitfield);
-        return false;
-      }
+    for (auto I = RD->field_begin(), E = RD->field_end(); I != E;
+         I++, FieldIdx++) {
+      FieldDecl *FD = *I;
+      if (FD->isBitField())
+        continue; // see below
 
       uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx);
 
@@ -7044,7 +7121,72 @@ class APValueToBufferConverter {
       QualType FieldTy = FD->getType();
       if (!visit(Val.getStructField(FieldIdx), FieldTy, FieldOffset))
         return false;
-      ++FieldIdx;
+    }
+
+    // Handle bit-fields
+    FieldIdx = 0;
+    for (auto I = RD->field_begin(), E = RD->field_end(); I != E;
+         I++, FieldIdx++) {
+      FieldDecl *FD = *I;
+      if (!FD->isBitField())
+        continue;
+
+      // unnamed bit fields are purely padding
+      if (FD->isUnnamedBitfield())
+        continue;
+
+      auto FieldVal = Val.getStructField(FieldIdx);
+      if (!FieldVal.hasValue())
+        continue;
+
+      uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx);
+      CharUnits BufOffset = Offset;
+      uint64_t BitOffset = FieldOffsetBits;
+
+      unsigned int BitWidth = FD->getBitWidthValue(Info.Ctx);
+
+      CharUnits TypeWidth = Info.Ctx.getTypeSizeInChars(FD->getType());
+      uint64_t TypeWidthBits = Info.Ctx.toBits(TypeWidth);
+      if (BitWidth > TypeWidthBits) {
+        // e.g. `unsigned uint8_t c : 12`
+        // we truncate to CHAR_BIT * sizeof(T)
+        // (the extra bits are padding)
+        BitWidth = TypeWidthBits;
+      }
+      if (FieldOffsetBits >= TypeWidthBits) {
+        // e.g. `uint32_t : 33; uint32_t i : 12`
+        // or `uint16_t : 16; unsigned uint16_t i : 12`
+        BufOffset =
+            BufOffset + CharUnits::fromQuantity(BitOffset / TypeWidthBits) *
+                            TypeWidth.getQuantity();
+        BitOffset %= TypeWidthBits;
+      }
+
+      if (Info.Ctx.getTargetInfo().isBigEndian()) {
+        // big endian bits count from MSB to LSB
+        // so a bit-field of width 16 and size 12 will occupy bits [0-11] on a
+        // little endian machine, but [4-15] on a big endian machine
+        BitOffset = TypeWidthBits - (BitOffset + BitWidth);
+      }
+
+      assert(TypeWidth >= Info.Ctx.toCharUnitsFromBits(BitWidth));
+
+      llvm::SmallBitVector MaskBits(Info.Ctx.toBits(TypeWidth));
+      MaskBits.set(BitOffset, BitOffset + BitWidth);
+      uintptr_t Store;
+      ArrayRef<uintptr_t> Ref = MaskBits.getData(Store);
+      SmallVector<uint8_t, 8> Mask(Ref.size() * sizeof(uintptr_t));
+      std::memcpy(Mask.data(), Ref.data(), Mask.size());
+      Mask.truncate(TypeWidth.getQuantity());
+
+      SmallVector<uint8_t, 8> Bytes(TypeWidth.getQuantity());
+
+      APSInt Val = FieldVal.getInt() << BitOffset;
+      assert(Val.getBitWidth() >= BitOffset + BitWidth &&
+             "lost data in APInt -> byte buffer conversion");
+
+      llvm::StoreIntToMemory(Val, &*Bytes.begin(), TypeWidth.getQuantity());
+      Buffer.writeObject(BufOffset, Bytes, Mask);
     }
 
     return true;
@@ -7129,8 +7271,9 @@ class APValueToBufferConverter {
       }
 
       SmallVector<uint8_t, 8> Bytes(NElts / 8);
+      auto Mask = BitCastBuffer::MaskAllSet(Bytes.size());
       llvm::StoreIntToMemory(Res, &*Bytes.begin(), NElts / 8);
-      Buffer.writeObject(Offset, Bytes);
+      Buffer.writeObject(Offset, Bytes, Mask);
     } else {
       // Iterate over each of the elements and write them out to the buffer at
       // the appropriate offset.
@@ -7153,8 +7296,9 @@ class APValueToBufferConverter {
     }
 
     SmallVector<uint8_t, 8> Bytes(Width / 8);
+    auto Mask = BitCastBuffer::MaskAllSet(Bytes.size());
     llvm::StoreIntToMemory(AdjustedVal, &*Bytes.begin(), Width / 8);
-    Buffer.writeObject(Offset, Bytes);
+    Buffer.writeObject(Offset, Bytes, Mask);
     return true;
   }
 
@@ -7163,6 +7307,12 @@ class APValueToBufferConverter {
     return visitInt(AsInt, Ty, Offset);
   }
 
+  bool invalidDecl(QualType Ty) {
+    Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_invalid_decl)
+        << /* checking dest */ false << Ty;
+    return false;
+  }
+
 public:
   static std::optional<BitCastBuffer>
   convert(EvalInfo &Info, const APValue &Src, const CastExpr *BCE) {
@@ -7194,6 +7344,12 @@ class BufferToAPValueConverter {
     return std::nullopt;
   }
 
+  std::nullopt_t invalidDecl(QualType Ty) {
+    Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_invalid_decl)
+        << /* checking dest */ true << Ty;
+    return std::nullopt;
+  }
+
   std::nullopt_t unrepresentableValue(QualType Ty, const APSInt &Val) {
     Info.FFDiag(BCE->getBeginLoc(),
                 diag::note_constexpr_bit_cast_unrepresentable_value)
@@ -7201,6 +7357,75 @@ class BufferToAPValueConverter {
     return std::nullopt;
   }
 
+  std::nullopt_t badBits(QualType Ty, CharUnits Offset,
+                         SmallVectorImpl<BitCastBuffer::byte_t> &M) {
+    Info.FFDiag(BCE->getExprLoc(), diag::note_constexpr_bit_cast_indet_dest, 1)
+        << Ty << Info.Ctx.getLangOpts().CharIsSigned;
+    uint64_t BitWidth = Info.Ctx.getTypeSize(BCE->getType());
+    uint64_t ByteWidth = Info.Ctx.toCharUnitsFromBits(BitWidth).getQuantity();
+    assert(ByteWidth == Buffer.Valid.size_in_bytes());
+
+    APInt Valid(BitWidth, 0);
+    llvm::LoadIntFromMemory(Valid, Buffer.Valid.begin(), ByteWidth);
+    APInt Mask(BitWidth, 0);
+    llvm::LoadIntFromMemory(Mask, M.begin(), M.size_in_bytes());
+
+    Mask = Mask.zext(Valid.getBitWidth());
+    Mask <<= Info.Ctx.toBits(Offset);
+
+    auto ByteAligned = true;
+
+    APInt Missing = (~Valid & Mask);
+    assert(!Missing.isZero() && "bad bits called with no bad bits?");
+    llvm::SmallVector<std::pair<size_t, size_t>> MissingBitRanges;
+    int NextBit = 0;
+    while (!Missing.isZero()) {
+      APInt Last(Missing);
+      int N = Missing.countr_zero();
+
+      Missing.lshrInPlace(N);
+      auto M = Missing.countr_one();
+
+      MissingBitRanges.push_back({NextBit + N, NextBit + N + M});
+
+      Missing.lshrInPlace(M);
+      NextBit += N;
+      NextBit += M;
+      ByteAligned &= N % Info.Ctx.getCharWidth() == 0;
+      ByteAligned &= M % Info.Ctx.getCharWidth() == 0;
+    }
+
+    llvm::SmallString<32> RangesStr;
+    llvm::raw_svector_ostream OS(RangesStr);
+    bool First = true;
+    for (auto [Start, End] : MissingBitRanges) {
+      if (!First)
+        OS << " ";
+      else
+        First = false;
+      if (ByteAligned) {
+        Start /= Info.Ctx.getCharWidth();
+        End /= Info.Ctx.getCharWidth();
+      }
+      size_t Len = End - Start;
+      if (Len > 1) {
+        OS << Start << "-" << End - 1;
+      } else {
+        OS << Start;
+      }
+    }
+
+    assert(RangesStr.size() > 0);
+    auto LastIdx = (ByteAligned ? ByteWidth : BitWidth) - 1;
+    bool IsForSubobject =
+        BCE->getType().getCanonicalType() != Ty.getCanonicalType();
+    Info.Note(BCE->getSubExpr()->getExprLoc(),
+              diag::note_constexpr_bit_cast_bad_bits)
+        << ByteAligned << RangesStr << LastIdx << Ty << BCE->getType()
+        << BCE->getSubExpr()->getType() << IsForSubobject;
+    return std::nullopt;
+  }
+
   std::optional<APValue> visit(const BuiltinType *T, CharUnits Offset,
                                const EnumType *EnumSugar = nullptr) {
     if (T->isNullPtrType()) {
@@ -7225,8 +7450,10 @@ class BufferToAPValueConverter {
         SizeOf = NumBytes;
     }
 
-    SmallVector<uint8_t, 8> Bytes;
-    if (!Buffer.readObject(Offset, SizeOf, Bytes)) {
+    SmallVector<uint8_t, 8> Bytes,
+        Mask = BitCastBuffer::MaskAllSet(SizeOf.getQuantity());
+    Bytes.resize_for_overwrite(SizeOf.getQuantity());
+    if (!Buffer.readObject(Offset, SizeOf, Bytes, Mask)) {
       // If this is std::byte or unsigned char, then its okay to store an
       // indeterminate value.
       bool IsStdByte = EnumSugar && EnumSugar->isStdByteType();
@@ -7235,10 +7462,7 @@ class BufferToAPValueConverter {
                          T->isSpecificBuiltinType(BuiltinType::Char_U));
       if (!IsStdByte && !IsUChar) {
         QualType DisplayType(EnumSugar ? (const Type *)EnumSugar : T, 0);
-        Info.FFDiag(BCE->getExprLoc(),
-                    diag::note_constexpr_bit_cast_indet_dest)
-            << DisplayType << Info.Ctx.getLangOpts().CharIsSigned;
-        return std::nullopt;
+        return badBits(DisplayType, Offset, Mask);
       }
 
       return APValue::IndeterminateValue();
@@ -7272,6 +7496,9 @@ class BufferToAPValueConverter {
 
   std::optional<APValue> visit(const RecordType *RTy, CharUnits Offset) {
     const RecordDecl *RD = RTy->getAsRecordDecl();
+    if (RD->isInvalidDecl()) {
+      return invalidDecl(QualType(RD->getTypeForDecl(), 0));
+    }
     const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD);
 
     unsigned NumBases = 0;
@@ -7300,14 +7527,11 @@ class BufferToAPValueConverter {
 
     // Visit the fields.
     unsigned FieldIdx = 0;
-    for (FieldDecl *FD : RD->fields()) {
-      // FIXME: We don't currently support bit-fields. A lot of the logic for
-      // this is in CodeGen, so we need to factor it around.
-      if (FD->isBitField()) {
-        Info.FFDiag(BCE->getBeginLoc(),
-                    diag::note_constexpr_bit_cast_unsupported_bitfield);
-        return std::nullopt;
-      }
+    for (auto I = RD->field_begin(), E = RD->field_end(); I != E;
+         I++, FieldIdx++) {
+      FieldDecl *FD = *I;
+      if (FD->isBitField())
+        continue; // see below
 
       uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx);
       assert(FieldOffsetBits % Info.Ctx.getCharWidth() == 0);
@@ -7320,7 +7544,86 @@ class BufferToAPValueConverter {
       if (!SubObj)
         return std::nullopt;
       ResultVal.getStructField(FieldIdx) = *SubObj;
-      ++FieldIdx;
+    }
+
+    // Handle bit-fields
+    FieldIdx = 0;
+    for (auto I = RD->field_begin(), E = RD->field_end(); I != E;
+         I++, FieldIdx++) {
+      FieldDecl *FD = *I;
+      if (!FD->isBitField())
+        continue;
+
+      // unnamed bit fields are purely padding
+      if (FD->isUnnamedBitfield())
+        continue;
+
+      uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx);
+      CharUnits BufOffset = Offset;
+      uint64_t BitOffset = FieldOffsetBits;
+
+      unsigned int BitWidth = FD->getBitWidthValue(Info.Ctx);
+
+      CharUnits TypeWidth = Info.Ctx.getTypeSizeInChars(FD->getType());
+      uint64_t TypeWidthBits = Info.Ctx.toBits(TypeWidth);
+      if (BitWidth > TypeWidthBits) {
+        // e.g. `unsigned uint8_t c : 12`
+        // we truncate to CHAR_BIT * sizeof(T)
+        // (the extra bits are padding)
+        BitWidth = TypeWidthBits;
+      }
+      if (FieldOffsetBits >= TypeWidthBits) {
+        // e.g. `uint32_t : 33; uint32_t i : 12`
+        // or `uint16_t : 16; unsigned uint16_t i : 12`
+        BufOffset =
+            BufOffset + CharUnits::fromQuantity(BitOffset / TypeWidthBits) *
+                            TypeWidth.getQuantity();
+        BitOffset %= TypeWidthBits;
+      }
+
+      if (Info.Ctx.getTargetInfo().isBigEndian()) {
+        // big endian bits count from MSB to LSB
+        // so a bit-field of width 16 and size 12 will occupy bits [0-11] on a
+        // little endian machine, but [4-15] on a big endian machine
+        BitOffset = TypeWidthBits - (BitOffset + BitWidth);
+      }
+
+      assert(TypeWidth >= Info.Ctx.toCharUnitsFromBits(BitWidth));
+
+      llvm::SmallBitVector MaskBits(Info.Ctx.toBits(TypeWidth));
+      MaskBits.set(BitOffset, BitOffset + BitWidth);
+      uintptr_t Store;
+      ArrayRef<uintptr_t> BitRef = MaskBits.getData(Store);
+      SmallVector<uint8_t, 8> Mask(BitRef.size() * sizeof(uintptr_t));
+      std::memcpy(Mask.data(), BitRef.data(), Mask.size());
+      Mask.truncate(TypeWidth.getQuantity());
+
+      SmallVector<uint8_t, 8> Bytes(TypeWidth.getQuantity());
+      if (!Buffer.readObject(BufOffset, TypeWidth, Bytes, Mask)) {
+        const Type *T = FD->getType().getCanonicalType().getTypePtr();
+        const EnumType *EnumSugar = dyn_cast<EnumType>(T);
+        // If this is std::byte or unsigned char, then its okay to store an
+        // indeterminate value.
+        bool IsStdByte = EnumSugar && EnumSugar->isStdByteType();
+        bool IsUChar =
+            !EnumSugar && (T->isSpecificBuiltinType(BuiltinType::UChar) ||
+                           T->isSpecificBuiltinType(BuiltinType::Char_U));
+        if (!IsStdByte && !IsUChar) {
+          QualType DisplayType(EnumSugar ? (const Type *)EnumSugar : T, 0);
+          return badBits(DisplayType, BufOffset, Mask);
+        }
+        ResultVal.getStructField(FieldIdx) = APValue::IndeterminateValue();
+      } else {
+        APSInt Val(Info.Ctx.toBits(TypeWidth), true);
+        llvm::LoadIntFromMemory(Val, &*Bytes.begin(), TypeWidth.getQuantity());
+
+        Val >>= BitOffset;
+        Val = Val.trunc(BitWidth);
+        Val.setIsSigned(FD->getType()->isSignedIntegerOrEnumerationType());
+        Val = Val.extend(Info.Ctx.toBits(TypeWidth));
+
+        ResultVal.getStructField(FieldIdx) = APValue(Val);
+      }
     }
 
     return ResultVal;
@@ -7394,9 +7697,11 @@ class BufferToAPValueConverter {
       // actually need to be accessed.
       bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
 
-      SmallVector<uint8_t, 8> Bytes;
-      Bytes.reserve(NElts / 8);
-      if (!Buffer.readObject(Offset, CharUnits::fromQuantity(NElts / 8), Bytes))
+      size_t Width = NElts / 8;
+      SmallVector<uint8_t, 8> Bytes, Mask = BitCastBuffer::MaskAllSet(Width);
+      Bytes.resize_for_overwrite(Width);
+      if (!Buffer.readObject(Offset, CharUnits::fromQuantity(Width), Bytes,
+                             Mask))
         return std::nullopt;
 
       APSInt SValInt(NElts, true);
diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
index c5b8032f40b131..29d046e2def3db 100644
--- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
+++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
@@ -23,28 +23,24 @@ static_assert(sizeof(long long) == 8);
 template <class To, class From>
 constexpr To bit_cast(const From &from) {
   static_assert(sizeof(To) == sizeof(From));
-  // expected-note at +9 {{cannot be represented in type 'bool'}}
-#ifdef __x86_64
-  // expected-note at +7 {{or 'std::byte'; '__int128' is invalid}}
-#endif
-#ifdef __CHAR_UNSIGNED__
-  // expected-note at +4 2 {{indeterminate value can only initialize an object of type 'unsigned char', 'char', or 'std::byte'; 'signed char' is invalid}}
-#else
-  // expected-note at +2 2 {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'signed char' is invalid}}
-#endif
   return __builtin_bit_cast(To, from);
 }
 
 template <class Intermediate, class Init>
-constexpr bool round_trip(const Init &init) {
+constexpr bool check_round_trip(const Init &init) {
   return bit_cast<Init>(bit_cast<Intermediate>(init)) == init;
 }
 
+template <class Intermediate, class Init>
+constexpr Init round_trip(const Init &init) {
+  return bit_cast<Init>(bit_cast<Intermediate>(init));
+}
+
 void test_int() {
-  static_assert(round_trip<unsigned>((int)-1));
-  static_assert(round_trip<unsigned>((int)0x12345678));
-  static_assert(round_trip<unsigned>((int)0x87654321));
-  static_assert(round_trip<unsigned>((int)0x0C05FEFE));
+  static_assert(check_round_trip<unsigned>((int)-1));
+  static_assert(check_round_trip<unsigned>((int)0x12345678));
+  static_assert(check_round_trip<unsigned>((int)0x87654321));
+  static_assert(check_round_trip<unsigned>((int)0x0C05FEFE));
 }
 
 void test_array() {
@@ -73,8 +69,8 @@ void test_record() {
                                                                     ? 0x0C05FEFE
                                                                     : 0xCAFEBABE));
 
-  static_assert(round_trip<unsigned long long>(splice));
-  static_assert(round_trip<long long>(splice));
+  static_assert(round_trip<unsigned long long>(splice) == splice);
+  static_assert(round_trip<long long>(splice) == splice);
 
   struct base2 {
   };
@@ -98,7 +94,7 @@ void test_record() {
   constexpr bases b = {{1, 2}, {}, {3}, 4};
   constexpr tuple4 t4 = bit_cast<tuple4>(b);
   static_assert(t4 == tuple4{1, 2, 3, 4});
-  static_assert(round_trip<tuple4>(b));
+  static_assert(round_trip<tuple4>(b) == b);
 }
 
 void test_partially_initialized() {
@@ -115,33 +111,295 @@ void test_partially_initialized() {
 
   static_assert(sizeof(pad) == sizeof(no_pad));
 
+  constexpr auto cast = [](const pad& from) constexpr {
+    // expected-note at +6 2 {{bit_cast source expression (type 'const pad') does not produce a constant value for byte [1] (of {7..0}) which are required by target type 'no_pad' (subobject 'signed char')}}
+    #ifdef __CHAR_UNSIGNED__
+    // expected-note at +4 2 {{indeterminate value can only initialize an object of type 'unsigned char', 'char', or 'std::byte'; 'signed char' is invalid}}
+    #else
+    // expected-note at +2 2 {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'signed char' is invalid}}
+    #endif
+    return __builtin_bit_cast(no_pad, from);
+  };
+
   constexpr pad pir{4, 4};
   // expected-error at +2 {{constexpr variable 'piw' must be initialized by a constant expression}}
-  // expected-note at +1 {{in call to 'bit_cast<no_pad, pad>(pir)'}}
-  constexpr int piw = bit_cast<no_pad>(pir).x;
+  // expected-note at +1 {{in call}}
+  constexpr int piw = cast(pir).x;
 
   // expected-error at +2 {{constexpr variable 'bad' must be initialized by a constant expression}}
-  // expected-note at +1 {{in call to 'bit_cast<no_pad, pad>(pir)'}}
-  constexpr no_pad bad = bit_cast<no_pad>(pir);
+  // expected-note at +1 {{in call}}
+  constexpr no_pad bad = cast(pir);
 
   constexpr pad fine = bit_cast<pad>(no_pad{1, 2, 3, 4, 5});
   static_assert(fine.x == 1 && fine.y == 5);
 }
 
-void no_bitfields() {
-  // FIXME!
+namespace std {
+enum byte : unsigned char {};
+} // namespace std
+
+template <int N, typename T = unsigned char, int Pad = 0>
+struct bits {
+  T : Pad;
+  T bits : N;
+
+  constexpr bool operator==(const T& rhs) const {
+    return bits == rhs;
+  }
+};
+
+template <int N, typename T, int P>
+constexpr bool operator==(const struct bits<N, T, P>& lhs, const struct bits<N, T, P>& rhs) {
+  return lhs.bits == rhs.bits;
+}
+
+void test_bitfields() {
+  using uint16_t = unsigned __INT16_TYPE__;
+  {
+    struct Q {
+      // cf. CGBitFieldInfo
+      // on a little-endian machine the bits "[count from] the
+      // least-significant-bit."
+      // so, by leaving a bit unused, we truncate the value's MSB.
+
+      // however, on a big-endian machine we "imagine the bits
+      // counting from the most-significant-bit", so we truncate
+      // the LSB here.
+      uint16_t q : 15;
+    };
+    constexpr unsigned char bits[2] = {0xf3, 0xef};
+    constexpr Q q = bit_cast<Q>(bits);
+    static_assert(bit_cast<uint16_t>(bits) == (LITTLE_END
+                                                    ? 0xeff3
+                                                    : 0xf3ef),
+      "bit-field casting ought to match \"whole\"-field casting");
+    static_assert(q.q == (LITTLE_END ? 0x6ff3 : (0xf3ee >> 1)));
+  }
+
   struct S {
-    unsigned char x : 8;
+    // little endian:
+    //    MSB .... .... LSB
+    //        |y|   |x|
+    //
+    // big endian
+    //    MSB .... .... LSB
+    //        |x|   |y|
+
+    unsigned char x : 4;
+    unsigned char y : 4;
+
+    constexpr bool operator==(S const &other) const {
+      return x == other.x && y == other.y;
+    }
   };
 
-  struct G {
-    unsigned char x : 8;
+  constexpr S s{0xa, 0xb};
+  static_assert(bit_cast<bits<8>>(s) == (LITTLE_END ? 0xba : 0xab));
+  static_assert(bit_cast<bits<7>>(s) == (LITTLE_END
+                                              ? 0xba & 0x7f
+                                              : (0xab & 0xfe) >> 1));
+
+  static_assert(round_trip<bits<8>>(s) == s);
+
+  struct R {
+    unsigned int r : 31;
+    unsigned int : 0;
+    unsigned int : 32;
+    constexpr bool operator==(R const &other) const {
+      return r == other.r;
+    }
   };
+  using T = bits<31, signed long long>;
 
-  constexpr S s{0};
-  // expected-error at +2 {{constexpr variable 'g' must be initialized by a constant expression}}
-  // expected-note at +1 {{constexpr bit_cast involving bit-field is not yet supported}}
-  constexpr G g = __builtin_bit_cast(G, s);
+  constexpr R r{0x4ac0ffee};
+  constexpr T t = bit_cast<T>(r);
+  static_assert(t == ((0xFFFFFFFF8 << 28) | 0x4ac0ffee)); // sign extension
+
+  static_assert(round_trip<T>(r) == r);
+  static_assert(round_trip<R>(t) == t);
+
+  struct U {
+    // expected-warning at +1 {{exceeds the width of its type}}
+    unsigned __INT32_TYPE__ trunc : 33;
+    unsigned __INT32_TYPE__ u : 31;
+    constexpr bool operator==(U const &other) const {
+      return trunc == other.trunc && u == other.u;
+    }
+  };
+  struct V {
+    unsigned __INT64_TYPE__ notrunc : 32;
+    unsigned __INT64_TYPE__ : 1;
+    unsigned __INT64_TYPE__ v : 31;
+    constexpr bool operator==(V const &other) const {
+      return notrunc == other.notrunc && v == other.v;
+    }
+  };
+
+  constexpr U u{static_cast<unsigned int>(~0), 0x4ac0ffee};
+  constexpr V v = bit_cast<V>(u);
+  static_assert(v.v == 0x4ac0ffee);
+
+  {
+    #define MSG "a constexpr ought to produce padding bits from padding bits"
+    static_assert(round_trip<V>(u) == u, MSG);
+    static_assert(round_trip<U>(v) == v, MSG);
+
+    constexpr auto w = bit_cast<bits<12, unsigned long, 33>>(u);
+    static_assert(w == (LITTLE_END
+                        ? 0x4ac0ffee & 0xFFF
+                        : (0x4ac0ffee & (0xFFF << (31 - 12))) >> (31-12)
+                      ), MSG);
+    #undef MSG
+  }
+
+  // nested structures
+  {
+    struct J {
+      struct {
+        uint16_t  k : 12;
+      } K;
+      struct {
+        uint16_t  l : 4;
+      } L;
+    };
+
+    static_assert(sizeof(J) == 4);
+    constexpr J j = bit_cast<J>(0x8c0ffee5);
+
+    static_assert(j.K.k == (LITTLE_END ? 0xee5 : 0x8c0));
+    static_assert(j.L.l == 0xf /* yay symmetry */);
+    static_assert(bit_cast<bits<4, uint16_t, 16>>(j) == 0xf);
+    struct N {
+      bits<12, uint16_t> k;
+      uint16_t : 16;
+    };
+    static_assert(bit_cast<N>(j).k == j.K.k);
+
+    struct M {
+      bits<4, uint16_t, 0> m[2];
+      constexpr bool operator==(const M& rhs) const {
+        return m[0] == rhs.m[0] && m[1] == rhs.m[1];
+      };
+    };
+    #if LITTLE_END == 1
+    constexpr uint16_t want[2] = {0x5, 0xf};
+    #else
+    constexpr uint16_t want[2] = {0x8000, 0xf000};
+    #endif
+
+    static_assert(bit_cast<M>(j) == bit_cast<M>(want));
+  }
+
+  // enums
+  {
+    // ensure we're packed into the top 2 bits
+    constexpr int pad = LITTLE_END ? 6 : 0;
+    struct X
+    {
+        char : pad;
+        enum class direction: char { left, right, up, down } direction : 2;
+    };
+
+    constexpr X x = { X::direction::down };
+    static_assert(bit_cast<bits<2, signed char, pad>>(x) == -1);
+    static_assert(bit_cast<bits<2, unsigned char, pad>>(x) == 3);
+    static_assert(
+      bit_cast<X>((unsigned char)0x40).direction == X::direction::right);
+  }
+}
+
+template<int N>
+struct bytebuf {
+  using size_t = int;
+  unsigned char bytes[N];
+
+  constexpr unsigned char &operator[](size_t index) {
+    if (index < N)
+      return bytes[index];
+  }
+};
+
+void bitfield_indeterminate() {
+  struct BF { unsigned char z : 2; };
+  enum byte : unsigned char {};
+
+  constexpr BF bf = {0x3};
+  static_assert(bit_cast<bits<2>>(bf).bits == bf.z);
+
+  // expected-error at +1 {{not an integral constant expression}}
+  static_assert(bit_cast<unsigned char>(bf));
+  /// FIXME the above doesn't get any helpful notes, but the below does
+#if LITTLE_END == 1
+  // expected-note at +6 {{bit [2-7]}}
+#else
+  // expected-note at +4 {{bit [0-5]}}
+#endif
+  // expected-note at +2 {{indeterminate}}
+  // expected-error at +1 {{not an integral constant expression}}
+  static_assert(__builtin_bit_cast(byte, bf));
+
+  struct M {
+    // expected-note at +1 {{subobject declared here}}
+    unsigned char mem[sizeof(BF)];
+  };
+  // expected-error at +3 {{initialized by a constant expression}}
+  // zzexpected-note at +2 {{bad bits}}
+  // expected-note at +1 {{not initialized}}
+  constexpr M m = bit_cast<M>(bf);
+
+  constexpr auto f = []() constexpr {
+    // bits<24, unsigned int, LITTLE_END ? 0 : 8> B = {0xc0ffee};
+    constexpr struct { unsigned short b1; unsigned char b0;  } B = {0xc0ff, 0xee};
+    return bit_cast<bytebuf<4>>(B);
+  };
+
+  static_assert(f()[0] + f()[1] + f()[2] == 0xc0 + 0xff + 0xee);
+  {
+    // expected-error at +2 {{initialized by a constant expression}}
+    // expected-note at +1 {{read of uninitialized object is not allowed in a constant expression}}
+    constexpr auto _bad = f()[3];
+  }
+
+  struct B {
+    unsigned short s0 : 8;
+    unsigned short s1 : 8;
+    std::byte b0 : 4;
+    std::byte b1 : 4;
+    std::byte b2 : 4;
+  };
+  constexpr auto g = [f]() constexpr {
+    return bit_cast<B>(f());
+  };
+  static_assert(g().s0 + g().s1 + g().b0 + g().b1 == 0xc0 + 0xff + 0xe + 0xe);
+  {
+    // expected-error at +2 {{initialized by a constant expression}}
+    // expected-note at +1 {{read of uninitialized object is not allowed in a constant expression}}
+    constexpr auto _bad = g().b2;
+  }
+}
+
+void bitfield_unsupported() {
+  // if a future standard requires more types to be permitted in the
+  // declaration of a bit-field, then this test will hopefully indicate
+  // that there's work to be done on __builtin_bit_cast.
+  struct U {
+    // expected-error at +1 {{bit-field 'f' has non-integral type}}
+    bool f[8] : 8;
+  };
+
+  // this next bit is speculative: if the above _were_ a valid definition,
+  // then the below might also be a reasonable interpretation of its
+  // semantics, but the current implementation of __builtin_bit_cast will
+  // fail
+
+  // expected-note at +3 {{invalid declaration}} FIXME should we instead bail out in Sema?
+  // expected-note at +2 {{declared here}}
+  // expected-error at +1 {{initialized by a constant expression}}
+  constexpr U u = __builtin_bit_cast(U, (char)0b1010'0101);
+  static_assert(U.f[0] && U.f[2] && U.f[4] && U.f[8]);
+  // expected-note at +2 {{not a constant expression}}
+  // expected-error at +1 {{not an integral constant expression}}
+  static_assert(__builtin_bit_cast(bits<8>, u) == 0xA5);
 }
 
 void array_members() {
@@ -165,8 +423,8 @@ void array_members() {
   constexpr G g = bit_cast<G>(s);
   static_assert(g.a == 1 && g.b == 2 && g.c == 3);
 
-  static_assert(round_trip<G>(s));
-  static_assert(round_trip<S>(g));
+  static_assert(check_round_trip<G>(s));
+  static_assert(check_round_trip<S>(g));
 }
 
 void bad_types() {
@@ -229,6 +487,7 @@ void test_array_fill() {
 
 typedef decltype(nullptr) nullptr_t;
 
+// expected-note at +7 {{byte [0-7]}}
 #ifdef __CHAR_UNSIGNED__
 // expected-note at +5 {{indeterminate value can only initialize an object of type 'unsigned char', 'char', or 'std::byte'; 'unsigned long' is invalid}}
 #else
@@ -350,10 +609,6 @@ constexpr A two() {
 }
 constexpr short good_two = two().c + two().s;
 
-namespace std {
-enum byte : unsigned char {};
-}
-
 enum my_byte : unsigned char {};
 
 struct pad {
@@ -364,16 +619,18 @@ struct pad {
 constexpr int ok_byte = (__builtin_bit_cast(std::byte[8], pad{1, 2}), 0);
 constexpr int ok_uchar = (__builtin_bit_cast(unsigned char[8], pad{1, 2}), 0);
 
+// expected-note at +7 {{bit_cast source expression (type 'pad') does not produce a constant value for byte [1] (of {7..0}) which are required by target type 'my_byte[8]' (subobject 'my_byte')}}
 #ifdef __CHAR_UNSIGNED__
-// expected-note at +5 {{indeterminate value can only initialize an object of type 'unsigned char', 'char', or 'std::byte'; 'my_byte' is invalid}}}}
+// expected-note at +5 {{indeterminate value can only initialize an object of type 'unsigned char', 'char', or 'std::byte'; 'my_byte' is invalid}}
 #else
 // expected-note at +3 {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'my_byte' is invalid}}
 #endif
 // expected-error at +1 {{constexpr variable 'bad_my_byte' must be initialized by a constant expression}}
 constexpr int bad_my_byte = (__builtin_bit_cast(my_byte[8], pad{1, 2}), 0);
 #ifndef __CHAR_UNSIGNED__
-// expected-error at +3 {{constexpr variable 'bad_char' must be initialized by a constant expression}}
-// expected-note at +2 {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'char' is invalid}}
+// expected-note at +4 {{bit_cast source expression (type 'pad') does not produce a constant value for byte [1] (of {7..0}) which are required by target type 'char[8]' (subobject 'char')}}
+// expected-note at +3 {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'char' is invalid}}
+// expected-error at +2 {{constexpr variable 'bad_char' must be initialized by a constant expression}}
 #endif
 constexpr int bad_char =  (__builtin_bit_cast(char[8], pad{1, 2}), 0);
 
@@ -404,19 +661,22 @@ constexpr unsigned char identity3b = __builtin_bit_cast(unsigned char, identity3
 
 namespace test_bool {
 
-constexpr bool test_bad_bool = bit_cast<bool>('A'); // expected-error {{must be initialized by a constant expression}} expected-note{{in call}}
+// expected-note at +1 {{cannot be represented in type 'bool'}}
+constexpr bool test_bad_bool = __builtin_bit_cast(bool, 'A'); // expected-error {{must be initialized by a constant expression}}
 
-static_assert(round_trip<signed char>(true), "");
-static_assert(round_trip<unsigned char>(false), "");
-static_assert(round_trip<bool>(false), "");
+static_assert(check_round_trip<signed char>(true));
+static_assert(check_round_trip<unsigned char>(false));
+static_assert(check_round_trip<bool>(false));
 
-static_assert(round_trip<bool>((char)0), "");
-static_assert(round_trip<bool>((char)1), "");
+static_assert(check_round_trip<bool>((char)0));
+static_assert(check_round_trip<bool>((char)1));
 }
 
 namespace test_long_double {
 #ifdef __x86_64
-constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // expected-error{{must be initialized by a constant expression}} expected-note{{in call}}
+// expected-note at +2 {{byte [10-15]}}
+// expected-note at +1 {{or 'std::byte'; '__int128' is invalid}}
+constexpr __int128_t test_cast_to_int128 = __builtin_bit_cast(__int128_t, (long double)0); // expected-error{{must be initialized by a constant expression}}
 
 constexpr long double ld = 3.1425926539;
 
@@ -424,9 +684,9 @@ struct bytes {
   unsigned char d[16];
 };
 
-static_assert(round_trip<bytes>(ld), "");
+static_assert(check_round_trip<bytes>(ld));
 
-static_assert(round_trip<long double>(10.0L));
+static_assert(check_round_trip<long double>(10.0L));
 
 constexpr bool f(bool read_uninit) {
   bytes b = bit_cast<bytes>(ld);
@@ -445,8 +705,8 @@ constexpr bool f(bool read_uninit) {
   return true;
 }
 
-static_assert(f(/*read_uninit=*/false), "");
-static_assert(f(/*read_uninit=*/true), ""); // expected-error{{static assertion expression is not an integral constant expression}} expected-note{{in call to 'f(true)'}}
+static_assert(f(/*read_uninit=*/false));
+static_assert(f(/*read_uninit=*/true)); // expected-error{{static assertion expression is not an integral constant expression}} expected-note{{in call to 'f(true)'}}
 
 constexpr bytes ld539 = {
   0x0, 0x0,  0x0,  0x0,
@@ -457,7 +717,7 @@ constexpr bytes ld539 = {
 
 constexpr long double fivehundredandthirtynine = 539.0;
 
-static_assert(bit_cast<long double>(ld539) == fivehundredandthirtynine, "");
+static_assert(bit_cast<long double>(ld539) == fivehundredandthirtynine);
 
 #else
 static_assert(round_trip<__int128_t>(34.0L));
@@ -473,10 +733,10 @@ constexpr uint2 test_vector = { 0x0C05FEFE, 0xCAFEBABE };
 
 static_assert(bit_cast<unsigned long long>(test_vector) == (LITTLE_END
                                                                 ? 0xCAFEBABE0C05FEFE
-                                                                : 0x0C05FEFECAFEBABE), "");
+                                                                : 0x0C05FEFECAFEBABE));
 
-static_assert(round_trip<uint2>(0xCAFEBABE0C05FEFEULL), "");
-static_assert(round_trip<byte8>(0xCAFEBABE0C05FEFEULL), "");
+static_assert(check_round_trip<uint2>(0xCAFEBABE0C05FEFEULL));
+static_assert(check_round_trip<byte8>(0xCAFEBABE0C05FEFEULL));
 
 typedef bool bool8 __attribute__((ext_vector_type(8)));
 typedef bool bool9 __attribute__((ext_vector_type(9)));
@@ -485,16 +745,16 @@ typedef bool bool17 __attribute__((ext_vector_type(17)));
 typedef bool bool32 __attribute__((ext_vector_type(32)));
 typedef bool bool128 __attribute__((ext_vector_type(128)));
 
-static_assert(bit_cast<unsigned char>(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA), "");
-static_assert(round_trip<bool8>(static_cast<unsigned char>(0)), "");
-static_assert(round_trip<bool8>(static_cast<unsigned char>(1)), "");
-static_assert(round_trip<bool8>(static_cast<unsigned char>(0x55)), "");
+static_assert(bit_cast<unsigned char>(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA));
+static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0)));
+static_assert(check_round_trip<bool8>(static_cast<unsigned char>(1)));
+static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0x55)));
 
-static_assert(bit_cast<unsigned short>(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 0x2F1F : 0xF8F4), "");
+static_assert(bit_cast<unsigned short>(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 0x2F1F : 0xF8F4));
 
-static_assert(round_trip<bool16>(static_cast<short>(0xCAFE)), "");
-static_assert(round_trip<bool32>(static_cast<int>(0xCAFEBABE)), "");
-static_assert(round_trip<bool128>(static_cast<__int128_t>(0xCAFEBABE0C05FEFEULL)), "");
+static_assert(check_round_trip<bool16>(static_cast<short>(0xCAFE)));
+static_assert(check_round_trip<bool32>(static_cast<int>(0xCAFEBABE)));
+static_assert(check_round_trip<bool128>(static_cast<__int128_t>(0xCAFEBABE0C05FEFEULL)));
 
 // expected-error at +2 {{constexpr variable 'bad_bool9_to_short' must be initialized by a constant expression}}
 // expected-note at +1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(9)))' (vector of 9 'bool' values) is not allowed in a constant expression; element size 1 * element count 9 is not a multiple of the byte size 8}}
diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp
index 792dc78464b2a8..f6fbea4ab03d75 100644
--- a/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp
+++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx20.cpp
@@ -216,15 +216,19 @@ namespace UnnamedBitfield {
   // uninitialized and it being zeroed. Those are not distinct states
   // according to [temp.type]p2.
   //
-  // FIXME: We shouldn't track a value for unnamed bit-fields, nor number
-  // them when computing field indexes.
+  // At namespace scope, multiple `using` declarations are valid (to avoid
+  // conflicts when #including), just in case they "all refer to the same
+  // entity." This test makes use of that implicit constraint to ensure that
+  // the compiler does not "see" a difference between any of the `T`s below.
+  // cf. https://stackoverflow.com/a/31225016/151464
   template <A> struct X {};
   constexpr A a;
   using T = X<a>;
   using T = X<A{}>;
   using T = X<(A())>;
-  // Once we support bit-casts involving bit-fields, this should be valid too.
-  using T = X<__builtin_bit_cast(A, 0)>; // expected-error {{constant}} expected-note {{not yet supported}}
+  using T = X<__builtin_bit_cast(A, 0)>;
+  using T = X<__builtin_bit_cast(A, A{})>;
+  using T = X<__builtin_bit_cast(A, (unsigned char[4]){})>;
 }
 
 namespace Temporary {

>From 2e0d3f81e4f5b623bb476bfac0278cfc6d1bd4bc Mon Sep 17 00:00:00 2001
From: Seth Pellegrino <seth at codecopse.net>
Date: Thu, 4 Jan 2024 13:52:25 -0800
Subject: [PATCH 3/3] refactor: BitCastBuffer with APInt, fix for bools

Make use of the pre-existing APInt type as a pre-established spelling of
BitVector to get much more granular with the BitCastBuffer; this also
permits more thorough usage of `const APInt&` references to make the
copies easier to optimize away and reduces the common case of
int->buffer->int from 4-7 copies down to 2. As a bonus, we also got a
bit more flexible with supporting platforms where `CHAR_BIT` is
something other than 8, addressing some of the review feedback from the
original diff, though there's plenty of follow-up work here and
elsewhere to make that a reality.

Additionally, uncovers and fixes some errant behavior around `bool`s
introduced in the previous commit, and makes the handling of
`_BitInt(N)` types more consistent while preserving
backwards-compatibility.
---
 clang/lib/AST/ExprConstant.cpp                | 717 +++++++++++-------
 .../constexpr-builtin-bit-cast-bitint.cpp     |  82 ++
 .../SemaCXX/constexpr-builtin-bit-cast.cpp    | 231 ++++--
 3 files changed, 709 insertions(+), 321 deletions(-)
 create mode 100644 clang/test/SemaCXX/constexpr-builtin-bit-cast-bitint.cpp

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 356cef552d3544..9d714ef09a1337 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/SwapByteOrder.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -73,9 +74,10 @@
 #include <cstdint>
 #include <cstring>
 #include <functional>
-#include <iomanip>
 #include <iterator>
+#include <limits>
 #include <optional>
+#include <valarray>
 
 #define DEBUG_TYPE "exprconstant"
 
@@ -6914,112 +6916,259 @@ bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) {
 //===----------------------------------------------------------------------===//
 namespace {
 
+struct BitSlice : public std::slice {
+  BitSlice(size_t Offset, size_t Num) : std::slice(Offset, Num, 1){};
+
+  inline size_t end() const { return start() + size(); }
+};
+
+struct BitFieldInfo {
+  /// The offset (in bits) within the appropriate storage type.
+  const unsigned Offset : 16;
+
+  /// The size of the value held by the bit-field, in bits.
+  const unsigned Width : 15;
+
+  /// Whether the bit-field is signed.
+  const unsigned IsSigned : 1;
+
+  /// The storage size in bits which should be used when accessing this
+  /// bitfield.
+  const unsigned StorageSize;
+
+  /// The offset of the bitfield storage from the start of the struct.
+  const CharUnits StorageOffset;
+
+  static BitFieldInfo MakeInfo(const ASTContext &Ctx, const FieldDecl *FD,
+                               const ASTRecordLayout &Layout) {
+    const unsigned StorageSize = Ctx.getTypeSize(FD->getType()),
+                   FieldOffsetBits = Layout.getFieldOffset(FD->getFieldIndex());
+
+    unsigned Width = FD->getBitWidthValue(Ctx);
+    if (Width > StorageSize) {
+      // e.g. `uint8_t c : 12`
+      // we truncate to CHAR_BIT * sizeof(T)
+      // (the extra bits are padding)
+      Width = StorageSize;
+    }
+    unsigned Offset = FieldOffsetBits % StorageSize;
+    if (Ctx.getTargetInfo().isBigEndian()) {
+      // big endian bits count from MSB to LSB
+      // so a bit-field of width 16 and size 12 will
+      // occupy bits [0-11] on a little endian machine,
+      // but [4-15] on a big endian machine
+      Offset = StorageSize - (Offset + Width);
+    }
+    return {
+        Offset,
+        Width,
+        FD->getType()->isSignedIntegerOrEnumerationType(),
+        StorageSize,
+        Ctx.toCharUnitsFromBits((FieldOffsetBits / StorageSize) * StorageSize),
+    };
+  }
+};
+
 struct BitCastBuffer {
-  // FIXME: Its possible under the C++ standard for 'char' to not be 8 bits, but
-  // we don't support a host or target where that is the case. Still, we should
-  // use a more generic type in case we ever do.
-  using byte_t = unsigned char;
-  static_assert(std::numeric_limits<byte_t>::digits >= 8,
-                "Need at least 8 bit unsigned char");
-
-  SmallVector<byte_t, 32> Bytes;
-  SmallVector<byte_t, 32> Valid;
-
-  bool TargetIsLittleEndian;
-
-  static SmallVector<byte_t> MaskAllSet(size_t Width) {
-    SmallVector<byte_t> M;
-    M.resize(Width);
-    std::fill(M.begin(), M.end(), ~0);
-    return M;
-  }
-
-  BitCastBuffer(CharUnits Width, bool TargetIsLittleEndian)
-      : Bytes(Width.getQuantity()), Valid(Width.getQuantity()),
-        TargetIsLittleEndian(TargetIsLittleEndian) {}
-
-  [[nodiscard]] bool readObject(CharUnits Offset, CharUnits Width,
-                                SmallVectorImpl<byte_t> &Output,
-                                SmallVectorImpl<byte_t> const &Mask) const {
-    assert(Mask.size() >= static_cast<unsigned>(Width.getQuantity()));
-    assert(Output.size() >= static_cast<unsigned>(Width.getQuantity()));
-    assert(Bytes.size() >=
-           static_cast<unsigned>((Offset + Width).getQuantity()));
-
-    SmallVector<byte_t, 8> RevMask;
-    const SmallVectorImpl<byte_t> &M =
-        (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian)
-        ? [&]() -> const SmallVectorImpl<byte_t> & {
-      auto W = Width.getQuantity();
-      RevMask.resize_for_overwrite(W);
-      std::reverse_copy(Mask.begin(), Mask.begin() + W, RevMask.begin());
-      return RevMask;
-    }()
-        : Mask;
-
-    size_t Index = 0;
-    for (CharUnits I = Offset, E = Offset + Width; I != E; ++I, ++Index) {
-      const auto BufIdx = I.getQuantity();
-      const auto mask = M[Index];
-      // are there any bits in Mask[Index] that are not set in
-      // Valid[BufIdx]? (NB: more bits can be set, that's just
-      // fine)
-      if ((Valid[BufIdx] & M[Index]) != M[Index])
-        // If any bit of an integer is uninitialized, then the
-        // whole integer is uninitialized.
+  // The number of bits in a `char`, needed to handle endianness (which is
+  // assumed to be exclusively big or little) for values with more bits than
+  // this number.
+  //
+  // No current platforms support varying this size.
+  static const uint64_t CharBit = 8;
+
+  const uint64_t BitWidth;
+  const bool IsNativeEndian;
+
+  APInt Data;
+  APInt Invalid; // Indeterminate bits
+
+  BitCastBuffer(uint64_t BitWidth, bool TargetIsLittleEndian, uint64_t CharBit)
+      : BitWidth(BitWidth),
+        IsNativeEndian(llvm::sys::IsLittleEndianHost == TargetIsLittleEndian),
+        Data(BitWidth, 0), Invalid(BitWidth, ~0, /* extend "sign" bit */ true) {
+    assert(Invalid.countl_one() == BitWidth);
+    assert(CharBit == BitCastBuffer::CharBit);
+  }
+
+  [[nodiscard]] bool readMasked(const uint64_t Offset, APInt &Output,
+                                const APInt &Mask) const {
+    assert(Output.getBitWidth() == Mask.getBitWidth());
+    const BitSlice Which = {Offset, Output.getBitWidth()};
+
+    const auto read = [&](const APInt &Mask) {
+      if ((getBits(Invalid, Which) & Mask) != 0)
         return false;
 
-      Output[Index] = (Output[Index] & ~mask) | (Bytes[BufIdx] & mask);
+      Output = (Output & ~Mask) | (getBits(Data, Which) & Mask);
+      return true;
+    };
+
+    if (!IsNativeEndian && Output.getBitWidth() > CharBit) {
+      bool OK = read(Mask.byteSwap());
+      Output = Output.byteSwap();
+      return OK;
     }
 
-    if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian)
-      std::reverse(Output.begin(), Output.end());
+    return read(Mask);
+  }
+
+  [[nodiscard]] inline bool readObject(const uint64_t Offset,
+                                       APInt &Output) const {
+    return readObject({Offset, Output.getBitWidth()}, Output);
+  }
+
+  [[nodiscard]] bool readObject(const BitSlice &Which, APInt &Output) const {
+    assert(Output.getBitWidth() <= BitWidth);
+    assert(Which.size() <= Output.getBitWidth());
+    assert(Which.end() <= BitWidth);
+    assert((IsNativeEndian || withinByte(Which) ||
+            APInt::getBitsSet(BitWidth, Which.start(), Which.end())
+                .byteSwap()
+                .isShiftedMask()) &&
+           "use readMasked instead");
+
+    if (getBits(Invalid, Which) != 0)
+      return false;
+
+    copyBitsFrom(Output, {0, Which.size()}, Data, Which);
+
+    if (!IsNativeEndian && Output.getBitWidth() > CharBit)
+      Output = Output.byteSwap();
+
     return true;
   }
 
-  void writeObject(CharUnits Offset, SmallVectorImpl<byte_t> &Input,
-                   SmallVectorImpl<byte_t> &Mask) {
-    assert(Mask.size() >= Input.size());
-    assert(Bytes.size() >=
-           static_cast<unsigned>(Offset.getQuantity()) + Input.size());
+  void writeMasked(const uint64_t Offset, const APInt &Input,
+                   const APInt &Mask) {
+    assert(Input.getBitWidth() == Mask.getBitWidth());
+    const uint64_t BW = Input.getBitWidth();
+    const BitSlice Dest = {Offset, BW};
 
-    // we could promise Input and Mask were `const`, except for this
-    if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian) {
-      std::reverse(Input.begin(), Input.end());
-      // we might (will) have more mask bits than input bits
-      std::reverse(Mask.begin(), Mask.begin() + Input.size());
+    auto write = [&](const APInt &Input, const APInt &Mask) {
+      assert((~getBits(Invalid, Dest) & Mask) == 0 && "overwriting data?");
+      const APInt Val = (Input & Mask) | (getBits(Data, Dest) & ~Mask);
+      const APInt Written = getBits(Invalid, Dest) ^ Mask;
+
+      copyBitsFrom(Data, Dest, Val, {0, BW});
+      copyBitsFrom(Invalid, Dest, Written, {0, BW});
+    };
+
+    if (!IsNativeEndian && BW > CharBit) {
+      write(Input.byteSwap(), Mask.byteSwap());
+      return;
     }
+    write(Input, Mask);
+  }
+
+  void writeObject(const uint64_t Offset, const APInt &Input) {
+    writeObject({Offset, Input.getBitWidth()}, Input, {0, Input.getBitWidth()});
+  }
+
+  void writeObject(const BitSlice &Dst, const APInt &Input,
+                   const BitSlice &Src) {
+    assert(Src.size() == Dst.size());
+    assert(Src.end() <= Input.getBitWidth());
+    assert(Dst.end() <= BitWidth);
+    assert(~getBits(Invalid, Dst) == 0 && "overwriting data?");
+    assert((IsNativeEndian || (withinByte(Src) && withinByte(Dst)) ||
+            [&] {
+              unsigned lo, len;
+              return Src.size() % 8 == 0 &&
+                     APInt::getBitsSet(Src.size(), Src.start(), Src.end())
+                         .byteSwap()
+                         .isShiftedMask(lo, len) &&
+                     lo == Src.start() && len == Src.size() &&
+                     Dst.start() % CharBit == 0 && Dst.size() % CharBit == 0;
+            }()) &&
+           "use writeMasked instead");
+
+    auto write = [&](const APInt &Input) {
+      copyBitsFrom(Data, Dst, Input, Src);
+      clearBits(Invalid, Dst);
+    };
 
-    size_t Index = 0;
-    size_t BufIdx = Offset.getQuantity();
-    for (byte_t &Byte : Input) {
-      assert((Valid[BufIdx] & Mask[Index]) == 0 && "overwriting data?");
-      Bytes[BufIdx] |= Byte & Mask[Index];
-      Valid[BufIdx] |= Mask[Index];
-      ++BufIdx;
-      ++Index;
+    if (!IsNativeEndian && Input.getBitWidth() > CharBit) {
+      write(Input.byteSwap());
+      return;
     }
+
+    write(Input);
+  }
+
+  // true iff the range described by Which, from start (inclusive) to end
+  // (exclusive), lies within a single addressable byte, i.e.
+  //    [0, 0)     -> yes
+  //    [0, 3)     -> yes
+  //    [0, 8)     -> yes
+  //    [16, 24)   -> yes
+  //    [123, 123) -> yes
+  //    [7, 9)     -> no
+  inline static bool withinByte(const BitSlice &Which) {
+    // NB: Which.start() may equal Which.end(), and either may be zero, so
+    // care must be taken here to avoid underflow
+    return Which.size() == 0 ||
+           Which.start() / CharBit + 1 == (Which.end() + CharBit - 1) / CharBit;
   }
 
-  size_t size() { return Bytes.size(); }
+  inline static APInt getBits(const APInt &Int, const BitSlice &Which) {
+    // more lenient than extractBits (which asserts `start < BitWidth`)
+    // this permits e.g. zero-width reads "one past the last bit"
+    assert(Which.end() <= Int.getBitWidth());
+    if (Which.size() == 0) {
+      return APInt::getZeroWidth();
+    }
+    return Int.extractBits(Which.size(), Which.start());
+  }
+
+  // copyBitsFrom acts like `LHS[Dst] = RHS[Src];`, if `APInt`s supported
+  // slicing
+  inline static void copyBitsFrom(APInt &LHS, const BitSlice &Dst,
+                                  const APInt &RHS, const BitSlice &Src) {
+    assert(Src.size() == Dst.size());
+
+    if (Src.start() > 0 || Src.end() < RHS.getBitWidth() ||
+        RHS.getBitWidth() != Dst.size()) {
+      APInt Val = RHS.lshr(Src.start()).trunc(Src.size()).zext(Dst.size());
+      LHS.insertBits(Val, Dst.start());
+      return;
+    }
+    LHS.insertBits(RHS, Dst.start());
+  }
+
+  inline static void clearBits(APInt &Int, const BitSlice &Which) {
+    unsigned Bit = Which.start(), Rem = Which.size() % 64;
+    if (Rem > 0) // else APInt crashes when Bit == 0
+      Int.insertBits(0ull, Bit, Rem);
+    Bit += Rem;
+    for (unsigned End = Which.end(); Bit < End; Bit += 64)
+      Int.insertBits(0ull, Bit, 64u);
+  }
+
+  static llvm::FormattedBytes formatInt(const APInt &Int) {
+    // implicit in the below we're assuming that CHAR_BIT is 8.
+    //
+    // this might get confusing on a PDP-10, where "bytes" were a software
+    // abstraction that varied in size (potentially even within the same
+    // program; see https://retrocomputing.stackexchange.com/a/15514).
+    //
+    // happily, this is a dump method, so we get to do non-backwards-compatible
+    // things like assume the programmer will know if they're in the extremely
+    // unlikely context where "byte" means something other than 8 bits.
+    const auto *Data = Int.getRawData();
+    const auto NumBytes = Int.getBitWidth() / 8;
+    assert(NumBytes <= Int.getNumWords() * sizeof(*Data));
+    const ArrayRef<uint8_t> AsBytes(reinterpret_cast<const uint8_t *>(Data),
+                                    NumBytes);
+    const unsigned int NumPerLine = 40, ByteGroupSize = 1;
+
+    return format_bytes(AsBytes, std::nullopt, NumPerLine, ByteGroupSize);
+  }
 
   LLVM_DUMP_METHOD void dump() {
-    auto pp = [](std::stringstream &SS, llvm::SmallVectorImpl<byte_t> &V) {
-      bool first = true;
-      for (byte_t v : V) {
-        if (first)
-          first = false;
-        else
-          SS << " ";
-        SS << "0x" << std::hex << std::setw(2) << std::setfill('0')
-           << static_cast<unsigned>(v);
-      }
-    };
-    std::stringstream SS[2];
-    pp(SS[0], Bytes);
-    pp(SS[1], Valid);
-    llvm::dbgs() << "BitCastBuffer{Bytes: [" << SS[0].str() << "], Valid: ["
-                 << SS[1].str() << "]}\n";
+    llvm::dbgs() << "BitCastBuffer{Bytes: [" << formatInt(Data)
+                 << "], Invalid: [" << formatInt(Invalid) << "] (=> Valid: ["
+                 << formatInt(~Invalid) << "])}\n";
   }
 };
 
@@ -7032,8 +7181,9 @@ class APValueToBufferConverter {
 
   APValueToBufferConverter(EvalInfo &Info, CharUnits ObjectWidth,
                            const CastExpr *BCE)
-      : Info(Info),
-        Buffer(ObjectWidth, Info.Ctx.getTargetInfo().isLittleEndian()),
+      : Info(Info), Buffer(Info.Ctx.toBits(ObjectWidth),
+                           Info.Ctx.getTargetInfo().isLittleEndian(),
+                           Info.Ctx.getCharWidth()),
         BCE(BCE) {}
 
   bool visit(const APValue &Val, QualType Ty) {
@@ -7042,13 +7192,13 @@ class APValueToBufferConverter {
 
   // Write out Val with type Ty into Buffer starting at Offset.
   bool visit(const APValue &Val, QualType Ty, CharUnits Offset) {
-    assert((size_t)Offset.getQuantity() <= Buffer.size());
+    assert((size_t)Info.Ctx.toBits(Offset) <= Buffer.BitWidth);
 
     // As a special case, nullptr_t has an indeterminate value.
     if (Ty->isNullPtrType())
       return true;
 
-    // Dig through Val to find the byte at Offset.
+    // Dig through Val to find the bits.
     switch (Val.getKind()) {
     case APValue::Indeterminate:
     case APValue::None:
@@ -7139,54 +7289,34 @@ class APValueToBufferConverter {
       if (!FieldVal.hasValue())
         continue;
 
-      uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx);
-      CharUnits BufOffset = Offset;
-      uint64_t BitOffset = FieldOffsetBits;
-
-      unsigned int BitWidth = FD->getBitWidthValue(Info.Ctx);
-
-      CharUnits TypeWidth = Info.Ctx.getTypeSizeInChars(FD->getType());
-      uint64_t TypeWidthBits = Info.Ctx.toBits(TypeWidth);
-      if (BitWidth > TypeWidthBits) {
-        // e.g. `unsigned uint8_t c : 12`
-        // we truncate to CHAR_BIT * sizeof(T)
-        // (the extra bits are padding)
-        BitWidth = TypeWidthBits;
-      }
-      if (FieldOffsetBits >= TypeWidthBits) {
-        // e.g. `uint32_t : 33; uint32_t i : 12`
-        // or `uint16_t : 16; unsigned uint16_t i : 12`
-        BufOffset =
-            BufOffset + CharUnits::fromQuantity(BitOffset / TypeWidthBits) *
-                            TypeWidth.getQuantity();
-        BitOffset %= TypeWidthBits;
-      }
-
-      if (Info.Ctx.getTargetInfo().isBigEndian()) {
-        // big endian bits count from MSB to LSB
-        // so a bit-field of width 16 and size 12 will occupy bits [0-11] on a
-        // little endian machine, but [3-15] on a big endian machine
-        BitOffset = TypeWidthBits - (BitOffset + BitWidth);
+      const auto BF = BitFieldInfo::MakeInfo(Info.Ctx, FD, Layout);
+
+      APSInt BoolVal;
+      const APSInt &Val = [&]() -> const APSInt & {
+        const APSInt &Val = FieldVal.getInt();
+        if (FD->getType()->isBooleanType()) {
+          // Let's zero extend the `i1` to be the full `Width` bits
+          // Note: this works because we refuse to read boolean
+          // values that don't have their high bits zeroed; see comment
+          // in BufferToAPValueConverter::visit(BuiltinType, ...)
+          BoolVal = Val.extend(BF.Width);
+          return BoolVal;
+        }
+        return Val;
+      }();
+      assert(Val.getBitWidth() >= BF.Width);
+      if (!Buffer.IsNativeEndian && Val.getBitWidth() > 8) {
+        APInt AdjVal = (Val << BF.Offset);
+        APInt Mask = APInt::getBitsSet(Val.getBitWidth(), BF.Offset,
+                                       BF.Width + BF.Offset);
+
+        Buffer.writeMasked(Info.Ctx.toBits(Offset + BF.StorageOffset), AdjVal,
+                           Mask);
+      } else {
+        const uint64_t BitOffset = Info.Ctx.toBits(Offset + BF.StorageOffset);
+        Buffer.writeObject({BitOffset + BF.Offset, BF.Width}, Val,
+                           {0, BF.Width});
       }
-
-      assert(TypeWidth >= Info.Ctx.toCharUnitsFromBits(BitWidth));
-
-      llvm::SmallBitVector MaskBits(Info.Ctx.toBits(TypeWidth));
-      MaskBits.set(BitOffset, BitOffset + BitWidth);
-      uintptr_t Store;
-      ArrayRef<uintptr_t> Ref = MaskBits.getData(Store);
-      SmallVector<uint8_t, 8> Mask(Ref.size() * sizeof(uintptr_t));
-      std::memcpy(Mask.data(), Ref.data(), Mask.size());
-      Mask.truncate(TypeWidth.getQuantity());
-
-      SmallVector<uint8_t, 8> Bytes(TypeWidth.getQuantity());
-
-      APSInt Val = FieldVal.getInt() << BitOffset;
-      assert(Val.getBitWidth() >= BitOffset + BitWidth &&
-             "lost data in APInt -> byte buffer conversion");
-
-      llvm::StoreIntToMemory(Val, &*Bytes.begin(), TypeWidth.getQuantity());
-      Buffer.writeObject(BufOffset, Bytes, Mask);
     }
 
     return true;
@@ -7252,13 +7382,13 @@ class APValueToBufferConverter {
 
     if (VTy->isExtVectorBoolType()) {
       // Special handling for OpenCL bool vectors:
-      // Since these vectors are stored as packed bits, but we can't write
-      // individual bits to the BitCastBuffer, we'll buffer all of the elements
-      // together into an appropriately sized APInt and write them all out at
-      // once. Because we don't accept vectors where NElts * EltSize isn't a
-      // multiple of the char size, there will be no padding space, so we don't
-      // have to worry about writing data which should have been left
-      // uninitialized.
+      // Since these vectors are stored in memory as packed bits, but the
+      // constexpr interpreter stores them as individual 1-bit-wide APInts, we
+      // pack them together into a single appropriately sized APInt and write
+      // them all out at once. Because we don't accept vectors where NElts *
+      // EltSize isn't a multiple of the char size, there will be no padding
+      // space, so we don't have to worry about writing data which should have
+      // been left uninitialized.
       bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
 
       llvm::APInt Res = llvm::APInt::getZero(NElts);
@@ -7267,13 +7397,10 @@ class APValueToBufferConverter {
         assert(EltAsInt.isUnsigned() && EltAsInt.getBitWidth() == 1 &&
                "bool vector element must be 1-bit unsigned integer!");
 
-        Res.insertBits(EltAsInt, BigEndian ? (NElts - I - 1) : I);
+        Res.setBitVal(BigEndian ? (NElts - I - 1) : I, EltAsInt[0]);
       }
 
-      SmallVector<uint8_t, 8> Bytes(NElts / 8);
-      auto Mask = BitCastBuffer::MaskAllSet(Bytes.size());
-      llvm::StoreIntToMemory(Res, &*Bytes.begin(), NElts / 8);
-      Buffer.writeObject(Offset, Bytes, Mask);
+      Buffer.writeObject(Info.Ctx.toBits(Offset), Res);
     } else {
       // Iterate over each of the elements and write them out to the buffer at
       // the appropriate offset.
@@ -7288,17 +7415,39 @@ class APValueToBufferConverter {
   }
 
   bool visitInt(const APSInt &Val, QualType Ty, CharUnits Offset) {
-    APSInt AdjustedVal = Val;
-    unsigned Width = AdjustedVal.getBitWidth();
     if (Ty->isBooleanType()) {
-      Width = Info.Ctx.getTypeSize(Ty);
-      AdjustedVal = AdjustedVal.extend(Width);
+      // Let's zero extend the `i1` to be the full 8 bits
+      // Note: this works because we refuse to read boolean
+      // values that don't have their high bits zeroed; see comment
+      // in BufferToAPValueConverter::visit(BuiltinType, ...)
+      unsigned Width = Info.Ctx.getTypeSize(Ty);
+      Buffer.writeObject(Info.Ctx.toBits(Offset), Val.zext(Width));
+      return true;
+    }
+
+    if (Ty->isBitIntType()) {
+      // This preserves the existing behavior that used to function like so:
+      // ```c++
+      // SmallVector<uint8_t, 8> Bytes(Width / 8);
+      // llvm::StoreIntToMemory(AdjustedVal, &*Bytes.begin(), Width / 8);
+      // Buffer.writeObject(Offset, Bytes);
+      // ```
+      // which, when provided a _BitInt(N), would write N/8 bytes (when it
+      // didn't crash)
+      const CharUnits Bytes =
+          CharUnits::fromQuantity(Info.Ctx.getIntWidth(Ty) / 8);
+      const unsigned Size = Info.Ctx.toBits(Bytes);
+      if (!Buffer.IsNativeEndian && Val.getBitWidth() > 8) {
+        Buffer.writeObject(Info.Ctx.toBits(Offset), Val.trunc(Size));
+      } else {
+        Buffer.writeObject({static_cast<size_t>(Info.Ctx.toBits(Offset)), Size},
+                           Val, {0, Size});
+      }
+
+      return true;
     }
 
-    SmallVector<uint8_t, 8> Bytes(Width / 8);
-    auto Mask = BitCastBuffer::MaskAllSet(Bytes.size());
-    llvm::StoreIntToMemory(AdjustedVal, &*Bytes.begin(), Width / 8);
-    Buffer.writeObject(Offset, Bytes, Mask);
+    Buffer.writeObject(Info.Ctx.toBits(Offset), Val);
     return true;
   }
 
@@ -7357,28 +7506,28 @@ class BufferToAPValueConverter {
     return std::nullopt;
   }
 
-  std::nullopt_t badBits(QualType Ty, CharUnits Offset,
-                         SmallVectorImpl<BitCastBuffer::byte_t> &M) {
+  std::nullopt_t badBits(QualType Ty, const BitSlice &Want) {
     Info.FFDiag(BCE->getExprLoc(), diag::note_constexpr_bit_cast_indet_dest, 1)
         << Ty << Info.Ctx.getLangOpts().CharIsSigned;
     uint64_t BitWidth = Info.Ctx.getTypeSize(BCE->getType());
-    uint64_t ByteWidth = Info.Ctx.toCharUnitsFromBits(BitWidth).getQuantity();
-    assert(ByteWidth == Buffer.Valid.size_in_bytes());
+    uint64_t ByteWidth =
+        Info.Ctx.getTypeSizeInChars(BCE->getType()).getQuantity();
+    assert(ByteWidth == Buffer.BitWidth / Info.Ctx.getCharWidth());
 
-    APInt Valid(BitWidth, 0);
-    llvm::LoadIntFromMemory(Valid, Buffer.Valid.begin(), ByteWidth);
-    APInt Mask(BitWidth, 0);
-    llvm::LoadIntFromMemory(Mask, M.begin(), M.size_in_bytes());
-
-    Mask = Mask.zext(Valid.getBitWidth());
-    Mask <<= Info.Ctx.toBits(Offset);
+    uint64_t BW = Info.Ctx.getTypeSize(Ty);
+    APInt Indet = Buffer.Invalid.lshr(Want.start()).trunc(BW);
+    auto Mask = APInt::getBitsSet(BW, 0, Want.size());
+    if (!Buffer.IsNativeEndian && BW % 16 == 0) {
+      Indet = Indet.byteSwap();
+      Mask = Mask.byteSwap();
+    }
 
     auto ByteAligned = true;
 
-    APInt Missing = (~Valid & Mask);
+    APInt Missing = Indet & Mask;
     assert(!Missing.isZero() && "bad bits called with no bad bits?");
     llvm::SmallVector<std::pair<size_t, size_t>> MissingBitRanges;
-    int NextBit = 0;
+    int NextBit = Want.start();
     while (!Missing.isZero()) {
       APInt Last(Missing);
       int N = Missing.countr_zero();
@@ -7426,6 +7575,16 @@ class BufferToAPValueConverter {
     return std::nullopt;
   }
 
+  static bool canStoreIndeterminate(const Type *T, const EnumType *EnumSugar) {
+    // If this is std::byte or unsigned char, then it's okay to store an
+    // indeterminate value.
+    bool IsStdByte = EnumSugar && EnumSugar->isStdByteType();
+    bool IsUChar =
+        !EnumSugar && (T->isSpecificBuiltinType(BuiltinType::UChar) ||
+                       T->isSpecificBuiltinType(BuiltinType::Char_U));
+    return IsStdByte || IsUChar;
+  }
+
   std::optional<APValue> visit(const BuiltinType *T, CharUnits Offset,
                                const EnumType *EnumSugar = nullptr) {
     if (T->isNullPtrType()) {
@@ -7435,51 +7594,60 @@ class BufferToAPValueConverter {
                      APValue::NoLValuePath{}, /*IsNullPtr=*/true);
     }
 
-    CharUnits SizeOf = Info.Ctx.getTypeSizeInChars(T);
+    uint64_t SizeOf = Info.Ctx.getTypeSize(T);
 
-    // Work around floating point types that contain unused padding bytes. This
+    // Some floating point types contain unused padding bytes. This
     // is really just `long double` on x86, which is the only fundamental type
-    // with padding bytes.
+    // with padding bytes. (other than `bool`s, which are handled specially
+    // below)
     if (T->isRealFloatingType()) {
       const llvm::fltSemantics &Semantics =
           Info.Ctx.getFloatTypeSemantics(QualType(T, 0));
       unsigned NumBits = llvm::APFloatBase::getSizeInBits(Semantics);
       assert(NumBits % 8 == 0);
-      CharUnits NumBytes = CharUnits::fromQuantity(NumBits / 8);
-      if (NumBytes != SizeOf)
-        SizeOf = NumBytes;
-    }
-
-    SmallVector<uint8_t, 8> Bytes,
-        Mask = BitCastBuffer::MaskAllSet(SizeOf.getQuantity());
-    Bytes.resize_for_overwrite(SizeOf.getQuantity());
-    if (!Buffer.readObject(Offset, SizeOf, Bytes, Mask)) {
-      // If this is std::byte or unsigned char, then its okay to store an
-      // indeterminate value.
-      bool IsStdByte = EnumSugar && EnumSugar->isStdByteType();
-      bool IsUChar =
-          !EnumSugar && (T->isSpecificBuiltinType(BuiltinType::UChar) ||
-                         T->isSpecificBuiltinType(BuiltinType::Char_U));
-      if (!IsStdByte && !IsUChar) {
+      if (NumBits != SizeOf)
+        SizeOf = NumBits;
+    }
+
+    uint64_t BitAddr = Info.Ctx.toBits(Offset);
+    APSInt Val(SizeOf, true);
+    if (!Buffer.readObject(BitAddr, Val)) {
+      if (!canStoreIndeterminate(T, EnumSugar)) {
         QualType DisplayType(EnumSugar ? (const Type *)EnumSugar : T, 0);
-        return badBits(DisplayType, Offset, Mask);
+        return badBits(DisplayType, {BitAddr, Val.getBitWidth()});
       }
 
       return APValue::IndeterminateValue();
     }
 
-    APSInt Val(SizeOf.getQuantity() * Info.Ctx.getCharWidth(), true);
-    llvm::LoadIntFromMemory(Val, &*Bytes.begin(), Bytes.size());
-
     if (T->isIntegralOrEnumerationType()) {
       Val.setIsSigned(T->isSignedIntegerOrEnumerationType());
 
-      unsigned IntWidth = Info.Ctx.getIntWidth(QualType(T, 0));
-      if (IntWidth != Val.getBitWidth()) {
-        APSInt Truncated = Val.trunc(IntWidth);
-        if (Truncated.extend(Val.getBitWidth()) != Val)
+      if (T->isBooleanType()) {
+        // booleans are special in that they have natural padding. However,
+        // rather than treating the padding bits as such, we instead choose to
+        // see them more like "tag" bits that are architecturally required to be
+        // zeroed, i.e. invoking the "no value of type `To` corresponding to the
+        // representation" undefined behavior clause, and therefore refusing to
+        // produce a constant value.
+        //
+        // We do this because on write, we'd like to zero-extend a `bool` out to
+        // 8 bits so that it's possible to `bit_cast<uint8_t>(false)` without
+        // additional ceremony. However, that means that if we permit any
+        // non-zero bit patterns to be cast _to_ a bool here, we'd permit a
+        // construct like the following:
+        // ```c++
+        // bit_cast<uint8_t>(bit_cast<bool>('\x02'))
+        // ```
+        // to produce a constant `0x0` (because we'll zero-extend the LSB).
+        //
+        // Note that this is different behavior than we'll want for _BitInt(N)
+        // types, where we have no desire for a bit cast from a `_BitInt(3)` to
+        // produce a constant value for the other bits.
+        if (Val.getActiveBits() > 1)
           return unrepresentableValue(QualType(T, 0), Val);
-        Val = Truncated;
+
+        Val = Val.trunc(1);
       }
 
       return APValue(Val);
@@ -7554,76 +7722,79 @@ class BufferToAPValueConverter {
       if (!FD->isBitField())
         continue;
 
+      // matches the existing behavior
+      if (FD->getType()->isBitIntType())
+        return unsupportedType(FD->getType());
+
       // unnamed bit fields are purely padding
       if (FD->isUnnamedBitfield())
         continue;
 
-      uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx);
-      CharUnits BufOffset = Offset;
-      uint64_t BitOffset = FieldOffsetBits;
-
-      unsigned int BitWidth = FD->getBitWidthValue(Info.Ctx);
-
-      CharUnits TypeWidth = Info.Ctx.getTypeSizeInChars(FD->getType());
-      uint64_t TypeWidthBits = Info.Ctx.toBits(TypeWidth);
-      if (BitWidth > TypeWidthBits) {
-        // e.g. `unsigned uint8_t c : 12`
-        // we truncate to CHAR_BIT * sizeof(T)
-        // (the extra bits are padding)
-        BitWidth = TypeWidthBits;
-      }
-      if (FieldOffsetBits >= TypeWidthBits) {
-        // e.g. `uint32_t : 33; uint32_t i : 12`
-        // or `uint16_t : 16; unsigned uint16_t i : 12`
-        BufOffset =
-            BufOffset + CharUnits::fromQuantity(BitOffset / TypeWidthBits) *
-                            TypeWidth.getQuantity();
-        BitOffset %= TypeWidthBits;
-      }
-
-      if (Info.Ctx.getTargetInfo().isBigEndian()) {
-        // big endian bits count from MSB to LSB
-        // so a bit-field of width 16 and size 12 will occupy bits [0-11] on a
-        // little endian machine, but [3-15] on a big endian machine
-        BitOffset = TypeWidthBits - (BitOffset + BitWidth);
+      const auto BF = BitFieldInfo::MakeInfo(Info.Ctx, FD, Layout);
+      const bool isUnsigned =
+          FD->getType()->isUnsignedIntegerOrEnumerationType();
+      APSInt Val;
+      bool ReadOK;
+      const unsigned BitAddr =
+          Info.Ctx.toBits(Offset + BF.StorageOffset) + BF.Offset;
+      if (!Buffer.IsNativeEndian && BF.StorageSize > 8) {
+        Val = APSInt(BF.StorageSize, true);
+        const APInt Mask =
+            APInt::getBitsSet(BF.StorageSize, BF.Offset, BF.Offset + BF.Width);
+
+        ReadOK = Buffer.readMasked(Info.Ctx.toBits(Offset + BF.StorageOffset),
+                                   Val, Mask);
+
+        Val >>= BF.Offset;
+        Val = Val.trunc(BF.Width);
+        Val.setIsUnsigned(isUnsigned);
+      } else {
+        Val = APSInt(BF.Width, isUnsigned);
+        ReadOK = Buffer.readObject(BitAddr, Val);
       }
 
-      assert(TypeWidth >= Info.Ctx.toCharUnitsFromBits(BitWidth));
-
-      llvm::SmallBitVector MaskBits(Info.Ctx.toBits(TypeWidth));
-      MaskBits.set(BitOffset, BitOffset + BitWidth);
-      uintptr_t Store;
-      ArrayRef<uintptr_t> BitRef = MaskBits.getData(Store);
-      SmallVector<uint8_t, 8> Mask(BitRef.size() * sizeof(uintptr_t));
-      std::memcpy(Mask.data(), BitRef.data(), Mask.size());
-      Mask.truncate(TypeWidth.getQuantity());
-
-      SmallVector<uint8_t, 8> Bytes(TypeWidth.getQuantity());
-      if (!Buffer.readObject(BufOffset, TypeWidth, Bytes, Mask)) {
+      if (!ReadOK) {
         const Type *T = FD->getType().getCanonicalType().getTypePtr();
         const EnumType *EnumSugar = dyn_cast<EnumType>(T);
-        // If this is std::byte or unsigned char, then its okay to store an
-        // indeterminate value.
-        bool IsStdByte = EnumSugar && EnumSugar->isStdByteType();
-        bool IsUChar =
-            !EnumSugar && (T->isSpecificBuiltinType(BuiltinType::UChar) ||
-                           T->isSpecificBuiltinType(BuiltinType::Char_U));
-        if (!IsStdByte && !IsUChar) {
+        if (!canStoreIndeterminate(T, EnumSugar)) {
           QualType DisplayType(EnumSugar ? (const Type *)EnumSugar : T, 0);
-          return badBits(DisplayType, BufOffset, Mask);
+          return badBits(DisplayType, {BitAddr, BF.Width});
         }
+
         ResultVal.getStructField(FieldIdx) = APValue::IndeterminateValue();
-      } else {
-        APSInt Val(Info.Ctx.toBits(TypeWidth), true);
-        llvm::LoadIntFromMemory(Val, &*Bytes.begin(), TypeWidth.getQuantity());
+        continue;
+      }
 
-        Val >>= BitOffset;
-        Val = Val.trunc(BitWidth);
-        Val.setIsSigned(FD->getType()->isSignedIntegerOrEnumerationType());
-        Val = Val.extend(Info.Ctx.toBits(TypeWidth));
+      if (FD->getType()->isBooleanType()) {
+        // booleans are special in that they have natural padding. However,
+        // rather than treating the padding bits as such, we instead choose to
+        // see them more like "tag" bits that are architecturally required to be
+        // zeroed, i.e. invoking the "no value of type `To` corresponding to the
+        // representation" undefined behavior clause, and therefore refusing to
+        // produce a constant value.
+        //
+        // We do this because on write, we'd like to zero-extend a `bool` out to
+        // 8 bits so that it's possible to `bit_cast<uint8_t>(false)` without
+        // additional ceremony. However, that means that if we permit any
+        // non-zero bit patterns to be cast _to_ a bool here, we'd permit a
+        // construct like the following:
+        // ```c++
+        // bit_cast<uint8_t>(bit_cast<bool>('\x02'))
+        // ```
+        // to produce a constant `0x0` (because we'll zero-extend the LSB).
+        //
+        // Note that this is different behavior than we'll want for _BitInt(N)
+        // types, where we have no desire for a bit cast from a `_BitInt(3)` to
+        // produce a constant value for the other bits.
+        if (Val.getActiveBits() > 1)
+          return unrepresentableValue(FD->getType(), Val);
 
-        ResultVal.getStructField(FieldIdx) = APValue(Val);
+        Val = Val.trunc(1);
+      } else {
+        Val = Val.extend(BF.StorageSize);
       }
+
+      ResultVal.getStructField(FieldIdx) = APValue(Val);
     }
 
     return ResultVal;
@@ -7688,28 +7859,22 @@ class BufferToAPValueConverter {
     Elts.reserve(NElts);
     if (VTy->isExtVectorBoolType()) {
       // Special handling for OpenCL bool vectors:
-      // Since these vectors are stored as packed bits, but we can't read
-      // individual bits from the BitCastBuffer, we'll buffer all of the
-      // elements together into an appropriately sized APInt and write them all
-      // out at once. Because we don't accept vectors where NElts * EltSize
-      // isn't a multiple of the char size, there will be no padding space, so
-      // we don't have to worry about reading any padding data which didn't
-      // actually need to be accessed.
+      // These vectors are stored in memory as packed bits, but the constexpr
+      // interpreter wants to store them as individual 1-bit-wide APInts, so
+      // we unpack them here. Because we don't accept vectors where
+      // NElts * EltSize isn't a multiple of the char size, there will be no
+      // padding space, so we don't have to worry about reading any padding data
+      // which didn't actually need to be accessed.
       bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
 
-      size_t Width = NElts / 8;
-      SmallVector<uint8_t, 8> Bytes, Mask = BitCastBuffer::MaskAllSet(Width);
-      Bytes.resize_for_overwrite(Width);
-      if (!Buffer.readObject(Offset, CharUnits::fromQuantity(Width), Bytes,
-                             Mask))
-        return std::nullopt;
-
-      APSInt SValInt(NElts, true);
-      llvm::LoadIntFromMemory(SValInt, &*Bytes.begin(), Bytes.size());
+      const unsigned BitAddr = Info.Ctx.toBits(Offset);
+      APSInt Val(NElts, true);
+      if (!Buffer.readObject(BitAddr, Val))
+        return badBits(QualType(VTy, 0), {BitAddr, Val.getBitWidth()});
 
       for (unsigned I = 0; I < NElts; ++I) {
         llvm::APInt Elt =
-            SValInt.extractBits(1, (BigEndian ? NElts - I - 1 : I) * EltSize);
+            Val.extractBits(1, (BigEndian ? NElts - I - 1 : I) * EltSize);
         Elts.emplace_back(
             APSInt(std::move(Elt), !EltTy->isSignedIntegerType()));
       }
diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast-bitint.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast-bitint.cpp
new file mode 100644
index 00000000000000..3acf82dc0fb4a6
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast-bitint.cpp
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -verify -std=c++2a -fsyntax-only -triple x86_64-apple-macosx10.14.0 %s
+// RUN: %clang_cc1 -verify -std=c++2a -fsyntax-only -triple x86_64-apple-macosx10.14.0 %s -fno-signed-char
+// RUN: %clang_cc1 -verify -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu %s
+
+// This is separate from constexpr-builtin-bit-cast.cpp because clangd17 seems to behave
+// poorly around _BitInt(N) types, and this isolates that unfortunate behavior to one file
+//
+// hopefully a future clangd will not crash or lose track of its syntax highlighting, at which
+// point the "bit_precise" namespace ought to be merged back into *bit-cast.cpp.
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#  define LITTLE_END 1
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#  define LITTLE_END 0
+#else
+#  error "huh?"
+#endif
+
+using uint8_t = unsigned char;
+
+template <class To, class From>
+constexpr To bit_cast(const From &from) {
+  static_assert(sizeof(To) == sizeof(From));
+  return __builtin_bit_cast(To, from);
+}
+
+namespace bit_precise {
+// ok so it's a little bit of a lie to say we don't support _BitInt in any casts; we do in fact
+// support casting _from_ a _BitInt(N), at least some of the time
+static_assert(bit_cast<uint8_t, _BitInt(8)>(0xff) == 0xff);
+template <int N> struct bytes { uint8_t b[N]; };
+static_assert(bit_cast<bytes<2>, _BitInt(12)>(0xff).b[(LITTLE_END ? 0 : /* fixme */ 0)] == 0xff);
+static_assert(bit_cast<bytes<4>, _BitInt(24)>(0xff).b[(LITTLE_END ? 0 : /* fixme */ 2)] == 0xff);
+
+enum byte : unsigned char {}; // not std::byte
+
+constexpr _BitInt(7) z = 0x7f;
+constexpr auto bad_cast = __builtin_bit_cast(byte, z); // expected-error {{constant expression}}
+// expected-note@-1 {{'bit_precise::byte' is invalid}}
+// expected-note@-2 {{byte [0]}}
+
+#if __clang_major__ > 17
+// This is #ifdef'd off to stop clangd from crashing every time I open this file in my editor
+// fixme? this crashes clang17 and before
+constexpr auto unsupported_cast = __builtin_bit_cast(uint8_t, z); // expected-error {{constant expression}}
+// expected-note@-1 {{subobject of type 'const uint8_t' (aka 'const unsigned char') is not initialized}}
+#endif
+
+// expected-note@+1 {{constexpr bit cast involving type '_BitInt(8)' is not yet supported}}
+constexpr auto _n = __builtin_bit_cast(_BitInt(8), (uint8_t)0xff); // expected-error {{constant expression}}
+
+// expected-note@+1 {{constexpr bit cast involving type '_BitInt(7)' is not yet supported}}
+constexpr auto _m = __builtin_bit_cast(_BitInt(7), (uint8_t)0xff); // expected-error {{constant expression}}
+
+// fixme: support _BitInt
+// struct bitints {
+//   _BitInt(2) x;
+//   signed _BitInt(4) y;
+// };
+//
+// constexpr auto bi = bit_cast<bitints, uint16_t>(0xff'ff);
+// static_assert(bi.x == 0x3);
+// static_assert(bi.y == -8);
+
+// fixme?: the syntax highlighting here is a little off (`signed` and `constexpr` both lose their "keyword" coloring)
+struct BF {
+  _BitInt(2) x : 2;
+  signed _BitInt(3) y : 2;
+    // expected-warning@+1 {{exceeds the width of its type}}
+  _BitInt(3) z : 4; // "oversized" bit field
+};
+
+// expected-note@+1 {{constexpr bit cast involving type '_BitInt(2)' is not yet supported}}
+constexpr auto bf = __builtin_bit_cast(BF, (uint8_t)0xff); // expected-error {{must be initialized by a constant expression}}
+
+// fixme: support _BitInt
+// constexpr auto bf = bit_cast<BF, uint8_t>(0xff);
+// static_assert(bf.x == 0x3);
+// static_assert(bf.y == -4); // or +4 ?
+// static_assert(bf.z == 0x7);
+
+} // namespace bit_precise
diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
index 29d046e2def3db..1baff5f56941a0 100644
--- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
+++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
@@ -10,16 +10,14 @@
 #  error "huh?"
 #endif
 
-template <class T, class V> struct is_same {
-  static constexpr bool value = false;
-};
-template <class T> struct is_same<T, T> {
-  static constexpr bool value = true;
-};
-
 static_assert(sizeof(int) == 4);
 static_assert(sizeof(long long) == 8);
 
+using uint8_t = unsigned char;
+using uint16_t = unsigned __INT16_TYPE__;
+using uint32_t = unsigned __INT32_TYPE__;
+using uint64_t = unsigned __INT64_TYPE__;
+
 template <class To, class From>
 constexpr To bit_cast(const From &from) {
   static_assert(sizeof(To) == sizeof(From));
@@ -134,10 +132,6 @@ void test_partially_initialized() {
   static_assert(fine.x == 1 && fine.y == 5);
 }
 
-namespace std {
-enum byte : unsigned char {};
-} // namespace std
-
 template <int N, typename T = unsigned char, int Pad = 0>
 struct bits {
   T : Pad;
@@ -154,7 +148,6 @@ constexpr bool operator==(const struct bits<N, T, P>& lhs, const struct bits<N,
 }
 
 void test_bitfields() {
-  using uint16_t = unsigned __INT16_TYPE__;
   {
     struct Q {
       // cf. CGBitFieldInfo
@@ -167,15 +160,47 @@ void test_bitfields() {
       // the LSB here.
       uint16_t q : 15;
     };
-    constexpr unsigned char bits[2] = {0xf3, 0xef};
-    constexpr Q q = bit_cast<Q>(bits);
-    static_assert(bit_cast<uint16_t>(bits) == (LITTLE_END
+    constexpr unsigned char bytes[2] = {0xf3, 0xef};
+    constexpr Q q = bit_cast<Q>(bytes);
+    static_assert(q.q == (LITTLE_END ? 0x6ff3 : (0xf3ee >> 1)));
+    static_assert(bit_cast<uint16_t>(bytes) == (LITTLE_END
                                                     ? 0xeff3
                                                     : 0xf3ef),
       "bit-field casting ought to match \"whole\"-field casting");
-    static_assert(q.q == (LITTLE_END ? 0x6ff3 : (0xf3ee >> 1)));
+
+    // similarly, "skip 1 bit of padding" followed by "read 9 bits"
+    // will truncate (shift out) either the LSB (little endian) or MSB (big endian)
+    static_assert((0xf3ee >> 1) == 0x79f7);
+    static_assert(0x01cf == (0xf3ef >> (16-9-1) & 0x1ff));
+    static_assert(bit_cast<bits<9, uint16_t, 1>>(q) == (LITTLE_END
+                                                              ? (0xeff3 >> 1) & 0x1ff
+                                                              : (0xf3ef >> (16-9-1)) & 0x1ff));
+
+    #if LITTLE_END == 0
+    // expected-note@+5 {{bit [0]}}
+    #else
+    // expected-note@+3 {{bit [15]}}
+    #endif
+    // expected-error@+1 {{constant expression}}
+    constexpr auto _i = __builtin_bit_cast(bits<15, uint16_t, 1>, q);
+    // expected-note@-1 {{indeterminate}}
   }
 
+  static_assert(round_trip<bits<8>, uint8_t>(0x8c) == 0x8c);
+  static_assert(round_trip<bits<32, uint32_t>, uint32_t>(0x8c0f'fee5) == 0x8c0ffee5);
+
+  #define MSG "endianness matters even with <=8-bit fields"
+  static_assert(bit_cast<bits<8, uint16_t, 7>, uint16_t>(0xcafe) == (LITTLE_END
+                                                                          ? 0x95
+                                                                          : 0x7f), MSG);
+  static_assert(bit_cast<bits<4, uint16_t, 10>, uint16_t>(0xcafe) == (LITTLE_END
+                                                                          ? 0x2
+                                                                          : 0xf), MSG);
+  static_assert(bit_cast<bits<4, uint32_t, 19>, uint32_t>(0xa1cafe) == (LITTLE_END
+                                                                          ? 0x4
+                                                                          : 0x5), MSG);
+  #undef MSG
+
   struct S {
     // little endian:
     //    MSB .... .... LSB
@@ -209,7 +234,7 @@ void test_bitfields() {
       return r == other.r;
     }
   };
-  using T = bits<31, signed long long>;
+  using T = bits<31, signed __INT64_TYPE__>;
 
   constexpr R r{0x4ac0ffee};
   constexpr T t = bit_cast<T>(r);
@@ -220,16 +245,16 @@ void test_bitfields() {
 
   struct U {
     // expected-warning@+1 {{exceeds the width of its type}}
-    unsigned __INT32_TYPE__ trunc : 33;
-    unsigned __INT32_TYPE__ u : 31;
+    uint32_t trunc : 33;
+    uint32_t u : 31;
     constexpr bool operator==(U const &other) const {
       return trunc == other.trunc && u == other.u;
     }
   };
   struct V {
-    unsigned __INT64_TYPE__ notrunc : 32;
-    unsigned __INT64_TYPE__ : 1;
-    unsigned __INT64_TYPE__ v : 31;
+    uint64_t notrunc : 32;
+    uint64_t : 1;
+    uint64_t v : 31;
     constexpr bool operator==(V const &other) const {
       return notrunc == other.notrunc && v == other.v;
     }
@@ -244,7 +269,7 @@ void test_bitfields() {
     static_assert(round_trip<V>(u) == u, MSG);
     static_assert(round_trip<U>(v) == v, MSG);
 
-    constexpr auto w = bit_cast<bits<12, unsigned long, 33>>(u);
+    constexpr auto w = bit_cast<bits<12, uint64_t, 33>>(u);
     static_assert(w == (LITTLE_END
                         ? 0x4ac0ffee & 0xFFF
                         : (0x4ac0ffee & (0xFFF << (31 - 12))) >> (31-12)
@@ -308,14 +333,20 @@ void test_bitfields() {
   }
 }
 
+namespace std {
+enum byte : unsigned char {};
+} // namespace std
+
+using uint8_t = unsigned char;
+
 template<int N>
-struct bytebuf {
-  using size_t = int;
-  unsigned char bytes[N];
+struct bytes {
+  using size_t = unsigned int;
+  unsigned char d[N];
 
   constexpr unsigned char &operator[](size_t index) {
     if (index < N)
-      return bytes[index];
+      return d[index];
   }
 };
 
@@ -342,15 +373,14 @@ void bitfield_indeterminate() {
     // expected-note@+1 {{subobject declared here}}
     unsigned char mem[sizeof(BF)];
   };
-  // expected-error@+3 {{initialized by a constant expression}}
-  // zzexpected-note@+2 {{bad bits}}
+  // expected-error@+2 {{initialized by a constant expression}}
   // expected-note@+1 {{not initialized}}
   constexpr M m = bit_cast<M>(bf);
 
   constexpr auto f = []() constexpr {
     // bits<24, unsigned int, LITTLE_END ? 0 : 8> B = {0xc0ffee};
     constexpr struct { unsigned short b1; unsigned char b0;  } B = {0xc0ff, 0xee};
-    return bit_cast<bytebuf<4>>(B);
+    return bit_cast<bytes<4>>(B);
   };
 
   static_assert(f()[0] + f()[1] + f()[2] == 0xc0 + 0xff + 0xee);
@@ -664,13 +694,109 @@ namespace test_bool {
 // expected-note@+1 {{cannot be represented in type 'bool'}}
 constexpr bool test_bad_bool = __builtin_bit_cast(bool, 'A'); // expected-error {{must be initialized by a constant expression}}
 
-static_assert(check_round_trip<signed char>(true));
-static_assert(check_round_trip<unsigned char>(false));
-static_assert(check_round_trip<bool>(false));
+static_assert(round_trip<signed char>(true));
+static_assert(round_trip<unsigned char>(true));
+static_assert(round_trip<bool>(false) == false);
+
+static_assert(static_cast<uint8_t>(false) == 0x0);
+static_assert(bit_cast<uint8_t>(false) == 0x0);
+static_assert(static_cast<uint8_t>(true) == 0x1);
+static_assert(bit_cast<uint8_t>(true) == 0x1);
+
+static_assert(round_trip<bool, uint8_t>(0x01) == 0x1);
+static_assert(round_trip<bool, uint8_t>(0x00) == 0x0);
+// expected-note@+2 {{cannot be represented in type 'bool'}}
+// expected-error@+1 {{constant expression}}
+constexpr auto test_bad_bool2 = __builtin_bit_cast(bool, (uint8_t)0x02);
 
-static_assert(check_round_trip<bool>((char)0));
-static_assert(check_round_trip<bool>((char)1));
+#if LITTLE_END == 1
+constexpr auto okbits = bit_cast<bits<1>>(true);
+#else
+constexpr auto okbits = bit_cast<bits<1, uint8_t, 7>>(true);
+#endif
+static_assert(okbits == 0x1);
+// expected-note@+3 {{bit [1-7]}}
+// expected-note@+2 {{or 'std::byte'; 'bool' is invalid}}
+// expected-error@+1 {{constant expression}}
+constexpr auto _weird_bool = __builtin_bit_cast(bool, okbits);
+
+// these don't work because we're trying to read the whole 8 bits to ensure
+// the value is representable, as above
+// static_assert(round_trip<bool, bits<1>>({0x1}) == 0x1);
+// static_assert(round_trip<bool, bits<1>>({0x0}) == 0x0);
+
+// these work because we're only reading 1 bit of "bool" to ensure
+// "representability"
+static_assert(round_trip<bits<1, bool>, bits<1>>({0x1}) == 0x1);
+static_assert(round_trip<bits<1, bool>, bits<1>>({0x0}) == 0x0);
+
+template <const int P, class B = bool>
+constexpr bool extract_bit(unsigned char v) {
+  return static_cast<bool>(bit_cast<bits<1, B, P>>(v).bits);
 }
+// 0xA5 (0b1010'0101) is a bit-palindrome, so endianness doesn't matter
+// (counting LSB->MSB yields the same bits as counting MSB->LSB)
+static_assert(extract_bit<0>(0xA5) == 0x1);
+static_assert(extract_bit<2>(0xA5) == 0x1);
+static_assert(extract_bit<5>(0xA5) == 0x1);
+static_assert(extract_bit<7>(0xA5) == 0x1);
+
+static_assert(extract_bit<1>(0xA5) == 0x0);
+static_assert(extract_bit<3>(0xA5) == 0x0);
+static_assert(extract_bit<4>(0xA5) == 0x0);
+static_assert(extract_bit<6>(0xA5) == 0x0);
+
+enum byte : unsigned char {}; // not std::byte or unsigned char
+
+static_assert(extract_bit<5, byte>('\xa5') == 0x1);
+
+struct pad {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  bool : 5; // push field down to the LSB
+#endif
+  bool b : 3;
+};
+
+static_assert(bit_cast<pad, uint8_t>(0b001).b == true);
+static_assert(bit_cast<pad, uint8_t>(0b000).b == false);
+
+// expected-note@+1 {{cannot be represented in type 'bool'}}
+constexpr auto _bad_bool3 = __builtin_bit_cast(pad, (uint8_t)0b110); // expected-error {{must be initialized by a constant expression}}
+
+struct S {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  byte : 7;
+#endif
+  byte z : 1;
+};
+
+constexpr auto s = bit_cast<S>(pad{1});
+static_assert(s.z == 0x1);
+
+// expected-note@+3 {{bit [1-2]}}
+// expected-note@+2 {{or 'std::byte'; 'bool' is invalid}}
+// expected-error@+1 {{constant expression}}
+constexpr auto _bad_bool4 = __builtin_bit_cast(pad, s);
+
+
+// `bool` includes padding bits, but *which* single bit stores the
+// value is under-specified. These tests not-so-secretly assert that
+// it's in fact the LSB that the compiler "sees" as the value.
+struct pack {
+  bool a : 1;
+  bool b : 1;
+
+  // 1 bit of value, 5 bits of padding
+  bool c : 6;
+};
+
+constexpr auto packed = bit_cast<pack, uint8_t>(LITTLE_END ? 0x07 : 0xc1);
+static_assert(packed.a && packed.b && packed.c);
+
+static_assert(bit_cast<bits<2, uint8_t, 0>>(packed) == 0x3);
+static_assert(bit_cast<bits<1, uint8_t, LITTLE_END ? 2 : 7>>(packed) == 0x1);
+
+} // namespace test_bool
 
 namespace test_long_double {
 #ifdef __x86_64
@@ -680,9 +806,7 @@ constexpr __int128_t test_cast_to_int128 = __builtin_bit_cast(__int128_t, (long
 
 constexpr long double ld = 3.1425926539;
 
-struct bytes {
-  unsigned char d[16];
-};
+using bytes = bytes<16>;
 
 static_assert(check_round_trip<bytes>(ld));
 
@@ -696,10 +820,10 @@ constexpr bool f(bool read_uninit) {
   };
 
   for (int i = 0; i != 10; ++i)
-    if (ld_bytes[i] != b.d[i])
+    if (ld_bytes[i] != b[i])
       return false;
 
-  if (read_uninit && b.d[10]) // expected-note{{read of uninitialized object is not allowed in a constant expression}}
+  if (read_uninit && b[10]) // expected-note{{read of uninitialized object is not allowed in a constant expression}}
     return false;
 
   return true;
@@ -722,7 +846,7 @@ static_assert(bit_cast<long double>(ld539) == fivehundredandthirtynine);
 #else
 static_assert(round_trip<__int128_t>(34.0L));
 #endif
-}
+} // namespace test_long_double
 
 namespace test_vector {
 
@@ -746,9 +870,9 @@ typedef bool bool32 __attribute__((ext_vector_type(32)));
 typedef bool bool128 __attribute__((ext_vector_type(128)));
 
 static_assert(bit_cast<unsigned char>(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA));
-static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0)));
-static_assert(check_round_trip<bool8>(static_cast<unsigned char>(1)));
-static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0x55)));
+static_assert(round_trip<bool8>('\x00') == 0);
+static_assert(round_trip<bool8>('\x01') == 0x1);
+static_assert(round_trip<bool8>('\x55') == 0x55);
 
 static_assert(bit_cast<unsigned short>(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 0x2F1F : 0xF8F4));
 
@@ -766,4 +890,21 @@ constexpr bool9 bad_short_to_bool9 = __builtin_bit_cast(bool9, static_cast<unsig
 // expected-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(17)))' (vector of 17 'bool' values) is not allowed in a constant expression; element size 1 * element count 17 is not a multiple of the byte size 8}}
 constexpr bool17 bad_int_to_bool17 = __builtin_bit_cast(bool17, 0x0001CAFEU);
 
-}
+struct pad {
+  unsigned short s;
+  unsigned char c;
+};
+constexpr auto p = bit_cast<pad>(bit_cast<bool32>(0xa1c0ffee));
+static_assert(p.s == (LITTLE_END ? 0xffee : 0xa1c0));
+static_assert(p.c == (LITTLE_END ? 0xc0 : 0xff));
+
+#if LITTLE_END == 1
+// expected-note@+5 {{for byte [3]}}
+#else
+// expected-note@+3 {{for byte [0]}}
+#endif
+// expected-note@+1 {{indeterminate value}}
+constexpr auto _bad_p = __builtin_bit_cast(bool32, p); // expected-error {{initialized by a constant expression}}
+
+
+} // namespace test_vector