[clang] [Clang][C++20] Implement constexpr std::bit_cast for bit-fields (PR #74775)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 4 18:45:58 PST 2024


================
@@ -6901,51 +6916,260 @@ bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) {
 //===----------------------------------------------------------------------===//
 namespace {
 
-class BitCastBuffer {
-  // FIXME: We're going to need bit-level granularity when we support
-  // bit-fields.
-  // FIXME: Its possible under the C++ standard for 'char' to not be 8 bits, but
-  // we don't support a host or target where that is the case. Still, we should
-  // use a more generic type in case we ever do.
-  SmallVector<std::optional<unsigned char>, 32> Bytes;
+struct BitSlice : public std::slice {
+  BitSlice(size_t Offset, size_t Num) : std::slice(Offset, Num, 1){};
 
-  static_assert(std::numeric_limits<unsigned char>::digits >= 8,
-                "Need at least 8 bit unsigned char");
+  inline size_t end() const { return start() + size(); }
+};
 
-  bool TargetIsLittleEndian;
+struct BitFieldInfo {
+  /// The offset (in bits) within the appropriate storage type.
+  const unsigned Offset : 16;
+
+  /// The size of the value held by the bit-field, in bits.
+  const unsigned Width : 15;
+
+  /// Whether the bit-field is signed.
+  const unsigned IsSigned : 1;
+
+  /// The storage size in bits which should be used when accessing this
+  /// bitfield.
+  const unsigned StorageSize;
+
+  /// The offset of the bitfield storage from the start of the struct.
+  const CharUnits StorageOffset;
+
+  static BitFieldInfo MakeInfo(const ASTContext &Ctx, const FieldDecl *FD,
+                               const ASTRecordLayout &Layout) {
+    const unsigned StorageSize = Ctx.getTypeSize(FD->getType()),
+                   FieldOffsetBits = Layout.getFieldOffset(FD->getFieldIndex());
+
+    unsigned Width = FD->getBitWidthValue(Ctx);
+    if (Width > StorageSize) {
+      // e.g. `uint8_t c : 12`
+      // we truncate to CHAR_BIT * sizeof(T)
+      // (the extra bits are padding)
+      Width = StorageSize;
+    }
+    unsigned Offset = FieldOffsetBits % StorageSize;
+    if (Ctx.getTargetInfo().isBigEndian()) {
+      // Big-endian bits count from MSB to LSB, so a bit-field of width 12
+      // in a 16-bit storage unit occupies bits [0-11] on a little-endian
+      // machine, but bits [4-15] on a big-endian machine.
+      Offset = StorageSize - (Offset + Width);
+    }
+    return {
+        Offset,
+        Width,
+        FD->getType()->isSignedIntegerOrEnumerationType(),
+        StorageSize,
+        Ctx.toCharUnitsFromBits((FieldOffsetBits / StorageSize) * StorageSize),
+    };
+  }
+};
 
-public:
-  BitCastBuffer(CharUnits Width, bool TargetIsLittleEndian)
-      : Bytes(Width.getQuantity()),
-        TargetIsLittleEndian(TargetIsLittleEndian) {}
-
-  [[nodiscard]] bool readObject(CharUnits Offset, CharUnits Width,
-                                SmallVectorImpl<unsigned char> &Output) const {
-    for (CharUnits I = Offset, E = Offset + Width; I != E; ++I) {
-      // If a byte of an integer is uninitialized, then the whole integer is
-      // uninitialized.
-      if (!Bytes[I.getQuantity()])
+struct BitCastBuffer {
+  // The number of bits in a `char`, needed to handle endianness (which is
+  // assumed to be exclusively big or little) for values with more bits than
+  // this number.
+  //
+  // No current platforms support varying this size.
+  static const uint64_t CharBit = 8;
+
+  const uint64_t BitWidth;
+  const bool IsNativeEndian;
+
+  APInt Data;
+  APInt Invalid; // Indeterminate bits
+
+  BitCastBuffer(uint64_t BitWidth, bool TargetIsLittleEndian, uint64_t CharBit)
+      : BitWidth(BitWidth),
+        IsNativeEndian(llvm::sys::IsLittleEndianHost == TargetIsLittleEndian),
+        Data(BitWidth, 0), Invalid(BitWidth, ~0, /* extend "sign" bit */ true) {
+    assert(Invalid.countl_one() == BitWidth);
+    assert(CharBit == BitCastBuffer::CharBit);
+  }
+
+  [[nodiscard]] bool readMasked(const uint64_t Offset, APInt &Output,
+                                const APInt &Mask) const {
+    assert(Output.getBitWidth() == Mask.getBitWidth());
+    const BitSlice Which = {Offset, Output.getBitWidth()};
+
+    const auto read = [&](const APInt &Mask) {
+      if ((getBits(Invalid, Which) & Mask) != 0)
         return false;
-      Output.push_back(*Bytes[I.getQuantity()]);
+
+      Output = (Output & ~Mask) | (getBits(Data, Which) & Mask);
+      return true;
+    };
+
+    if (!IsNativeEndian && Output.getBitWidth() > CharBit) {
+      bool OK = read(Mask.byteSwap());
+      Output = Output.byteSwap();
+      return OK;
     }
-    if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian)
-      std::reverse(Output.begin(), Output.end());
+
+    return read(Mask);
+  }
+
+  [[nodiscard]] inline bool readObject(const uint64_t Offset,
+                                       APInt &Output) const {
+    return readObject({Offset, Output.getBitWidth()}, Output);
+  }
+
+  [[nodiscard]] bool readObject(const BitSlice &Which, APInt &Output) const {
+    assert(Output.getBitWidth() <= BitWidth);
+    assert(Which.size() <= Output.getBitWidth());
+    assert(Which.end() <= BitWidth);
+    assert((IsNativeEndian || withinByte(Which) ||
+            APInt::getBitsSet(BitWidth, Which.start(), Which.end())
+                .byteSwap()
+                .isShiftedMask()) &&
+           "use readMasked instead");
+
+    if (getBits(Invalid, Which) != 0)
+      return false;
+
+    copyBitsFrom(Output, {0, Which.size()}, Data, Which);
+
+    if (!IsNativeEndian && Output.getBitWidth() > CharBit)
+      Output = Output.byteSwap();
+
     return true;
   }
 
-  void writeObject(CharUnits Offset, SmallVectorImpl<unsigned char> &Input) {
-    if (llvm::sys::IsLittleEndianHost != TargetIsLittleEndian)
-      std::reverse(Input.begin(), Input.end());
+  void writeMasked(const uint64_t Offset, const APInt &Input,
+                   const APInt &Mask) {
+    assert(Input.getBitWidth() == Mask.getBitWidth());
+    const uint64_t BW = Input.getBitWidth();
+    const BitSlice Dest = {Offset, BW};
 
-    size_t Index = 0;
-    for (unsigned char Byte : Input) {
-      assert(!Bytes[Offset.getQuantity() + Index] && "overwriting a byte?");
-      Bytes[Offset.getQuantity() + Index] = Byte;
-      ++Index;
+    auto write = [&](const APInt &Input, const APInt &Mask) {
+      assert((~getBits(Invalid, Dest) & Mask) == 0 && "overwriting data?");
+      const APInt Val = (Input & Mask) | (getBits(Data, Dest) & ~Mask);
+      const APInt Written = getBits(Invalid, Dest) ^ Mask;
+
+      copyBitsFrom(Data, Dest, Val, {0, BW});
+      copyBitsFrom(Invalid, Dest, Written, {0, BW});
+    };
+
+    if (!IsNativeEndian && BW > CharBit) {
+      write(Input.byteSwap(), Mask.byteSwap());
+      return;
+    }
+    write(Input, Mask);
+  }
+
+  void writeObject(const uint64_t Offset, const APInt &Input) {
+    writeObject({Offset, Input.getBitWidth()}, Input, {0, Input.getBitWidth()});
+  }
+
+  void writeObject(const BitSlice &Dst, const APInt &Input,
+                   const BitSlice &Src) {
+    assert(Src.size() == Dst.size());
+    assert(Src.end() <= Input.getBitWidth());
+    assert(Dst.end() <= BitWidth);
+    assert(~getBits(Invalid, Dst) == 0 && "overwriting data?");
+    assert((IsNativeEndian || (withinByte(Src) && withinByte(Dst)) ||
+            [&] {
+              unsigned lo, len;
+              return Src.size() % 8 == 0 &&
+                     APInt::getBitsSet(Src.size(), Src.start(), Src.end())
+                         .byteSwap()
+                         .isShiftedMask(lo, len) &&
+                     lo == Src.start() && len == Src.size() &&
+                     Dst.start() % CharBit == 0 && Dst.size() % CharBit == 0;
+            }()) &&
+           "use writeMasked instead");
+
+    auto write = [&](const APInt &Input) {
+      copyBitsFrom(Data, Dst, Input, Src);
+      clearBits(Invalid, Dst);
+    };
+
+    if (!IsNativeEndian && Input.getBitWidth() > CharBit) {
+      write(Input.byteSwap());
+      return;
     }
+
+    write(Input);
+  }
+
+  // true iff the range described by Which from start (inclusive) to end
+  // (exclusive) refers to the same addressable byte, i.e.
+  //    [0, 0)     -> yes
+  //    [0, 3)     -> yes
+  //    [0, 8)     -> yes
+  //    [16, 24)   -> yes
+  //    [123, 123) -> yes
+  //    [7, 9)     -> no
+  inline static bool withinByte(const BitSlice &Which) {
+    // NB: Which.start() may equal Which.end(), and either may be zero, so
+    // care must be taken here to avoid underflow
+    return Which.size() == 0 ||
+           Which.start() / CharBit + 1 == (Which.end() + CharBit - 1) / CharBit;
+  }
+
+  inline static APInt getBits(const APInt &Int, const BitSlice &Which) {
+    // more lenient than extractBits (which asserts `start < BitWidth`)
+    // this permits e.g. zero-width reads "one past the last bit"
+    assert(Which.end() <= Int.getBitWidth());
+    if (Which.size() == 0) {
+      return APInt::getZeroWidth();
+    }
+    return Int.extractBits(Which.size(), Which.start());
+  }
+
+  // copyBitsFrom acts like `LHS[Dst] = RHS[Src];`, if `APInt`s supported
+  // slicing
+  inline static void copyBitsFrom(APInt &LHS, const BitSlice &Dst,
+                                  const APInt &RHS, const BitSlice &Src) {
----------------
sethp wrote:

This has roughly the right API shape to it, I feel, but there's a bit of a mismatch between it and the existing APInt `insertBits` method, especially since the latter contains this snippet:

```
  // General case - set/clear individual bits in dst based on src.
  // TODO - there is scope for optimization here, but at the moment this code
  // path is barely used so prefer readability over performance.
  for (unsigned i = 0; i != subBitWidth; ++i)
    setBitVal(bitPosition + i, subBits[i]);
```

We'd fairly often be falling through to that "general" implementation here, so I went ahead and improved matters some in https://github.com/sethp/llvm-project/pull/3: but that's a performance-only change, not a functional one, so my plan is to wait for this PR to land before proposing those optimizations. I'm noting it here just in case you also happen to notice that little wrinkle.
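
Just to make the comparison concrete, here's a rough sketch (not part of this PR) of how `copyBitsFrom` could be phrased directly on top of the existing `APInt` primitives; I've spelled the slice parameters out as plain offsets/widths rather than `BitSlice`s:

```
// Sketch only: an extractBits + insertBits formulation of copyBitsFrom.
// DstOff/SrcOff/Num stand in for the BitSlice start()/size() values above.
#include "llvm/ADT/APInt.h"
using llvm::APInt;

static void copyBitsFrom(APInt &LHS, unsigned DstOff, const APInt &RHS,
                         unsigned SrcOff, unsigned Num) {
  if (Num == 0)
    return; // mirror getBits(): zero-width slices are a no-op
  // extractBits pulls the Num-bit slice of RHS starting at SrcOff, and
  // insertBits writes it into LHS at DstOff; for slices that aren't
  // word-aligned, insertBits falls back to the per-bit loop quoted above.
  LHS.insertBits(RHS.extractBits(Num, SrcOff), DstOff);
}
```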

https://github.com/llvm/llvm-project/pull/74775


More information about the cfe-commits mailing list