[clang] [clang][bytecode] Check composite bitcasts for indeterminate bits (PR #118988)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Sat Dec 7 03:36:15 PST 2024


https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/118988

>From c45a6b422ff2ad9f60e7e39d335be2e3d3fe4465 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 6 Dec 2024 15:52:38 +0100
Subject: [PATCH] [clang][bytecode] Check composite bitcasts for indeterminate
 bits

---
 clang/lib/AST/ByteCode/BitcastBuffer.cpp      | 34 ++++++++++++++++---
 clang/lib/AST/ByteCode/BitcastBuffer.h        |  4 +++
 .../lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 33 ++++++++++++++----
 .../ByteCode/builtin-bit-cast-bitfields.cpp   | 31 ++++++++++++-----
 4 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.cpp b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
index 7f29c7c2db0147..fbd500fd8f5f4d 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.cpp
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
@@ -62,11 +62,7 @@ BitcastBuffer::copyBits(Bits BitOffset, Bits BitWidth, Bits FullBitWidth,
 }
 
 bool BitcastBuffer::allInitialized() const {
-  Bits Sum;
-  for (BitRange BR : InitializedBits)
-    Sum += BR.size();
-
-  return Sum == FinalBitSize;
+  return rangeInitialized(Bits::zero(), FinalBitSize);
 }
 
 void BitcastBuffer::markInitialized(Bits Offset, Bits Length) {
@@ -111,6 +107,34 @@ void BitcastBuffer::markInitialized(Bits Offset, Bits Length) {
 #endif
 }
 
+bool BitcastBuffer::rangeInitialized(Bits Offset, Bits Length) const {
+  if (Length.isZero())
+    return true;
+
+  BitRange Range(Offset, Offset + Length - Bits(1));
+  Bits Sum;
+  bool FoundStart = false;
+  for (BitRange BR : InitializedBits) {
+    if (FoundStart) {
+      if (BR.contains(Range.End)) {
+        Sum += (Range.End - BR.Start + Bits(1));
+        break;
+      }
+
+      // Else, BR is assumed to lie completely inside Range (not checked).
+      Sum += BR.size();
+    }
+    if (BR.contains(Range.Start)) {
+      Sum += (BR.End - Range.Start + Bits(1));
+      FoundStart = true;
+    }
+  }
+
+  // Note that Sum can be larger than Range, e.g. when Range is fully
+  // contained in one range.
+  return Sum >= Range.size();
+}
+
 #if 0
   template<typename T>
   static std::string hex(T t) {
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h
index 00fbdc9b85421d..2a0d8a0cd9a81f 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.h
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.h
@@ -55,6 +55,7 @@ struct Bytes {
   Bits toBits() const { return Bits(N * 8); }
 };
 
+/// A bit range. Both Start and End are inclusive.
 struct BitRange {
   Bits Start;
   Bits End;
@@ -62,6 +63,8 @@ struct BitRange {
   BitRange(Bits Start, Bits End) : Start(Start), End(End) {}
   Bits size() const { return End - Start + Bits(1); }
   bool operator<(BitRange Other) const { return Start.N < Other.Start.N; }
+
+  bool contains(Bits B) { return Start <= B && End >= B; }
 };
 
 /// Track what bits have been initialized to known values and which ones
@@ -85,6 +88,7 @@ struct BitcastBuffer {
   /// Marks the bits in the given range as initialized.
   /// FIXME: Can we do this automatically in pushData()?
   void markInitialized(Bits Start, Bits Length);
+  bool rangeInitialized(Bits Offset, Bits Length) const;
 
   /// Push \p BitWidth bits at \p BitOffset from \p In into the buffer.
   /// \p TargetEndianness is the endianness of the target we're compiling for.
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
index 4c25a3bb132fcf..e26410c0b35e86 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
@@ -248,12 +248,10 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
         if (BitWidth.isZero())
           return true;
 
-        if (!P.isInitialized()) {
-          assert(false && "Implement uninitialized value tracking");
-          return ReturnOnUninit;
-        }
+        // Bits will be left uninitialized and diagnosed when reading.
+        if (!P.isInitialized())
+          return true;
 
-        assert(P.isInitialized());
         if (T == PT_Ptr) {
           assert(P.getType()->isNullPtrType());
           // Clang treats nullptr_t has having NO bits in its value
@@ -262,6 +260,7 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
           return true;
         }
 
+        assert(P.isInitialized());
         auto Buff =
             std::make_unique<std::byte[]>(ObjectReprChars.getQuantity());
         // Work around floating point types that contain unused padding bytes.
@@ -355,10 +354,11 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC,
       ToPtr, S.getContext(), Buffer.size(),
       [&](const Pointer &P, PrimType T, Bits BitOffset,
           bool PackedBools) -> bool {
-        CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType());
+        QualType PtrType = P.getType();
+        CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(PtrType);
         Bits FullBitWidth = Bits(ASTCtx.toBits(ObjectReprChars));
         if (T == PT_Float) {
-          const auto &Semantics = ASTCtx.getFloatTypeSemantics(P.getType());
+          const auto &Semantics = ASTCtx.getFloatTypeSemantics(PtrType);
           Bits NumBits = Bits(llvm::APFloatBase::getSizeInBits(Semantics));
           assert(NumBits.isFullByte());
           assert(NumBits.getQuantity() <= FullBitWidth.getQuantity());
@@ -382,6 +382,25 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC,
         else
           BitWidth = FullBitWidth;
 
+        // If any of the bits are uninitialized, we need to abort unless the
+        // target type is std::byte or unsigned char.
+        bool Initialized = Buffer.rangeInitialized(BitOffset, BitWidth);
+        if (!Initialized) {
+          if (!PtrType->isStdByteType() &&
+              !PtrType->isSpecificBuiltinType(BuiltinType::UChar) &&
+              !PtrType->isSpecificBuiltinType(BuiltinType::Char_U)) {
+            const Expr *E = S.Current->getExpr(OpPC);
+            S.FFDiag(E, diag::note_constexpr_bit_cast_indet_dest)
+                << PtrType << S.getLangOpts().CharIsSigned
+                << E->getSourceRange();
+
+            return false;
+          }
+          llvm::errs() << "Not all initialized\n";
+          return true;
+        }
+        llvm::errs() << "All initialized.\n";
+
         auto Memory = Buffer.copyBits(BitOffset, BitWidth, FullBitWidth,
                                       TargetEndianness);
         if (llvm::sys::IsBigEndianHost)
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
index e5337a57bf0fe4..5aa4e256e46382 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
@@ -63,7 +63,7 @@ struct bytes {
 
   constexpr unsigned char operator[](size_t index) {
     if (index < N)
-      return d[index];
+      return d[index]; // expected-note {{read of uninitialized object}}
     return -1;
   }
 };
@@ -141,11 +141,11 @@ namespace BitFields {
                                                  // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'byte' is invalid}}
 
     struct M {
-      // ref-note at +1 {{subobject declared here}}
+      // expected-note at +1 {{subobject declared here}}
       unsigned char mem[sizeof(BF)];
     };
-    // ref-error at +2 {{initialized by a constant expression}}
-    // ref-note at +1 {{not initialized}}
+    // expected-error at +2 {{initialized by a constant expression}}
+    // expected-note at +1 {{not initialized}}
     constexpr M m = bit_cast<M>(bf);
 
     constexpr auto f = []() constexpr {
@@ -156,8 +156,8 @@ namespace BitFields {
 
     static_assert(f()[0] + f()[1] + f()[2] == 0xc0 + 0xff + 0xee);
     {
-      // ref-error at +2 {{initialized by a constant expression}}
-      // ref-note at +1 {{read of uninitialized object is not allowed in a constant expression}}
+      // expected-error at +2 {{initialized by a constant expression}}
+      // expected-note at +1 {{in call to}}
       constexpr auto _bad = f()[3];
     }
 
@@ -173,8 +173,8 @@ namespace BitFields {
     };
     static_assert(g().s0 + g().s1 + g().b0 + g().b1 == 0xc0 + 0xff + 0xe + 0xe);
     {
-      // ref-error at +2 {{initialized by a constant expression}}
-      // ref-note at +1 {{read of uninitialized object is not allowed in a constant expression}}
+      // expected-error at +2 {{initialized by a constant expression}}
+      // expected-note at +1 {{read of uninitialized object is not allowed in a constant expression}}
       constexpr auto _bad = g().b2;
     }
   }
@@ -457,4 +457,19 @@ namespace IndeterminateBits {
   };
   constexpr unsigned char B = __builtin_bit_cast(unsigned char, S2{3});
   static_assert(B == (LITTLE_END ? 3 : 192));
+
+
+
+  struct S3 {
+    unsigned a : 13;
+    unsigned   : 17;
+    unsigned b : 2;
+  };
+
+  struct D {
+    unsigned a;
+  };
+  constexpr D s = __builtin_bit_cast(D, S3{12, 3}); // expected-error {{must be initialized by a constant expression}} \
+                                                    // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'unsigned int' is invalid}}
+
 }



More information about the cfe-commits mailing list