[clang] [clang][bytecode] Check primitive bit casts for indeterminate bits (PR #118954)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 6 03:30:28 PST 2024


https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/118954

>From 265be81d34dfc3f24595ccd60f72a1207b700e06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 6 Dec 2024 12:04:47 +0100
Subject: [PATCH] [clang][bytecode] Check primitive bit casts for indeterminate
 bits

Record bits ranges of initialized bits and check them in
allInitialized().
---
 clang/lib/AST/ByteCode/BitcastBuffer.cpp      | 51 +++++++++++++++++++
 clang/lib/AST/ByteCode/BitcastBuffer.h        | 31 ++++++++---
 clang/lib/AST/ByteCode/Compiler.cpp           |  7 +--
 .../lib/AST/ByteCode/InterpBuiltinBitCast.cpp |  1 +
 .../ByteCode/builtin-bit-cast-bitfields.cpp   | 23 ++++++++-
 clang/test/AST/ByteCode/builtin-bit-cast.cpp  | 21 +++-----
 6 files changed, 105 insertions(+), 29 deletions(-)

diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.cpp b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
index 0cc97b0b6bf190..7f29c7c2db0147 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.cpp
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "BitcastBuffer.h"
+#include "llvm/ADT/STLExtras.h"
 
 using namespace clang;
 using namespace clang::interp;
@@ -60,6 +61,56 @@ BitcastBuffer::copyBits(Bits BitOffset, Bits BitWidth, Bits FullBitWidth,
   return Out;
 }
 
+bool BitcastBuffer::allInitialized() const {
+  Bits Sum;
+  for (BitRange BR : InitializedBits)
+    Sum += BR.size();
+
+  return Sum == FinalBitSize;
+}
+
+void BitcastBuffer::markInitialized(Bits Offset, Bits Length) {
+  if (Length.isZero())
+    return;
+
+  BitRange Element(Offset, Offset + Length - Bits(1));
+  if (InitializedBits.empty()) {
+    InitializedBits.push_back(Element);
+    return;
+  }
+
+  assert(InitializedBits.size() >= 1);
+  // Common case of just appending.
+  Bits End = InitializedBits.back().End;
+  if (End <= Offset) {
+    // Merge this range with the last one.
+    // In the best-case scenario, this means we only ever have
+    // one single bit range covering all bits.
+    if (End == (Offset - Bits(1))) {
+      InitializedBits.back().End = Element.End;
+      return;
+    }
+
+    // Otherwise, we can simply append.
+    InitializedBits.push_back(Element);
+  } else {
+    // Insert sorted.
+    auto It = std::upper_bound(InitializedBits.begin(), InitializedBits.end(),
+                               Element);
+    InitializedBits.insert(It, Element);
+  }
+
+#ifndef NDEBUG
+  // Ensure ranges are sorted and non-overlapping.
+  assert(llvm::is_sorted(InitializedBits));
+  for (unsigned I = 1; I != InitializedBits.size(); ++I) {
+    [[maybe_unused]] auto Prev = InitializedBits[I - 1];
+    [[maybe_unused]] auto Cur = InitializedBits[I];
+    assert(Prev.End.N < Cur.Start.N);
+  }
+#endif
+}
+
 #if 0
   template<typename T>
   static std::string hex(T t) {
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h
index c7b170ceb168fa..00fbdc9b85421d 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.h
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.h
@@ -8,6 +8,7 @@
 #ifndef LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H
 #define LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H
 
+#include "llvm/ADT/SmallVector.h"
 #include <cassert>
 #include <cstddef>
 #include <memory>
@@ -30,14 +31,20 @@ struct Bits {
   bool nonZero() const { return N != 0; }
   bool isZero() const { return N == 0; }
 
-  Bits operator-(Bits Other) { return Bits(N - Other.N); }
-  Bits operator+(Bits Other) { return Bits(N + Other.N); }
+  Bits operator-(Bits Other) const { return Bits(N - Other.N); }
+  Bits operator+(Bits Other) const { return Bits(N + Other.N); }
   Bits operator+=(size_t O) {
     N += O;
     return *this;
   }
+  Bits operator+=(Bits O) {
+    N += O.N;
+    return *this;
+  }
 
-  bool operator>=(Bits Other) { return N >= Other.N; }
+  bool operator>=(Bits Other) const { return N >= Other.N; }
+  bool operator<=(Bits Other) const { return N <= Other.N; }
+  bool operator==(Bits Other) const { return N == Other.N; }
 };
 
 /// A quantity in bytes.
@@ -48,11 +55,21 @@ struct Bytes {
   Bits toBits() const { return Bits(N * 8); }
 };
 
+struct BitRange {
+  Bits Start;
+  Bits End;
+
+  BitRange(Bits Start, Bits End) : Start(Start), End(End) {}
+  Bits size() const { return End - Start + Bits(1); }
+  bool operator<(BitRange Other) const { return Start.N < Other.Start.N; }
+};
+
 /// Track what bits have been initialized to known values and which ones
 /// have indeterminate value.
 struct BitcastBuffer {
   Bits FinalBitSize;
   std::unique_ptr<std::byte[]> Data;
+  llvm::SmallVector<BitRange> InitializedBits;
 
   BitcastBuffer(Bits FinalBitSize) : FinalBitSize(FinalBitSize) {
     assert(FinalBitSize.isFullByte());
@@ -64,10 +81,10 @@ struct BitcastBuffer {
   Bits size() const { return FinalBitSize; }
 
   /// Returns \c true if all bits in the buffer have been initialized.
-  bool allInitialized() const {
-    // FIXME: Implement.
-    return true;
-  }
+  bool allInitialized() const;
+  /// Marks the bits in the given range as initialized.
+  /// FIXME: Can we do this automatically in pushData()?
+  void markInitialized(Bits Start, Bits Length);
 
   /// Push \p BitWidth bits at \p BitOffset from \p In into the buffer.
   /// \p TargetEndianness is the endianness of the target we're compiling for.
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 900312401bbda0..7f6295e126dcfe 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -6483,6 +6483,7 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
   QualType ToType = E->getType();
   std::optional<PrimType> ToT = classify(ToType);
 
+  // Bitcasting TO nullptr_t is always fine.
   if (ToType->isNullPtrType()) {
     if (!this->discard(SubExpr))
       return false;
@@ -6490,12 +6491,6 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
     return this->emitNullPtr(0, nullptr, E);
   }
 
-  if (FromType->isNullPtrType() && ToT) {
-    if (!this->discard(SubExpr))
-      return false;
-
-    return visitZeroInitializer(*ToT, ToType, E);
-  }
   assert(!ToType->isReferenceType());
 
   // Prepare storage for the result in case we discard.
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
index 6ee3826fb3eea6..4c25a3bb132fcf 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
@@ -287,6 +287,7 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
         }
 
         Buffer.pushData(Buff.get(), BitOffset, BitWidth, TargetEndianness);
+        Buffer.markInitialized(BitOffset, BitWidth);
         return true;
       });
 }
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
index 00f465a471b0a4..e5337a57bf0fe4 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
@@ -134,11 +134,11 @@ namespace BitFields {
     enum byte : unsigned char {};
 
     constexpr BF bf = {0x3};
-    /// Requires bitcasts to composite types.
     static_assert(bit_cast<bits<2>>(bf).bits == bf.z);
     static_assert(bit_cast<unsigned char>(bf));
 
-    static_assert(__builtin_bit_cast(byte, bf));
+    static_assert(__builtin_bit_cast(byte, bf)); // expected-error {{not an integral constant expression}} \
+                                                 // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'byte' is invalid}}
 
     struct M {
       // ref-note at +1 {{subobject declared here}}
@@ -439,3 +439,22 @@ namespace Enums {
   static_assert(
     bit_cast<X>((unsigned char)0x40).direction == X::direction::right);
 }
+
+namespace IndeterminateBits {
+  struct S {
+    unsigned a : 13;
+    unsigned   : 17;
+    unsigned b : 2;
+  };
+  constexpr unsigned A = __builtin_bit_cast(unsigned, S{12, 3}); // expected-error {{must be initialized by a constant expression}} \
+                                                                 // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'unsigned int' is invalid}}
+
+
+  /// GCC refuses to compile this as soon as we access the indeterminate bits
+  /// in the static_assert. MSVC accepts it.
+  struct S2 {
+    unsigned char a : 2;
+  };
+  constexpr unsigned char B = __builtin_bit_cast(unsigned char, S2{3});
+  static_assert(B == (LITTLE_END ? 3 : 192));
+}
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
index f89eb3584bbcff..e99ab3904c339c 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
@@ -130,12 +130,8 @@ namespace simple {
   static_assert(check_round_trip<unsigned>((int)0x0C05FEFE));
   static_assert(round_trip<float>((int)0x0C05FEFE));
 
-
-  /// This works in GCC and in the bytecode interpreter, but the current interpreter
-  /// diagnoses it.
-  /// FIXME: Should also be rejected in the bytecode interpreter.
-  static_assert(__builtin_bit_cast(intptr_t, nullptr) == 0); // ref-error {{not an integral constant expression}} \
-                                                             // ref-note {{indeterminate value can only initialize an object}}
+  static_assert(__builtin_bit_cast(intptr_t, nullptr) == 0); // both-error {{not an integral constant expression}} \
+                                                             // both-note {{indeterminate value can only initialize an object}}
 
   constexpr int test_from_nullptr_pass = (__builtin_bit_cast(unsigned char[sizeof(nullptr)], nullptr), 0);
   constexpr unsigned char NPData[sizeof(nullptr)] = {1,2,3,4};
@@ -394,7 +390,6 @@ void bad_types() {
   };
   static_assert(__builtin_bit_cast(int, X{0}) == 0); // both-error {{not an integral constant expression}} \
                                                      // both-note {{bit_cast from a union type is not allowed in a constant expression}}
-#if 1
 
   struct G {
     int g;
@@ -405,19 +400,17 @@ void bad_types() {
   // both-error at +2 {{constexpr variable 'x' must be initialized by a constant expression}}
   // both-note at +1 {{bit_cast to a union type is not allowed in a constant expression}}
   constexpr X x = __builtin_bit_cast(X, G{0});
-#endif
+
   struct has_pointer {
-    int *ptr; // both-note {{invalid type 'int *' is a member of 'has_pointer'}}
+    int *ptr; // both-note 2{{invalid type 'int *' is a member of 'has_pointer'}}
   };
 
   constexpr intptr_t ptr = __builtin_bit_cast(intptr_t, has_pointer{0}); // both-error {{constexpr variable 'ptr' must be initialized by a constant expression}} \
                                                                          // both-note {{bit_cast from a pointer type is not allowed in a constant expression}}
 
-#if 0
-  // expected-error at +2 {{constexpr variable 'hptr' must be initialized by a constant expression}}
-  // expected-note at +1 {{bit_cast to a pointer type is not allowed in a constant expression}}
-  constexpr has_pointer hptr =  __builtin_bit_cast(has_pointer, 0ul);
-#endif
+  // both-error at +2 {{constexpr variable 'hptr' must be initialized by a constant expression}}
+  // both-note at +1 {{bit_cast to a pointer type is not allowed in a constant expression}}
+  constexpr has_pointer hptr =  __builtin_bit_cast(has_pointer, (intptr_t)0);
 }
 
 void test_array_fill() {



More information about the cfe-commits mailing list