[clang] [clang][bytecode] Check primitive bit casts for indeterminate bits (PR #118954)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 6 03:30:28 PST 2024
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/118954
>From 265be81d34dfc3f24595ccd60f72a1207b700e06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 6 Dec 2024 12:04:47 +0100
Subject: [PATCH] [clang][bytecode] Check primitive bit casts for indeterminate
bits
Record bit ranges of initialized bits and check them in
allInitialized().
---
clang/lib/AST/ByteCode/BitcastBuffer.cpp | 51 +++++++++++++++++++
clang/lib/AST/ByteCode/BitcastBuffer.h | 31 ++++++++---
clang/lib/AST/ByteCode/Compiler.cpp | 7 +--
.../lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 1 +
.../ByteCode/builtin-bit-cast-bitfields.cpp | 23 ++++++++-
clang/test/AST/ByteCode/builtin-bit-cast.cpp | 21 +++-----
6 files changed, 105 insertions(+), 29 deletions(-)
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.cpp b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
index 0cc97b0b6bf190..7f29c7c2db0147 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.cpp
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
#include "BitcastBuffer.h"
+#include "llvm/ADT/STLExtras.h"
using namespace clang;
using namespace clang::interp;
@@ -60,6 +61,56 @@ BitcastBuffer::copyBits(Bits BitOffset, Bits BitWidth, Bits FullBitWidth,
return Out;
}
+bool BitcastBuffer::allInitialized() const {
+ Bits Sum;
+ for (BitRange BR : InitializedBits)
+ Sum += BR.size();
+
+ return Sum == FinalBitSize;
+}
+
+void BitcastBuffer::markInitialized(Bits Offset, Bits Length) {
+ if (Length.isZero())
+ return;
+
+ BitRange Element(Offset, Offset + Length - Bits(1));
+ if (InitializedBits.empty()) {
+ InitializedBits.push_back(Element);
+ return;
+ }
+
+ assert(InitializedBits.size() >= 1);
+ // Common case of just appending.
+ Bits End = InitializedBits.back().End;
+ if (End <= Offset) {
+ // Merge this range with the last one.
+ // In the best-case scenario, this means we only ever have
+ // one single bit range covering all bits.
+ if (End == (Offset - Bits(1))) {
+ InitializedBits.back().End = Element.End;
+ return;
+ }
+
+ // Otherwise, we can simply append.
+ InitializedBits.push_back(Element);
+ } else {
+ // Insert sorted.
+ auto It = std::upper_bound(InitializedBits.begin(), InitializedBits.end(),
+ Element);
+ InitializedBits.insert(It, Element);
+ }
+
+#ifndef NDEBUG
+ // Ensure ranges are sorted and non-overlapping.
+ assert(llvm::is_sorted(InitializedBits));
+ for (unsigned I = 1; I != InitializedBits.size(); ++I) {
+ [[maybe_unused]] auto Prev = InitializedBits[I - 1];
+ [[maybe_unused]] auto Cur = InitializedBits[I];
+ assert(Prev.End.N < Cur.Start.N);
+ }
+#endif
+}
+
#if 0
template<typename T>
static std::string hex(T t) {
diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h
index c7b170ceb168fa..00fbdc9b85421d 100644
--- a/clang/lib/AST/ByteCode/BitcastBuffer.h
+++ b/clang/lib/AST/ByteCode/BitcastBuffer.h
@@ -8,6 +8,7 @@
#ifndef LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H
#define LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H
+#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <cstddef>
#include <memory>
@@ -30,14 +31,20 @@ struct Bits {
bool nonZero() const { return N != 0; }
bool isZero() const { return N == 0; }
- Bits operator-(Bits Other) { return Bits(N - Other.N); }
- Bits operator+(Bits Other) { return Bits(N + Other.N); }
+ Bits operator-(Bits Other) const { return Bits(N - Other.N); }
+ Bits operator+(Bits Other) const { return Bits(N + Other.N); }
Bits operator+=(size_t O) {
N += O;
return *this;
}
+ Bits operator+=(Bits O) {
+ N += O.N;
+ return *this;
+ }
- bool operator>=(Bits Other) { return N >= Other.N; }
+ bool operator>=(Bits Other) const { return N >= Other.N; }
+ bool operator<=(Bits Other) const { return N <= Other.N; }
+ bool operator==(Bits Other) const { return N == Other.N; }
};
/// A quantity in bytes.
@@ -48,11 +55,21 @@ struct Bytes {
Bits toBits() const { return Bits(N * 8); }
};
+struct BitRange {
+ Bits Start;
+ Bits End;
+
+ BitRange(Bits Start, Bits End) : Start(Start), End(End) {}
+ Bits size() const { return End - Start + Bits(1); }
+ bool operator<(BitRange Other) const { return Start.N < Other.Start.N; }
+};
+
/// Track what bits have been initialized to known values and which ones
/// have indeterminate value.
struct BitcastBuffer {
Bits FinalBitSize;
std::unique_ptr<std::byte[]> Data;
+ llvm::SmallVector<BitRange> InitializedBits;
BitcastBuffer(Bits FinalBitSize) : FinalBitSize(FinalBitSize) {
assert(FinalBitSize.isFullByte());
@@ -64,10 +81,10 @@ struct BitcastBuffer {
Bits size() const { return FinalBitSize; }
/// Returns \c true if all bits in the buffer have been initialized.
- bool allInitialized() const {
- // FIXME: Implement.
- return true;
- }
+ bool allInitialized() const;
+ /// Marks the bits in the given range as initialized.
+ /// FIXME: Can we do this automatically in pushData()?
+ void markInitialized(Bits Start, Bits Length);
/// Push \p BitWidth bits at \p BitOffset from \p In into the buffer.
/// \p TargetEndianness is the endianness of the target we're compiling for.
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 900312401bbda0..7f6295e126dcfe 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -6483,6 +6483,7 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
QualType ToType = E->getType();
std::optional<PrimType> ToT = classify(ToType);
+ // Bitcasting TO nullptr_t is always fine.
if (ToType->isNullPtrType()) {
if (!this->discard(SubExpr))
return false;
@@ -6490,12 +6491,6 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
return this->emitNullPtr(0, nullptr, E);
}
- if (FromType->isNullPtrType() && ToT) {
- if (!this->discard(SubExpr))
- return false;
-
- return visitZeroInitializer(*ToT, ToType, E);
- }
assert(!ToType->isReferenceType());
// Prepare storage for the result in case we discard.
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
index 6ee3826fb3eea6..4c25a3bb132fcf 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
@@ -287,6 +287,7 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
}
Buffer.pushData(Buff.get(), BitOffset, BitWidth, TargetEndianness);
+ Buffer.markInitialized(BitOffset, BitWidth);
return true;
});
}
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
index 00f465a471b0a4..e5337a57bf0fe4 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
@@ -134,11 +134,11 @@ namespace BitFields {
enum byte : unsigned char {};
constexpr BF bf = {0x3};
- /// Requires bitcasts to composite types.
static_assert(bit_cast<bits<2>>(bf).bits == bf.z);
static_assert(bit_cast<unsigned char>(bf));
- static_assert(__builtin_bit_cast(byte, bf));
+ static_assert(__builtin_bit_cast(byte, bf)); // expected-error {{not an integral constant expression}} \
+ // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'byte' is invalid}}
struct M {
// ref-note at +1 {{subobject declared here}}
@@ -439,3 +439,22 @@ namespace Enums {
static_assert(
bit_cast<X>((unsigned char)0x40).direction == X::direction::right);
}
+
+namespace IndeterminateBits {
+ struct S {
+ unsigned a : 13;
+ unsigned : 17;
+ unsigned b : 2;
+ };
+ constexpr unsigned A = __builtin_bit_cast(unsigned, S{12, 3}); // expected-error {{must be initialized by a constant expression}} \
+ // expected-note {{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte'; 'unsigned int' is invalid}}
+
+
+ /// GCC refuses to compile this as soon as we access the indeterminate bits
+ /// in the static_assert. MSVC accepts it.
+ struct S2 {
+ unsigned char a : 2;
+ };
+ constexpr unsigned char B = __builtin_bit_cast(unsigned char, S2{3});
+ static_assert(B == (LITTLE_END ? 3 : 192));
+}
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
index f89eb3584bbcff..e99ab3904c339c 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
@@ -130,12 +130,8 @@ namespace simple {
static_assert(check_round_trip<unsigned>((int)0x0C05FEFE));
static_assert(round_trip<float>((int)0x0C05FEFE));
-
- /// This works in GCC and in the bytecode interpreter, but the current interpreter
- /// diagnoses it.
- /// FIXME: Should also be rejected in the bytecode interpreter.
- static_assert(__builtin_bit_cast(intptr_t, nullptr) == 0); // ref-error {{not an integral constant expression}} \
- // ref-note {{indeterminate value can only initialize an object}}
+ static_assert(__builtin_bit_cast(intptr_t, nullptr) == 0); // both-error {{not an integral constant expression}} \
+ // both-note {{indeterminate value can only initialize an object}}
constexpr int test_from_nullptr_pass = (__builtin_bit_cast(unsigned char[sizeof(nullptr)], nullptr), 0);
constexpr unsigned char NPData[sizeof(nullptr)] = {1,2,3,4};
@@ -394,7 +390,6 @@ void bad_types() {
};
static_assert(__builtin_bit_cast(int, X{0}) == 0); // both-error {{not an integral constant expression}} \
// both-note {{bit_cast from a union type is not allowed in a constant expression}}
-#if 1
struct G {
int g;
@@ -405,19 +400,17 @@ void bad_types() {
// both-error at +2 {{constexpr variable 'x' must be initialized by a constant expression}}
// both-note at +1 {{bit_cast to a union type is not allowed in a constant expression}}
constexpr X x = __builtin_bit_cast(X, G{0});
-#endif
+
struct has_pointer {
- int *ptr; // both-note {{invalid type 'int *' is a member of 'has_pointer'}}
+ int *ptr; // both-note 2{{invalid type 'int *' is a member of 'has_pointer'}}
};
constexpr intptr_t ptr = __builtin_bit_cast(intptr_t, has_pointer{0}); // both-error {{constexpr variable 'ptr' must be initialized by a constant expression}} \
// both-note {{bit_cast from a pointer type is not allowed in a constant expression}}
-#if 0
- // expected-error at +2 {{constexpr variable 'hptr' must be initialized by a constant expression}}
- // expected-note at +1 {{bit_cast to a pointer type is not allowed in a constant expression}}
- constexpr has_pointer hptr = __builtin_bit_cast(has_pointer, 0ul);
-#endif
+ // both-error at +2 {{constexpr variable 'hptr' must be initialized by a constant expression}}
+ // both-note at +1 {{bit_cast to a pointer type is not allowed in a constant expression}}
+ constexpr has_pointer hptr = __builtin_bit_cast(has_pointer, (intptr_t)0);
}
void test_array_fill() {
More information about the cfe-commits
mailing list