[clang] [clang][bytecode] Partially address string literal uniqueness (PR #142555)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 3 01:02:54 PDT 2025
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/142555
>From a98453657b881d8cacebac99551becf85168649f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 3 Jun 2025 09:24:57 +0200
Subject: [PATCH] [clang][bytecode] Partially address string literal uniqueness
This still leaves one test from cxx20.cpp broken:
constexpr auto b3 = name1() == name1();
---
clang/lib/AST/ByteCode/Interp.cpp | 45 ++++++++++++++++++++++++++++++
clang/lib/AST/ByteCode/Interp.h | 15 ++++++++++
clang/lib/AST/ByteCode/Pointer.cpp | 11 ++++++++
clang/lib/AST/ByteCode/Pointer.h | 1 +
clang/test/AST/ByteCode/cxx11.cpp | 28 +++++++++++++++++++
clang/test/AST/ByteCode/cxx20.cpp | 4 +--
6 files changed, 102 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index e454d9e3bc218..19c95b12b9198 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -1979,6 +1979,51 @@ bool DiagTypeid(InterpState &S, CodePtr OpPC) {
return false;
}
+/// Decide whether two pointers into (different) string literals might refer
+/// to the same address if the literals' storage were overlapped.
+///
+/// Both literals are viewed as raw byte sequences without their null
+/// terminator. The sequences are aligned so that the two pointed-to positions
+/// coincide; the literals are "potentially overlapping" when, from that
+/// alignment point on, the shorter remainder is a prefix of the longer one and
+/// the longer one has a null character exactly where the shorter one ends.
+///
+/// \param LHS Pointer into the first string literal.
+/// \param RHS Pointer into the second string literal.
+/// \returns true if the comparison result of the two addresses cannot be
+///          determined because the literals could share storage.
+bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
+                                             const Pointer &RHS) {
+  // Work in bytes throughout. getIndex() counts elements, while the lengths
+  // and the StringRefs below are measured in bytes, so scale the offsets by
+  // the element size to keep the units consistent for wide literals.
+  unsigned LHSOffset = LHS.getIndex() * LHS.elemSize();
+  unsigned RHSOffset = RHS.getIndex() * RHS.elemSize();
+  // Byte length of each literal, excluding the trailing null element.
+  unsigned LHSLength = (LHS.getNumElems() - 1) * LHS.elemSize();
+  unsigned RHSLength = (RHS.getNumElems() - 1) * RHS.elemSize();
+
+  StringRef LHSStr(
+      reinterpret_cast<const char *>(LHS.atIndex(0).getRawAddress()),
+      LHSLength);
+  StringRef RHSStr(
+      reinterpret_cast<const char *>(RHS.atIndex(0).getRawAddress()),
+      RHSLength);
+
+  // Align the two strings: drop the leading bytes of whichever literal is
+  // pointed to at the larger offset. If that literal is too short to reach
+  // the alignment point, the two can never coincide.
+  int32_t IndexDiff = RHSOffset - LHSOffset;
+  if (IndexDiff < 0) {
+    if (static_cast<int32_t>(LHSLength) < -IndexDiff)
+      return false;
+    LHSStr = LHSStr.drop_front(-IndexDiff);
+  } else {
+    if (static_cast<int32_t>(RHSLength) < IndexDiff)
+      return false;
+    RHSStr = RHSStr.drop_front(IndexDiff);
+  }
+
+  // Pick shorter/longer based on what is left *after* alignment. Using the
+  // original literal lengths here would mislabel the strings whenever
+  // drop_front() changed their relative order, skipping the terminator check
+  // and comparing against a truncated prefix.
+  unsigned ShorterCharWidth;
+  StringRef Shorter;
+  StringRef Longer;
+  if (LHSStr.size() < RHSStr.size()) {
+    ShorterCharWidth = LHS.elemSize();
+    Shorter = LHSStr;
+    Longer = RHSStr;
+  } else {
+    ShorterCharWidth = RHS.elemSize();
+    Shorter = RHSStr;
+    Longer = LHSStr;
+  }
+
+  // The null terminator isn't included in the string data, so check for it
+  // manually. If the longer string doesn't have a null terminator where the
+  // shorter string ends, they aren't potentially overlapping.
+  for (unsigned NullByte : llvm::seq(ShorterCharWidth)) {
+    if (Shorter.size() + NullByte >= Longer.size())
+      break;
+    if (Longer[Shorter.size() + NullByte])
+      return false;
+  }
+  return Shorter == Longer.take_front(Shorter.size());
+}
+
// https://github.com/llvm/llvm-project/issues/102513
#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
#pragma optimize("", off)
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 5473733578d7e..1af3bdb42a3b9 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -1034,6 +1034,9 @@ static inline bool IsOpaqueConstantCall(const CallExpr *E) {
Builtin == Builtin::BI__builtin_function_start);
}
+bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
+ const Pointer &RHS);
+
template <>
inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
using BoolT = PrimConv<PT_Bool>::T;
@@ -1068,6 +1071,18 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
return true;
}
+ // FIXME: The source check here isn't entirely correct.
+ if (LHS.pointsToStringLiteral() && RHS.pointsToStringLiteral() &&
+ LHS.getFieldDesc()->asExpr() != RHS.getFieldDesc()->asExpr()) {
+ if (arePotentiallyOverlappingStringLiterals(LHS, RHS)) {
+ const SourceInfo &Loc = S.Current->getSource(OpPC);
+ S.FFDiag(Loc, diag::note_constexpr_literal_comparison)
+ << LHS.toDiagnosticString(S.getASTContext())
+ << RHS.toDiagnosticString(S.getASTContext());
+ return false;
+ }
+ }
+
if (Pointer::hasSameBase(LHS, RHS)) {
if (LHS.inUnion() && RHS.inUnion()) {
// If the pointers point into a union, things are a little more
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index 6c2566ba20bde..50453c72c582b 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -571,6 +571,17 @@ bool Pointer::pointsToLiteral() const {
return E && !isa<MaterializeTemporaryExpr, StringLiteral>(E);
}
+/// Whether this pointer refers to a block whose descriptor was created from a
+/// StringLiteral expression. Zero pointers, non-block pointers and blocks
+/// flagged as dynamic never qualify.
+bool Pointer::pointsToStringLiteral() const {
+  if (isZero() || !isBlockPointer() || block()->isDynamic())
+    return false;
+
+  // The descriptor may not originate from an expression at all.
+  if (const Expr *Source = block()->getDescriptor()->asExpr())
+    return isa<StringLiteral>(Source);
+  return false;
+}
+
std::optional<std::pair<Pointer, Pointer>>
Pointer::computeSplitPoint(const Pointer &A, const Pointer &B) {
if (!A.isBlockPointer() || !B.isBlockPointer())
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 479da09004685..0234ab02ab8f6 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -756,6 +756,7 @@ class Pointer {
/// Whether this points to a block that's been created for a "literal lvalue",
/// i.e. a non-MaterializeTemporaryExpr Expr.
bool pointsToLiteral() const;
+ bool pointsToStringLiteral() const;
/// Prints the pointer.
void print(llvm::raw_ostream &OS) const;
diff --git a/clang/test/AST/ByteCode/cxx11.cpp b/clang/test/AST/ByteCode/cxx11.cpp
index 44725f13e6a58..b34e7823220e2 100644
--- a/clang/test/AST/ByteCode/cxx11.cpp
+++ b/clang/test/AST/ByteCode/cxx11.cpp
@@ -260,3 +260,31 @@ namespace ZeroSizeCmp {
static_assert(&start != &end, ""); // both-error {{constant expression}} \
// both-note {{comparison of pointers '&start' and '&end' to unrelated zero-sized objects}}
}
+
+namespace OverlappingStrings { // Pointer comparisons between string literals in constant expressions.
+ static_assert(+"foo" != +"bar", ""); // Different contents: accepted as unequal.
+ static_assert(&"xfoo"[1] != &"yfoo"[1], ""); // Same tail from index 1, but the literals differ at index 0.
+ static_assert(+"foot" != +"foo", ""); // "foo" is a prefix, but "foot" has no null where "foo" ends.
+ static_assert(+"foo\0bar" != +"foo\0baz", ""); // Contents differ after the embedded null.
+
+
+#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
+ static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), ""); // Mixed character widths, viewed through const char*.
+ static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), "");
+ static_assert(fold((const char*)u"AAA" != (const char*)"AAA\0\0x"), "");
+
+ constexpr const char *string = "hello";
+ constexpr const char *also_string = string; // Copy of the pointer, so it refers to the same literal.
+ static_assert(string == string, "");
+ static_assert(string == also_string, "");
+
+
+ // These strings may overlap, and so the result of the comparison is unknown.
+ constexpr bool may_overlap_1 = +"foo" == +"foo"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+ constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+ constexpr bool may_overlap_3 = +"foo" == &"bar\0foo"[4]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+ constexpr bool may_overlap_4 = &"xfoo"[1] == &"xfoo"[1]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
+
+
+
+}
diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp
index 0b2234ef83298..e0fb38e106102 100644
--- a/clang/test/AST/ByteCode/cxx20.cpp
+++ b/clang/test/AST/ByteCode/cxx20.cpp
@@ -122,8 +122,8 @@ static_assert(!b4);
constexpr auto bar(const char *p) { return p + __builtin_strlen(p); }
constexpr auto b5 = bar(p1) == p1;
static_assert(!b5);
-constexpr auto b6 = bar(p1) == ""; // ref-error {{must be initialized by a constant expression}} \
- // ref-note {{comparison of addresses of potentially overlapping literals}}
+constexpr auto b6 = bar(p1) == ""; // both-error {{must be initialized by a constant expression}} \
+ // both-note {{comparison of addresses of potentially overlapping literals}}
constexpr auto b7 = bar(p1) + 1 == ""; // both-error {{must be initialized by a constant expression}} \
// both-note {{comparison against pointer '&"test1"[6]' that points past the end of a complete object has unspecified value}}
More information about the cfe-commits
mailing list