[clang] [clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation (PR #109208)

Richard Smith via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 19 11:40:35 PDT 2024


https://github.com/zygoloid updated https://github.com/llvm/llvm-project/pull/109208

>From 81193568c17a89f6cf42f43a82fb1fbf0f90184d Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Wed, 18 Sep 2024 21:59:56 +0000
Subject: [PATCH 01/10] Implement current CWG direction for string literal
 comparisons.

Track the identity of each string literal object produced by evaluation
with a global version number. Accept comparisons between literals of the
same version, and between literals of different versions that cannot
possibly be placed in overlapping storage. Treat the remaining
comparisons as non-constant.
---
 clang/include/clang/AST/ASTContext.h          |  11 ++
 .../include/clang/Basic/DiagnosticASTKinds.td |   2 +
 clang/lib/AST/ExprConstant.cpp                | 119 +++++++++++++++---
 clang/test/AST/ByteCode/builtin-functions.cpp |   3 +-
 clang/test/AST/ByteCode/cxx20.cpp             |  20 ++-
 clang/test/SemaCXX/builtins.cpp               |   2 +-
 .../SemaCXX/constant-expression-cxx11.cpp     |  36 ++++--
 7 files changed, 154 insertions(+), 39 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index b65a1f7dff5bc1..6170bcd4f15ae3 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -324,6 +324,13 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// This is lazily created.  This is intentionally not serialized.
   mutable llvm::StringMap<StringLiteral *> StringLiteralCache;
 
+  /// The next string literal "version" to allocate during constant evaluation.
+  /// This is used to distinguish between repeated evaluations of the same
+  /// string literal.
+  ///
+  /// TODO: Ensure version numbers don't collide when deserialized.
+  unsigned NextStringLiteralVersion = 0;
+
   /// MD5 hash of CUID. It is calculated when first used and cached by this
   /// data member.
   mutable std::string CUIDHash;
@@ -3278,6 +3285,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// PredefinedExpr to cache evaluated results.
   StringLiteral *getPredefinedStringLiteralFromCache(StringRef Key) const;
 
+  /// Return the next version number to be used for a string literal evaluated
+  /// as part of constant evaluation.
+  unsigned getNextStringLiteralVersion() { return NextStringLiteralVersion++; }
+
   /// Return a declaration for the global GUID object representing the given
   /// GUID value.
   MSGuidDecl *getMSGuidDecl(MSGuidDeclParts Parts) const;
diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index 21a307d1e89878..76e693f6b4a6ca 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -96,6 +96,8 @@ def note_constexpr_pointer_constant_comparison : Note<
   "at runtime">;
 def note_constexpr_literal_comparison : Note<
   "comparison of addresses of literals has unspecified value">;
+def note_constexpr_opaque_call_comparison : Note<
+  "comparison against opaque constant has unspecified value">;
 def note_constexpr_pointer_weak_comparison : Note<
   "comparison against address of weak declaration '%0' can only be performed "
   "at runtime">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 6387e375dda79c..d9384a7c125a82 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -54,8 +54,10 @@
 #include "clang/Basic/DiagnosticSema.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/APFixedPoint.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/SipHash.h"
@@ -2061,8 +2063,8 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) {
   return true;
 }
 
-/// Should this call expression be treated as a no-op?
-static bool IsNoOpCall(const CallExpr *E) {
+/// Should this call expression be treated as forming an opaque constant?
+static bool IsOpaqueConstantCall(const CallExpr *E) {
   unsigned Builtin = E->getBuiltinCallee();
   return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
           Builtin == Builtin::BI__builtin___NSStringMakeConstantString ||
@@ -2070,6 +2072,12 @@ static bool IsNoOpCall(const CallExpr *E) {
           Builtin == Builtin::BI__builtin_function_start);
 }
 
+static bool IsOpaqueConstantCall(const LValue &LVal) {
+  auto *BaseExpr =
+      llvm::dyn_cast_or_null<CallExpr>(LVal.Base.dyn_cast<const Expr *>());
+  return BaseExpr && IsOpaqueConstantCall(BaseExpr);
+}
+
 static bool IsGlobalLValue(APValue::LValueBase B) {
   // C++11 [expr.const]p3 An address constant expression is a prvalue core
   // constant expression of pointer type that evaluates to...
@@ -2115,7 +2123,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) {
   case Expr::ObjCBoxedExprClass:
     return cast<ObjCBoxedExpr>(E)->isExpressibleAsConstantInitializer();
   case Expr::CallExprClass:
-    return IsNoOpCall(cast<CallExpr>(E));
+    return IsOpaqueConstantCall(cast<CallExpr>(E));
   // For GCC compatibility, &&label has static storage duration.
   case Expr::AddrLabelExprClass:
     return true;
@@ -2142,11 +2150,81 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
   return LVal.Base.dyn_cast<const ValueDecl*>();
 }
 
-static bool IsLiteralLValue(const LValue &Value) {
-  if (Value.getLValueCallIndex())
+// Information about an LValueBase that is some kind of string.
+struct LValueBaseString {
+  std::string ObjCEncodeStorage;
+  StringRef Bytes;
+  int CharWidth;
+};
+
+// Gets the lvalue base of LVal as a string.
+static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal,
+                                  LValueBaseString &AsString) {
+  const auto *BaseExpr = LVal.Base.dyn_cast<const Expr *>();
+  if (!BaseExpr)
+    return false;
+
+  // For ObjCEncodeExpr, we need to compute and store the string.
+  if (const auto *EE = dyn_cast<ObjCEncodeExpr>(BaseExpr)) {
+    Info.Ctx.getObjCEncodingForType(EE->getEncodedType(),
+                                    AsString.ObjCEncodeStorage);
+    AsString.Bytes = AsString.ObjCEncodeStorage;
+    AsString.CharWidth = 1;
+    return true;
+  }
+
+  // Otherwise, we have a StringLiteral.
+  const auto *Lit = dyn_cast<StringLiteral>(BaseExpr);
+  if (const auto *PE = dyn_cast<PredefinedExpr>(BaseExpr))
+    Lit = PE->getFunctionName();
+
+  if (!Lit)
+    return false;
+
+  AsString.Bytes = Lit->getBytes();
+  AsString.CharWidth = Lit->getCharByteWidth();
+  return true;
+}
+
+// Determine whether two string literals potentially overlap. This will be the
+// case if they agree on the values of all the bytes on the overlapping region
+// between them.
+static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
+                                                    const LValue &LHS,
+                                                    const LValue &RHS) {
+  LValueBaseString LHSString, RHSString;
+  if (!GetLValueBaseAsString(Info, LHS, LHSString) ||
+      !GetLValueBaseAsString(Info, RHS, RHSString))
     return false;
-  const Expr *E = Value.Base.dyn_cast<const Expr*>();
-  return E && !isa<MaterializeTemporaryExpr>(E);
+
+  // This is the byte offset to the location of the first character of LHS
+  // within RHS. We don't need to look at the characters of one string that
+  // would appear before the start of the other string if they were merged.
+  CharUnits Offset = RHS.Offset - LHS.Offset;
+  if (Offset.isPositive()) {
+    RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity());
+  } else if (Offset.isNegative()) {
+    LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity());
+  }
+
+  bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size();
+  StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes;
+  StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes;
+  int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth;
+
+  // The null terminator isn't included in the string data, so check for it
+  // manually. If the longer string doesn't have a null terminator where the
+  // shorter string ends, they aren't potentially overlapping.
+  for (int nullByte : llvm::seq(ShorterCharWidth)) {
+    if (Shorter.size() + nullByte >= Longer.size())
+      break;
+    if (Longer[Shorter.size() + nullByte])
+      return false;
+  }
+
+  // Otherwise, they're potentially overlapping if and only if the overlapping
+  // region is the same.
+  return Shorter == Longer.take_front(Shorter.size());
 }
 
 static bool IsWeakLValue(const LValue &Value) {
@@ -8573,7 +8651,10 @@ class LValueExprEvaluator
   bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
   bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
   bool VisitMemberExpr(const MemberExpr *E);
-  bool VisitStringLiteral(const StringLiteral *E) { return Success(E); }
+  bool VisitStringLiteral(const StringLiteral *E) {
+    uint64_t Version = Info.getASTContext().getNextStringLiteralVersion();
+    return Success(APValue::LValueBase(E, 0, static_cast<unsigned>(Version)));
+  }
   bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); }
   bool VisitCXXTypeidExpr(const CXXTypeidExpr *E);
   bool VisitCXXUuidofExpr(const CXXUuidofExpr *E);
@@ -9639,7 +9720,7 @@ static bool isOneByteCharacterType(QualType T) {
 
 bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                                 unsigned BuiltinOp) {
-  if (IsNoOpCall(E))
+  if (IsOpaqueConstantCall(E))
     return Success(E);
 
   switch (BuiltinOp) {
@@ -13889,13 +13970,21 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
           (!RHSValue.Base && !RHSValue.Offset.isZero()))
         return DiagComparison(diag::note_constexpr_pointer_constant_comparison,
                               !RHSValue.Base);
-      // It's implementation-defined whether distinct literals will have
-      // distinct addresses. In clang, the result of such a comparison is
-      // unspecified, so it is not a constant expression. However, we do know
-      // that the address of a literal will be non-null.
-      if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) &&
-          LHSValue.Base && RHSValue.Base)
+      // C++2c [intro.object]/10:
+      //   Two objects [...] may have the same address if [...] they are both
+      //   potentially non-unique objects.
+      // C++2c [intro.object]/9:
+      //   An object is potentially non-unique if it is a string literal object,
+      //   the backing array of an initializer list, or a subobject thereof.
+      //
+      // This makes the comparison result unspecified, so it's not a constant
+      // expression.
+      //
+      // TODO: Do we need to handle the initializer list case here?
+      if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue))
         return DiagComparison(diag::note_constexpr_literal_comparison);
+      if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue))
+        return DiagComparison(diag::note_constexpr_opaque_call_comparison);
       // We can't tell whether weak symbols will end up pointing to the same
       // object.
       if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue))
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 9fd5eae67a21f6..3de0baaade50d0 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -966,7 +966,8 @@ namespace shufflevector {
 namespace FunctionStart {
   void a(void) {}
   static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \
-                                                       // both-note {{comparison of addresses of literals has unspecified value}}
+                                                       // ref-note {{comparison against opaque constant has unspecified value}} \
+                                                       // expected-note {{comparison of addresses of literals has unspecified value}}
 }
 
 namespace BuiltinInImplicitCtor {
diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp
index 9bbc3dbe0073d3..f2a87bae55b13e 100644
--- a/clang/test/AST/ByteCode/cxx20.cpp
+++ b/clang/test/AST/ByteCode/cxx20.cpp
@@ -99,7 +99,7 @@ constexpr int f() {
 static_assert(f());
 #endif
 
-/// Distinct literals have disctinct addresses.
+/// Distinct literals have distinct addresses.
 /// see https://github.com/llvm/llvm-project/issues/58754
 constexpr auto foo(const char *p) { return p; }
 constexpr auto p1 = "test1";
@@ -108,22 +108,16 @@ constexpr auto p2 = "test2";
 constexpr bool b1 = foo(p1) == foo(p1);
 static_assert(b1);
 
-constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \
-                                        // ref-note {{comparison of addresses of literals}} \
-                                        // ref-note {{declared here}}
-static_assert(!b2); // ref-error {{not an integral constant expression}} \
-                    // ref-note {{not a constant expression}}
+constexpr bool b2 = foo(p1) == foo(p2);
+static_assert(!b2);
 
 constexpr auto name1() { return "name1"; }
 constexpr auto name2() { return "name2"; }
 
-constexpr auto b3 = name1() == name1();
-static_assert(b3);
-constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \
-                                        // ref-note {{has unspecified value}} \
-                                        // ref-note {{declared here}}
-static_assert(!b4); // ref-error {{not an integral constant expression}} \
-                    // ref-note {{not a constant expression}}
+constexpr auto b3 = name1() == name1(); // ref-error {{must be initialized by a constant expression}} \
+                                        // ref-note {{comparison of addresses of literals}}
+constexpr auto b4 = name1() == name2();
+static_assert(!b4);
 
 namespace UninitializedFields {
   class A {
diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp
index f47ed3a1f7ebfc..7df405f0662a15 100644
--- a/clang/test/SemaCXX/builtins.cpp
+++ b/clang/test/SemaCXX/builtins.cpp
@@ -47,7 +47,7 @@ void a(void) {}
 int n;
 void *p = __builtin_function_start(n);               // expected-error {{argument must be a function}}
 static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static assertion expression is not an integral constant expression}}
-// expected-note at -1 {{comparison of addresses of literals has unspecified value}}
+// expected-note at -1 {{comparison against opaque constant has unspecified value}}
 } // namespace function_start
 
 void no_ms_builtins() {
diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp
index 44ef540f41fa8c..3d56c3e351f9bd 100644
--- a/clang/test/SemaCXX/constant-expression-cxx11.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -std=c++20 -isystem %S/Inputs -fsyntax-only -verify=expected,cxx11_20,cxx20_23,pre-cxx23 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion
 // RUN: %clang_cc1 -std=c++11 -isystem %S/Inputs -fsyntax-only -verify=expected,cxx11_20,cxx11,pre-cxx23    -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion
 
+// This macro forces its argument to be constant-folded, even if it's not
+// otherwise a constant expression.
+#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
+
 namespace StaticAssertFoldTest {
 
 int x;
@@ -358,11 +362,31 @@ struct Str {
 
 extern char externalvar[];
 constexpr bool constaddress = (void *)externalvar == (void *)0x4000UL; // expected-error {{must be initialized by a constant expression}} expected-note {{reinterpret_cast}}
-constexpr bool litaddress = "foo" == "foo"; // expected-error {{must be initialized by a constant expression}}
-// expected-note at -1 {{comparison of addresses of literals has unspecified value}}
-// cxx20_23-warning at -2 {{comparison between two arrays is deprecated}}
 static_assert(0 != "foo", "");
 
+// OK: These string literals cannot possibly overlap.
+static_assert(+"foo" != +"bar", "");
+static_assert("xfoo" + 1 != "yfoo" + 1, "");
+static_assert(+"foot" != +"foo", "");
+static_assert(+"foo\0bar" != +"foo\0baz", "");
+
+// These can't overlap because the null terminator for UTF-16 is two bytes wide.
+static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), "");
+static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), "");
+
+// These strings may overlap, and so the result of the comparison is unknown.
+constexpr bool may_overlap_1 = +"foo" == +"foo"; // expected-error {{}} expected-note {{addresses of literals}}
+constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // expected-error {{}} expected-note {{addresses of literals}}
+constexpr bool may_overlap_3 = +"foo" == "bar\0foo" + 4; // expected-error {{}} expected-note {{addresses of literals}}
+constexpr bool may_overlap_4 = "xfoo" + 1 == "xfoo" + 1; // expected-error {{}} expected-note {{addresses of literals}}
+
+// These may overlap even though they have different encodings.
+// One of these two comparisons is non-constant, but due to endianness we don't
+// know which one.
+constexpr bool may_overlap_different_encoding[] =
+  {fold((const char*)u"A" != (const char*)"xA\0\0\0x" + 1), fold((const char*)u"A" != (const char*)"x\0A\0\0x" + 1)};
+  // expected-error at -2 {{}} expected-note at -1 {{addresses of literals}}
+
 }
 
 namespace MaterializeTemporary {
@@ -1543,16 +1567,10 @@ namespace MutableMembers {
 
 namespace Fold {
 
-  // This macro forces its argument to be constant-folded, even if it's not
-  // otherwise a constant expression.
-  #define fold(x) (__builtin_constant_p(x) ? (x) : (x))
-
   constexpr int n = (long)(char*)123; // expected-error {{constant expression}} expected-note {{reinterpret_cast}}
   constexpr int m = fold((long)(char*)123); // ok
   static_assert(m == 123, "");
 
-  #undef fold
-
 }
 
 namespace DR1454 {

>From 2f8cfbe8c5f09c89b7a572b65985c1e1693e8d89 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Wed, 18 Sep 2024 22:23:06 +0000
Subject: [PATCH 02/10] Add a test that repeated evaluation in the same
 function call might produce different string literals each time.

---
 clang/test/SemaCXX/constant-expression-cxx14.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/clang/test/SemaCXX/constant-expression-cxx14.cpp b/clang/test/SemaCXX/constant-expression-cxx14.cpp
index 70ab5dcd357c1c..01326aed8be63d 100644
--- a/clang/test/SemaCXX/constant-expression-cxx14.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx14.cpp
@@ -1306,3 +1306,18 @@ constexpr int field(int a) {
 static_assert(field(3), ""); // expected-error {{constant expression}} \
                              // expected-note {{in call to 'field(3)'}}
 }
+
+namespace literal_comparison {
+
+constexpr bool different_in_loop(bool b = false) {
+  if (b) return false;
+
+  const char *p[2] = {};
+  for (const char *&r : p)
+    r = "hello";
+  return p[0] == p[1]; // expected-note {{addresses of literals}}
+}
+constexpr bool check = different_in_loop();
+  // expected-error at -1 {{}} expected-note at -1 {{in call}}
+
+}
\ No newline at end of file

>From def1aa7c677fd5fdaa4a8b4c24a5858e03371fb1 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Wed, 18 Sep 2024 22:27:09 +0000
Subject: [PATCH 03/10] A couple more tests.

---
 clang/test/SemaCXX/constant-expression-cxx11.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp
index 3d56c3e351f9bd..e2ea984b37cd04 100644
--- a/clang/test/SemaCXX/constant-expression-cxx11.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp
@@ -374,6 +374,11 @@ static_assert(+"foo\0bar" != +"foo\0baz", "");
 static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), "");
 static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), "");
 
+constexpr const char *string = "hello";
+constexpr const char *also_string = string;
+static_assert(string == string, "");
+static_assert(string == also_string, "");
+
 // These strings may overlap, and so the result of the comparison is unknown.
 constexpr bool may_overlap_1 = +"foo" == +"foo"; // expected-error {{}} expected-note {{addresses of literals}}
 constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // expected-error {{}} expected-note {{addresses of literals}}

>From 0e8200bbc4db01ba6629f83bf856e4943445fbf0 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Thu, 19 Sep 2024 10:30:45 -0700
Subject: [PATCH 04/10] Use _if_present not _or_null

Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
 clang/lib/AST/ExprConstant.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d9384a7c125a82..a7d3a859c66e33 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2073,8 +2073,8 @@ static bool IsOpaqueConstantCall(const CallExpr *E) {
 }
 
 static bool IsOpaqueConstantCall(const LValue &LVal) {
-  auto *BaseExpr =
-      llvm::dyn_cast_or_null<CallExpr>(LVal.Base.dyn_cast<const Expr *>());
+  const auto *BaseExpr =
+      llvm::dyn_cast_if_present<CallExpr>(LVal.Base.dyn_cast<const Expr *>());
   return BaseExpr && IsOpaqueConstantCall(BaseExpr);
 }
 

>From 7286ca96776b80f4daba655146c1b51a11cc98fe Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Thu, 19 Sep 2024 10:39:37 -0700
Subject: [PATCH 05/10] Remove redundant braces.

Co-authored-by: Aaron Ballman <aaron at aaronballman.com>
---
 clang/lib/AST/ExprConstant.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a7d3a859c66e33..64c6f0a4655021 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2201,11 +2201,10 @@ static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
   // within RHS. We don't need to look at the characters of one string that
   // would appear before the start of the other string if they were merged.
   CharUnits Offset = RHS.Offset - LHS.Offset;
-  if (Offset.isPositive()) {
+  if (Offset.isPositive())
     RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity());
-  } else if (Offset.isNegative()) {
+  else if (Offset.isNegative())
     LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity());
-  }
 
   bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size();
   StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes;

>From 8dbf82603a5636eeee65b0e608fdc13629d02bdb Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Thu, 19 Sep 2024 17:43:29 +0000
Subject: [PATCH 06/10] Improve diagnosis for opaque constant calls.

---
 clang/include/clang/Basic/DiagnosticASTKinds.td  | 2 +-
 clang/lib/AST/ExprConstant.cpp                   | 3 ++-
 clang/test/AST/ByteCode/builtin-functions.cpp    | 2 +-
 clang/test/SemaCXX/builtins.cpp                  | 2 +-
 clang/test/SemaCXX/constant-expression-cxx14.cpp | 2 +-
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index 76e693f6b4a6ca..679afc053dfe45 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -97,7 +97,7 @@ def note_constexpr_pointer_constant_comparison : Note<
 def note_constexpr_literal_comparison : Note<
   "comparison of addresses of literals has unspecified value">;
 def note_constexpr_opaque_call_comparison : Note<
-  "comparison against opaque constant has unspecified value">;
+  "comparison against opaque constant address '%0' has unspecified value">;
 def note_constexpr_pointer_weak_comparison : Note<
   "comparison against address of weak declaration '%0' can only be performed "
   "at runtime">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 64c6f0a4655021..df1cb9a50cffb9 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13983,7 +13983,8 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
       if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue))
         return DiagComparison(diag::note_constexpr_literal_comparison);
       if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue))
-        return DiagComparison(diag::note_constexpr_opaque_call_comparison);
+        return DiagComparison(diag::note_constexpr_opaque_call_comparison,
+                              !IsOpaqueConstantCall(LHSValue));
       // We can't tell whether weak symbols will end up pointing to the same
       // object.
       if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue))
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 3de0baaade50d0..b0cb39975ce796 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -966,7 +966,7 @@ namespace shufflevector {
 namespace FunctionStart {
   void a(void) {}
   static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \
-                                                       // ref-note {{comparison against opaque constant has unspecified value}} \
+                                                       // ref-note {{comparison against opaque constant address '__builtin_function_start(a)' has unspecified value}} \
                                                        // expected-note {{comparison of addresses of literals has unspecified value}}
 }
 
diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp
index 7df405f0662a15..66d82f1f5eaf25 100644
--- a/clang/test/SemaCXX/builtins.cpp
+++ b/clang/test/SemaCXX/builtins.cpp
@@ -47,7 +47,7 @@ void a(void) {}
 int n;
 void *p = __builtin_function_start(n);               // expected-error {{argument must be a function}}
 static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static assertion expression is not an integral constant expression}}
-// expected-note at -1 {{comparison against opaque constant has unspecified value}}
+// expected-note at -1 {{comparison against opaque constant address '__builtin_function_start(a)' has unspecified value}}
 } // namespace function_start
 
 void no_ms_builtins() {
diff --git a/clang/test/SemaCXX/constant-expression-cxx14.cpp b/clang/test/SemaCXX/constant-expression-cxx14.cpp
index 01326aed8be63d..936d3600953b9a 100644
--- a/clang/test/SemaCXX/constant-expression-cxx14.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx14.cpp
@@ -1320,4 +1320,4 @@ constexpr bool different_in_loop(bool b = false) {
 constexpr bool check = different_in_loop();
   // expected-error at -1 {{}} expected-note at -1 {{in call}}
 
-}
\ No newline at end of file
+}

>From 077f4f892d51eaaef9995e7881ee791a96794246 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Thu, 19 Sep 2024 18:13:53 +0000
Subject: [PATCH 07/10] Address some review comments.

---
 clang/lib/AST/ExprConstant.cpp                | 15 ++++++++++++---
 clang/test/AST/ByteCode/builtin-functions.cpp |  2 +-
 clang/test/SemaCXX/builtins.cpp               |  2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index df1cb9a50cffb9..3b6c02af6137c0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2189,6 +2189,15 @@ static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal,
 // Determine whether two string literals potentially overlap. This will be the
 // case if they agree on the values of all the bytes on the overlapping region
 // between them.
+//
+// The overlapping region is the portion of the two string literals that must
+// overlap in memory if the pointers actually point to the same address at
+// runtime. For example, if LHS is "abcdef" + 3 and RHS is "cdef\0gh" + 1 then
+// the overlapping region is "cdef\0", which in this case does agree, so the
+// strings are potentially overlapping. Conversely, for "foobar" + 3 versus
+// "bazbar" + 3, the overlapping region contains all of both strings, so they
+// are not potentially overlapping, even though they agree from the given
+// addresses onwards.
 static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
                                                     const LValue &LHS,
                                                     const LValue &RHS) {
@@ -2201,10 +2210,10 @@ static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
   // within RHS. We don't need to look at the characters of one string that
   // would appear before the start of the other string if they were merged.
   CharUnits Offset = RHS.Offset - LHS.Offset;
-  if (Offset.isPositive())
-    RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity());
-  else if (Offset.isNegative())
+  if (Offset.isNegative())
     LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity());
+  else
+    RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity());
 
   bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size();
   StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes;
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index b0cb39975ce796..48d5c14ce5b88b 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -966,7 +966,7 @@ namespace shufflevector {
 namespace FunctionStart {
   void a(void) {}
   static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \
-                                                       // ref-note {{comparison against opaque constant address '__builtin_function_start(a)' has unspecified value}} \
+                                                       // ref-note {{comparison against opaque constant address '&__builtin_function_start(a)' has unspecified value}} \
                                                        // expected-note {{comparison of addresses of literals has unspecified value}}
 }
 
diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp
index 66d82f1f5eaf25..6b73ca34cde865 100644
--- a/clang/test/SemaCXX/builtins.cpp
+++ b/clang/test/SemaCXX/builtins.cpp
@@ -47,7 +47,7 @@ void a(void) {}
 int n;
 void *p = __builtin_function_start(n);               // expected-error {{argument must be a function}}
 static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static assertion expression is not an integral constant expression}}
-// expected-note at -1 {{comparison against opaque constant address '__builtin_function_start(a)' has unspecified value}}
+// expected-note at -1 {{comparison against opaque constant address '&__builtin_function_start(a)' has unspecified value}}
 } // namespace function_start
 
 void no_ms_builtins() {

>From 4c869c954bf77104896921840e5836c0fbaf8145 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Thu, 19 Sep 2024 18:21:40 +0000
Subject: [PATCH 08/10] Add release note.

---
 clang/docs/ReleaseNotes.rst | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d92b59334f8f32..e9a4481ff61a4d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -74,6 +74,22 @@ C++ Specific Potentially Breaking Changes
     template <typename T>
     void f();
 
+- During constant evaluation, comparisons between different evaluations of the
+  same string literal are now correctly treated as non-constant, and comparisons
+  between string literals that cannot possibly overlap in memory are now treated
+  as constant.
+
+  .. code-block:: c++
+
+    constexpr const char *f() { return "hello"; }
+    constexpr const char *g() { return "world"; }
+    // Used to evaluate to false, now error: non-constant comparison.
+    constexpr bool a = f() == f();
+    // Might evaluate to true or false, as before.
+    bool at_runtime() { return f() == f(); }
+    // Was error, now evaluates to false.
+    constexpr bool b = f() == g();
+
 ABI Changes in This Version
 ---------------------------
 

>From f221777e0073e73b64ef1aa3805be77100eaeb91 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Thu, 19 Sep 2024 18:38:15 +0000
Subject: [PATCH 09/10] Testing for other opaque constant calls.

Tweak diagnostic to avoid suggesting this is a rule from a language standard.
---
 clang/include/clang/Basic/DiagnosticASTKinds.td |  3 ++-
 clang/test/AST/ByteCode/builtin-functions.cpp   |  2 +-
 clang/test/SemaCXX/builtins.cpp                 | 14 +++++++++++---
 clang/test/SemaCXX/ptrauth-sign-constant.cpp    |  7 +++++++
 4 files changed, 21 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/SemaCXX/ptrauth-sign-constant.cpp

diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index 679afc053dfe45..6a658cf14356f5 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -97,7 +97,8 @@ def note_constexpr_pointer_constant_comparison : Note<
 def note_constexpr_literal_comparison : Note<
   "comparison of addresses of literals has unspecified value">;
 def note_constexpr_opaque_call_comparison : Note<
-  "comparison against opaque constant address '%0' has unspecified value">;
+  "comparison against opaque constant address '%0' can only be performed at "
+  "runtime">;
 def note_constexpr_pointer_weak_comparison : Note<
   "comparison against address of weak declaration '%0' can only be performed "
   "at runtime">;
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 48d5c14ce5b88b..18ccee382d44e3 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -966,7 +966,7 @@ namespace shufflevector {
 namespace FunctionStart {
   void a(void) {}
   static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \
-                                                       // ref-note {{comparison against opaque constant address '&__builtin_function_start(a)' has unspecified value}} \
+                                                       // ref-note {{comparison against opaque constant address '&__builtin_function_start(a)'}} \
                                                        // expected-note {{comparison of addresses of literals has unspecified value}}
 }
 
diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp
index 6b73ca34cde865..f99bb87b9cbd40 100644
--- a/clang/test/SemaCXX/builtins.cpp
+++ b/clang/test/SemaCXX/builtins.cpp
@@ -1,13 +1,21 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions
-// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions
+// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions -fptrauth-intrinsics
+// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions -fptrauth-intrinsics
 typedef const struct __CFString * CFStringRef;
 #define CFSTR __builtin___CFStringMakeConstantString
+#define NSSTR __builtin___NSStringMakeConstantString
 
 void f() {
 #if !defined(__MVS__) && !defined(_AIX)
   // Builtin function __builtin___CFStringMakeConstantString is currently
   // unsupported on z/OS and AIX.
   (void)CFStringRef(CFSTR("Hello"));
+
+  constexpr bool a = CFSTR("Hello") == CFSTR("Hello");
+  // expected-error at -1 {{constant expression}}
+  // expected-note at -2 {{comparison against opaque constant address '&__builtin___CFStringMakeConstantString("Hello")'}}
+  constexpr bool b = NSSTR("Hello") == NSSTR("Hello");
+  // expected-error at -1 {{constant expression}}
+  // expected-note at -2 {{comparison against opaque constant address '&__builtin___NSStringMakeConstantString("Hello")'}}
 #endif
 }
 
@@ -47,7 +55,7 @@ void a(void) {}
 int n;
 void *p = __builtin_function_start(n);               // expected-error {{argument must be a function}}
 static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static assertion expression is not an integral constant expression}}
-// expected-note at -1 {{comparison against opaque constant address '&__builtin_function_start(a)' has unspecified value}}
+// expected-note at -1 {{comparison against opaque constant address '&__builtin_function_start(a)'}}
 } // namespace function_start
 
 void no_ms_builtins() {
diff --git a/clang/test/SemaCXX/ptrauth-sign-constant.cpp b/clang/test/SemaCXX/ptrauth-sign-constant.cpp
new file mode 100644
index 00000000000000..396962e33e2fa9
--- /dev/null
+++ b/clang/test/SemaCXX/ptrauth-sign-constant.cpp
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -std=c++17 -Wno-vla -fsyntax-only -verify -fptrauth-intrinsics %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -std=c++17 -Wno-vla -fsyntax-only -verify -fptrauth-intrinsics %s
+
+int n;
+constexpr bool compare_result = __builtin_ptrauth_sign_constant(&n, 2, 0) == &n;
+// expected-error at -1 {{constant expression}}
+// expected-note at -2 {{comparison against opaque constant address '&__builtin_ptrauth_sign_constant(&n, 2, 0)'}}
\ No newline at end of file

>From fe947fa1caec83b3b5c38bd43dc4081acfdde10b Mon Sep 17 00:00:00 2001
From: Richard Smith <richard at metafoo.co.uk>
Date: Thu, 19 Sep 2024 18:40:14 +0000
Subject: [PATCH 10/10] Add a comment mentioning the CWG issue.

---
 clang/lib/AST/ExprConstant.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b6c02af6137c0..0e6447d31dc9e8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2198,6 +2198,8 @@ static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal,
 // "bazbar" + 3, the overlapping region contains all of both strings, so they
 // are not potentially overlapping, even though they agree from the given
 // addresses onwards.
+//
+// See open core issue CWG2765 which is discussing the desired rule here.
 static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
                                                     const LValue &LHS,
                                                     const LValue &RHS) {



More information about the cfe-commits mailing list