[clang] [clang][bytecode] Use bytecode interpreter in EvaluateCharRangeAsString (PR #138461)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Sun May 4 10:05:38 PDT 2025


https://github.com/tbaederr created https://github.com/llvm/llvm-project/pull/138461

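`Expr::EvaluateCharRangeAsString` was always using the AST walker (the evaluator in `ExprConstant.cpp`), even when the new constant interpreter was enabled. Use the bytecode interpreter in that case.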

From 8d76bc91a3a443b6563cfe26f7558adf73d2f60c Mon Sep 17 00:00:00 2001
From: Timm Bäder <tbaeder at redhat.com>
Date: Sun, 4 May 2025 07:25:20 +0200
Subject: [PATCH] [clang][bytecode] Use bytecode interpreter in
 EvaluateCharRangeAsString

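EvaluateCharRangeAsString was always using the AST walker
(ExprConstant.cpp), even when the new constant interpreter was enabled.
Use the bytecode interpreter in that case.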
---
 clang/lib/AST/ByteCode/Context.cpp         | 79 ++++++++++++++++++++++
 clang/lib/AST/ByteCode/Context.h           |  9 +++
 clang/lib/AST/ByteCode/EvalEmitter.cpp     | 23 +++++++
 clang/lib/AST/ByteCode/EvalEmitter.h       |  6 ++
 clang/lib/AST/ByteCode/EvaluationResult.h  |  6 +-
 clang/lib/AST/ByteCode/Pointer.h           |  7 ++
 clang/lib/AST/ExprConstant.cpp             |  7 +-
 clang/test/SemaCXX/gnu-asm-constexpr.cpp   |  1 +
 clang/test/SemaCXX/static-assert-cxx26.cpp |  1 +
 9 files changed, 133 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index b35b30cc20d81..db9df7abf7a29 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -134,6 +134,85 @@ bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD,
   return true;
 }
 
+template <typename ResultT>
+bool Context::evaluateStringRepr(State &Parent, const Expr *SizeExpr,
+                                 const Expr *PtrExpr, ResultT &Result) {
+  assert(Stk.empty());
+  Compiler<EvalEmitter> C(*this, *P, Parent, Stk);
+
+  // Evaluate size value.
+  APValue SizeValue;
+  if (!evaluateAsRValue(Parent, SizeExpr, SizeValue))
+    return false;
+
+  if (!SizeValue.isInt())
+    return false;
+  uint64_t Size = SizeValue.getInt().getZExtValue();
+
+  auto PtrRes = C.interpretAsPointer(PtrExpr, [&](const Pointer &Ptr) {
+    if (Size == 0) {
+      if constexpr (std::is_same_v<ResultT, APValue>)
+        Result = APValue(APValue::UninitArray{}, 0, 0);
+      return true;
+    }
+
+    if (!Ptr.isLive() || !Ptr.getFieldDesc()->isPrimitiveArray())
+      return false;
+
+    // Must be char.
+    if (Ptr.getFieldDesc()->getElemSize() != 1 /*bytes*/)
+      return false;
+
+    if (Size > Ptr.getNumElems()) {
+      Parent.FFDiag(SizeExpr, diag::note_constexpr_access_past_end) << AK_Read;
+      Size = Ptr.getNumElems();
+    }
+
+    if constexpr (std::is_same_v<ResultT, APValue>) {
+      QualType CharTy = PtrExpr->getType()->getPointeeType();
+      Result = APValue(APValue::UninitArray{}, Size, Size);
+      for (uint64_t I = 0; I != Size; ++I) {
+        if (std::optional<APValue> ElemVal =
+                Ptr.atIndex(I).toRValue(*this, CharTy))
+          Result.getArrayInitializedElt(I) = *ElemVal;
+        else
+          return false;
+      }
+    } else {
+      assert((std::is_same_v<ResultT, std::string>));
+      if (Size < Result.max_size())
+        Result.resize(Size);
+      Result.assign(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size);
+    }
+
+    return true;
+  });
+
+  if (PtrRes.isInvalid()) {
+    C.cleanup();
+    Stk.clear();
+    return false;
+  }
+
+  return true;
+}
+
+bool Context::evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                                const Expr *PtrExpr, APValue &Result) {
+  assert(SizeExpr);
+  assert(PtrExpr);
+
+  return evaluateStringRepr(Parent, SizeExpr, PtrExpr, Result);
+}
+
+bool Context::evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                                const Expr *PtrExpr, std::string &Result) {
+  assert(SizeExpr);
+  assert(PtrExpr);
+
+  return evaluateStringRepr(Parent, SizeExpr, PtrExpr, Result);
+}
+
 const LangOptions &Context::getLangOpts() const { return Ctx.getLangOpts(); }
 
 std::optional<PrimType> Context::classify(QualType T) const {
diff --git a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h
index 5a39f40ef3f11..33bc9fad883f8 100644
--- a/clang/lib/AST/ByteCode/Context.h
+++ b/clang/lib/AST/ByteCode/Context.h
@@ -59,6 +59,11 @@ class Context final {
   /// Evaluates a toplevel initializer.
   bool evaluateAsInitializer(State &Parent, const VarDecl *VD, APValue &Result);
 
+  bool evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                         const Expr *PtrExpr, APValue &Result);
+  bool evaluateCharRange(State &Parent, const Expr *SizeExpr,
+                         const Expr *PtrExpr, std::string &Result);
+
   /// Returns the AST context.
   ASTContext &getASTContext() const { return Ctx; }
   /// Returns the language options.
@@ -122,6 +127,10 @@ class Context final {
   /// Runs a function.
   bool Run(State &Parent, const Function *Func);
 
+  template <typename ResultT>
+  bool evaluateStringRepr(State &Parent, const Expr *SizeExpr,
+                          const Expr *PtrExpr, ResultT &Result);
+
   /// Current compilation context.
   ASTContext &Ctx;
   /// Interpreter stack, shared across invocations.
diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp
index 71d688498ffa5..90aca568c9394 100644
--- a/clang/lib/AST/ByteCode/EvalEmitter.cpp
+++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp
@@ -72,6 +72,25 @@ EvaluationResult EvalEmitter::interpretDecl(const VarDecl *VD,
   return std::move(this->EvalResult);
 }
 
+EvaluationResult EvalEmitter::interpretAsPointer(const Expr *E,
+                                                 PtrCallback PtrCB) {
+  this->ReturnPointer = true;
+  this->PtrCB = PtrCB;
+
+  S.setEvalLocation(E->getExprLoc());
+  this->ConvertResultToRValue = false;
+  this->CheckFullyInitialized = false;
+  EvalResult.setSource(E);
+
+  if (!this->visitExpr(E, true)) {
+    // EvalResult may already have a result set, but something failed
+    // after that (e.g. evaluating destructors).
+    EvalResult.setInvalid();
+  }
+
+  return std::move(this->EvalResult);
+}
+
 void EvalEmitter::emitLabel(LabelTy Label) { CurrentLabel = Label; }
 
 EvalEmitter::LabelTy EvalEmitter::getLabel() { return NextLabel++; }
@@ -170,6 +189,10 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) {
     return true;
   }
 
+  // If we're returning a raw pointer, call our callback.
+  if (this->ReturnPointer)
+    return this->PtrCB(Ptr);
+
   if (!EvalResult.checkReturnValue(S, Ctx, Ptr, Info))
     return false;
   if (CheckFullyInitialized && !EvalResult.checkFullyInitialized(S, Ptr))
diff --git a/clang/lib/AST/ByteCode/EvalEmitter.h b/clang/lib/AST/ByteCode/EvalEmitter.h
index f53f86c31ec1e..4f4a78f39ef32 100644
--- a/clang/lib/AST/ByteCode/EvalEmitter.h
+++ b/clang/lib/AST/ByteCode/EvalEmitter.h
@@ -32,11 +32,14 @@ class EvalEmitter : public SourceMapper {
   using LabelTy = uint32_t;
   using AddrTy = uintptr_t;
   using Local = Scope::Local;
+  using PtrCallback = llvm::function_ref<bool(const Pointer &)>;
 
   EvaluationResult interpretExpr(const Expr *E,
                                  bool ConvertResultToRValue = false,
                                  bool DestroyToplevelScope = false);
   EvaluationResult interpretDecl(const VarDecl *VD, bool CheckFullyInitialized);
+  /// Interpret the given Expr to a Pointer.
+  EvaluationResult interpretAsPointer(const Expr *E, PtrCallback PtrCB);
 
   /// Clean up all resources.
   void cleanup();
@@ -101,6 +104,9 @@ class EvalEmitter : public SourceMapper {
   /// Whether we should check if the result has been fully
   /// initialized.
   bool CheckFullyInitialized = false;
+  bool ReturnPointer = false;
+
+  PtrCallback PtrCB;
 
   /// Temporaries which require storage.
   llvm::DenseMap<unsigned, std::unique_ptr<char[]>> Locals;
diff --git a/clang/lib/AST/ByteCode/EvaluationResult.h b/clang/lib/AST/ByteCode/EvaluationResult.h
index ef662e3779bc3..3b6c65eff1ef8 100644
--- a/clang/lib/AST/ByteCode/EvaluationResult.h
+++ b/clang/lib/AST/ByteCode/EvaluationResult.h
@@ -61,11 +61,6 @@ class EvaluationResult final {
     Value = std::move(V);
     Kind = RValue;
   }
-  void setPointer(const Pointer P) {
-    assert(empty());
-    Value = P;
-    Kind = LValue;
-  }
   void setFunctionPointer(const FunctionPointer &P) {
     assert(empty());
     Value = P;
@@ -88,6 +83,7 @@ class EvaluationResult final {
   bool isInvalid() const { return Kind == Invalid; }
   bool isLValue() const { return Kind == LValue; }
   bool isRValue() const { return Kind == RValue; }
+  bool isPointer() const { return std::holds_alternative<Pointer>(Value); }
 
   /// Returns an APValue for the evaluation result. The returned
   /// APValue might be an LValue or RValue.
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 5e7c5d69f20da..19770aa3b97bc 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -613,6 +613,13 @@ class Pointer {
 
   const Block *block() const { return asBlockPointer().Pointee; }
 
+  /// If backed by actual data (i.e. a block pointer), return
+  /// an address to that data.
+  const std::byte *getRawAddress() const {
+    assert(isBlockPointer());
+    return asBlockPointer().Pointee->rawData() + Offset;
+  }
+
   /// Returns the index into an array.
   int64_t getIndex() const {
     if (!isBlockPointer())
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index f2e49b9ea669e..441e1f955874a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -18017,10 +18017,14 @@ static bool EvaluateCharRangeAsStringImpl(const Expr *, T &Result,
                                           const Expr *PtrExpression,
                                           ASTContext &Ctx,
                                           Expr::EvalResult &Status) {
-  LValue String;
   EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression);
   Info.InConstantContext = true;
 
+  if (Info.EnableNewConstInterp)
+    return Info.Ctx.getInterpContext().evaluateCharRange(Info, SizeExpression,
+                                                         PtrExpression, Result);
+
+  LValue String;
   FullExpressionRAII Scope(Info);
   APSInt SizeValue;
   if (!::EvaluateInteger(SizeExpression, SizeValue, Info))
@@ -18075,6 +18079,7 @@ bool Expr::EvaluateCharRangeAsString(APValue &Result,
                                      const Expr *SizeExpression,
                                      const Expr *PtrExpression, ASTContext &Ctx,
                                      EvalResult &Status) const {
+
   return EvaluateCharRangeAsStringImpl(this, Result, SizeExpression,
                                        PtrExpression, Ctx, Status);
 }
diff --git a/clang/test/SemaCXX/gnu-asm-constexpr.cpp b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
index 8813b873fab0c..77466df12bdc1 100644
--- a/clang/test/SemaCXX/gnu-asm-constexpr.cpp
+++ b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++26 -triple x86_64-gnu-linux
+// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++26 -triple x86_64-gnu-linux -fexperimental-new-constant-interpreter
 
 template <bool Leak>
 struct RAIIBase {
diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp b/clang/test/SemaCXX/static-assert-cxx26.cpp
index 7d896d8b365b7..b53c67ee67932 100644
--- a/clang/test/SemaCXX/static-assert-cxx26.cpp
+++ b/clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify
+// RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify -fexperimental-new-constant-interpreter
 
 static_assert(true, "");
 static_assert(true, 0); // expected-error {{the message in a static assertion must be a string literal or an object with 'data()' and 'size()' member functions}}



More information about the cfe-commits mailing list