[clang] [clang][bytecode] Implement __builtin_constant_p differently (PR #122099)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 8 04:00:46 PST 2025


https://github.com/tbaederr created https://github.com/llvm/llvm-project/pull/122099

See the comment at the beginning of `InterpBuiltinConstantP.h` for an explanation.

>From 7fe5944ed5f072c0e74202fd01aa65d87aea3787 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Tue, 7 Jan 2025 13:34:08 +0100
Subject: [PATCH] [clang][bytecode] Implement __builtin_constant_p differently

---
 clang/lib/AST/ByteCode/Compiler.cpp           |   3 +
 clang/lib/AST/ByteCode/Function.h             |   8 +
 clang/lib/AST/ByteCode/Interp.h               |  11 ++
 clang/lib/AST/ByteCode/InterpBuiltin.cpp      |  76 --------
 .../AST/ByteCode/InterpBuiltinConstantP.cpp   | 183 ++++++++++++++++++
 .../lib/AST/ByteCode/InterpBuiltinConstantP.h |  77 ++++++++
 clang/lib/AST/ByteCode/Opcodes.td             |   6 +
 clang/lib/AST/CMakeLists.txt                  |   1 +
 .../test/AST/ByteCode/builtin-constant-p.cpp  |  69 ++++++-
 clang/test/CodeGenCXX/builtin-constant-p.cpp  |   1 +
 10 files changed, 357 insertions(+), 78 deletions(-)
 create mode 100644 clang/lib/AST/ByteCode/InterpBuiltinConstantP.cpp
 create mode 100644 clang/lib/AST/ByteCode/InterpBuiltinConstantP.h

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 036f9608bf3ca1..2ad3514351f357 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4538,6 +4538,9 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
 template <class Emitter>
 bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
                                              unsigned BuiltinID) {
+  if (BuiltinID == Builtin::BI__builtin_constant_p)
+    return this->emitBCP(classifyPrim(E), E->getArg(0), E);
+
   const Function *Func = getFunction(E->getDirectCallee());
   if (!Func)
     return false;
diff --git a/clang/lib/AST/ByteCode/Function.h b/clang/lib/AST/ByteCode/Function.h
index 409a80f59f1e94..389a57b2d27601 100644
--- a/clang/lib/AST/ByteCode/Function.h
+++ b/clang/lib/AST/ByteCode/Function.h
@@ -226,6 +226,14 @@ class Function final {
     return ParamTypes[ParamIndex];
   }
 
+  std::optional<unsigned> findParam(const ValueDecl *D) const {
+    for (auto &[K, V] : Params) {
+      if (V.second->asValueDecl() == D)
+        return K;
+    }
+    return std::nullopt;
+  }
+
 private:
   /// Construct a function representing an actual function.
   Function(Program &P, FunctionDeclTy Source, unsigned ArgSize,
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index d2aec69072e04f..4cffacbd14ad9a 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -22,6 +22,7 @@
 #include "Function.h"
 #include "FunctionPointer.h"
 #include "InterpBuiltinBitCast.h"
+#include "InterpBuiltinConstantP.h"
 #include "InterpFrame.h"
 #include "InterpStack.h"
 #include "InterpState.h"
@@ -3040,6 +3041,16 @@ bool GetTypeid(InterpState &S, CodePtr OpPC, const Type *TypePtr,
 bool GetTypeidPtr(InterpState &S, CodePtr OpPC, const Type *TypeInfoType);
 bool DiagTypeid(InterpState &S, CodePtr OpPC);
 
+/// __builtin_constant_p.
+template <PrimType Name, class T = typename PrimConv<Name>::T>
+bool BCP(InterpState &S, CodePtr OpPC, const Expr *E) {
+  BCPVisitor BV{S};
+  BV.VisitStmt(const_cast<Expr *>(E));
+
+  S.Stk.push<T>(T::from(BV.Result));
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Read opcode arguments
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0d52083b069464..57b946bc02707a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1505,77 +1505,6 @@ static bool interp__builtin_ptrauth_string_discriminator(
   return true;
 }
 
-// FIXME: This implementation is not complete.
-// The Compiler instance we create cannot access the current stack frame, local
-// variables, function parameters, etc. We also need protection from
-// side-effects, fatal errors, etc.
-static bool interp__builtin_constant_p(InterpState &S, CodePtr OpPC,
-                                       const InterpFrame *Frame,
-                                       const Function *Func,
-                                       const CallExpr *Call) {
-  const Expr *Arg = Call->getArg(0);
-  QualType ArgType = Arg->getType();
-
-  auto returnInt = [&S, Call](bool Value) -> bool {
-    pushInteger(S, Value, Call->getType());
-    return true;
-  };
-
-  // __builtin_constant_p always has one operand. The rules which gcc follows
-  // are not precisely documented, but are as follows:
-  //
-  //  - If the operand is of integral, floating, complex or enumeration type,
-  //    and can be folded to a known value of that type, it returns 1.
-  //  - If the operand can be folded to a pointer to the first character
-  //    of a string literal (or such a pointer cast to an integral type)
-  //    or to a null pointer or an integer cast to a pointer, it returns 1.
-  //
-  // Otherwise, it returns 0.
-  //
-  // FIXME: GCC also intends to return 1 for literals of aggregate types, but
-  // its support for this did not work prior to GCC 9 and is not yet well
-  // understood.
-  if (ArgType->isIntegralOrEnumerationType() || ArgType->isFloatingType() ||
-      ArgType->isAnyComplexType() || ArgType->isPointerType() ||
-      ArgType->isNullPtrType()) {
-    InterpStack Stk;
-    Compiler<EvalEmitter> C(S.Ctx, S.P, S, Stk);
-    auto Res = C.interpretExpr(Arg, /*ConvertResultToRValue=*/Arg->isGLValue());
-    if (Res.isInvalid()) {
-      C.cleanup();
-      Stk.clear();
-      return returnInt(false);
-    }
-
-    if (!Res.empty()) {
-      const APValue &LV = Res.toAPValue();
-      if (LV.isLValue()) {
-        APValue::LValueBase Base = LV.getLValueBase();
-        if (Base.isNull()) {
-          // A null base is acceptable.
-          return returnInt(true);
-        } else if (const auto *E = Base.dyn_cast<const Expr *>()) {
-          if (!isa<StringLiteral>(E))
-            return returnInt(false);
-          return returnInt(LV.getLValueOffset().isZero());
-        } else if (Base.is<TypeInfoLValue>()) {
-          // Surprisingly, GCC considers __builtin_constant_p(&typeid(int)) to
-          // evaluate to true.
-          return returnInt(true);
-        } else {
-          // Any other base is not constant enough for GCC.
-          return returnInt(false);
-        }
-      }
-    }
-
-    // Otherwise, any constant value is good enough.
-    return returnInt(true);
-  }
-
-  return returnInt(false);
-}
-
 static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
                                          const InterpFrame *Frame,
                                          const Function *Func,
@@ -2483,11 +2412,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
       return false;
     break;
 
-  case Builtin::BI__builtin_constant_p:
-    if (!interp__builtin_constant_p(S, OpPC, Frame, F, Call))
-      return false;
-    break;
-
   case Builtin::BI__noop:
     pushInteger(S, 0, Call->getType());
     break;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinConstantP.cpp b/clang/lib/AST/ByteCode/InterpBuiltinConstantP.cpp
new file mode 100644
index 00000000000000..441ab962a7744c
--- /dev/null
+++ b/clang/lib/AST/ByteCode/InterpBuiltinConstantP.cpp
@@ -0,0 +1,183 @@
+//===--------------- InterpBuiltinConstantP.cpp -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "InterpBuiltinConstantP.h"
+#include "Compiler.h"
+#include "EvalEmitter.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+
+using namespace clang;
+using namespace interp;
+
+bool BCPVisitor::VisitStmt(Stmt *S) {
+  switch (S->getStmtClass()) {
+  case Stmt::DeclRefExprClass:
+    return VisitDeclRefExpr(cast<DeclRefExpr>(S));
+  case Stmt::ImplicitCastExprClass:
+  case Stmt::CStyleCastExprClass:
+    return VisitCastExpr(cast<CastExpr>(S));
+  case Stmt::CallExprClass:
+    return VisitCallExpr(cast<CallExpr>(S));
+  case Stmt::UnaryOperatorClass:
+    return VisitUnaryOperator(cast<UnaryOperator>(S));
+  case Stmt::BinaryOperatorClass:
+    return VisitBinaryOperator(cast<BinaryOperator>(S));
+  case Stmt::ParenExprClass:
+    return VisitStmt(cast<ParenExpr>(S)->getSubExpr());
+  case Stmt::ConditionalOperatorClass:
+    return VisitConditionalOperator(cast<ConditionalOperator>(S));
+  case Stmt::MemberExprClass:
+    return VisitMemberExpr(cast<MemberExpr>(S));
+
+  case Stmt::IntegerLiteralClass:
+  case Stmt::FloatingLiteralClass:
+  case Stmt::CXXNullPtrLiteralExprClass:
+    return true;
+  default:
+    return Fail();
+  }
+
+  llvm_unreachable("All handled above");
+}
+
+bool BCPVisitor::VisitDeclRefExpr(DeclRefExpr *E) {
+  const ValueDecl *D = E->getDecl();
+
+  // Local variable?
+  if (auto LocalOffset = findLocal(D)) {
+    Result = pointerChainIsLive(Frame->getLocalPointer(*LocalOffset));
+  } else if (auto G = S.P.getGlobal(D)) {
+    // Fine.
+  } else if (auto P = findParam(D)) {
+    Result = pointerChainIsLive(Frame->getParamPointer(*P));
+  } else {
+    Result = false;
+  }
+
+  return Result;
+}
+
+bool BCPVisitor::VisitUnaryOperator(UnaryOperator *E) {
+  switch (E->getOpcode()) {
+  case UO_AddrOf:
+    Result = isa<CXXTypeidExpr>(E->getSubExpr());
+    break;
+  case UO_PostInc:
+  case UO_PreInc:
+  case UO_PostDec:
+  case UO_PreDec:
+    return Fail();
+  default:;
+  }
+  return Result;
+}
+
+bool BCPVisitor::VisitBinaryOperator(BinaryOperator *E) {
+  if (E->isCommaOp())
+    return VisitStmt(E->getRHS());
+
+  return VisitStmt(E->getLHS()) && VisitStmt(E->getRHS());
+}
+
+bool BCPVisitor::VisitCastExpr(CastExpr *E) {
+  if (E->getCastKind() == CK_ToVoid)
+    return Fail();
+  return VisitStmt(E->getSubExpr());
+}
+
+bool BCPVisitor::VisitCallExpr(CallExpr *E) {
+  // FIXME: We're not passing any arguments to the function call.
+  Compiler<EvalEmitter> C(S.getContext(), S.P, S, S.Stk);
+
+  auto OldDiag = S.getEvalStatus().Diag;
+  S.getEvalStatus().Diag = nullptr;
+  auto Res = C.interpretExpr(E, /*ConvertResultToRValue=*/E->isGLValue());
+
+  S.getEvalStatus().Diag = OldDiag;
+  Result = !Res.isInvalid();
+  return Result;
+}
+
+bool BCPVisitor::VisitConditionalOperator(ConditionalOperator *E) {
+  return VisitStmt(E->getCond()) && VisitStmt(E->getTrueExpr()) &&
+         VisitStmt(E->getFalseExpr());
+}
+
+bool BCPVisitor::VisitMemberExpr(MemberExpr *E) {
+  if (!isa<DeclRefExpr>(E->getBase()))
+    return Fail();
+
+  const auto *BaseDecl = cast<DeclRefExpr>(E->getBase())->getDecl();
+  const FieldDecl *FD = dyn_cast<FieldDecl>(E->getMemberDecl());
+  if (!FD)
+    return Fail();
+
+  if (!VisitStmt(E->getBase()))
+    return Fail();
+
+  Pointer BasePtr = getPointer(BaseDecl);
+  const Record *R = BasePtr.getRecord();
+  assert(R);
+  Pointer FieldPtr = BasePtr.atField(R->getField(FD)->Offset);
+  if (!pointerChainIsLive(FieldPtr))
+    return Fail();
+  return true;
+}
+
+std::optional<unsigned> BCPVisitor::findLocal(const ValueDecl *D) const {
+  const auto *Func = Frame->getFunction();
+  if (!Func)
+    return std::nullopt;
+  for (auto &Scope : Func->scopes()) {
+    for (auto &Local : Scope.locals()) {
+      if (Local.Desc->asValueDecl() == D) {
+        return Local.Offset;
+      }
+    }
+  }
+  return std::nullopt;
+}
+
+std::optional<unsigned> BCPVisitor::findParam(const ValueDecl *D) const {
+  const auto *Func = Frame->getFunction();
+  if (!Func || !Frame->Caller)
+    return std::nullopt;
+
+  return Func->findParam(D);
+}
+
+bool BCPVisitor::pointerChainIsLive(const Pointer &P) const {
+  Pointer Ptr = P;
+  for (;;) {
+    if (!Ptr.isLive() || !Ptr.isInitialized() || Ptr.isExtern() ||
+        Ptr.isDummy())
+      return false;
+
+    if (Ptr.isZero())
+      return true;
+
+    const Descriptor *Desc = Ptr.getFieldDesc();
+    if (!Desc->isPrimitive() || Desc->getPrimType() != PT_Ptr)
+      return true;
+
+    Ptr = Ptr.deref<Pointer>();
+  }
+
+  return true;
+}
+
+Pointer BCPVisitor::getPointer(const ValueDecl *D) const {
+  if (auto LocalOffset = findLocal(D))
+    return Frame->getLocalPointer(*LocalOffset);
+  if (auto G = S.P.getGlobal(D))
+    return S.P.getPtrGlobal(*G);
+  if (auto P = findParam(D))
+    return Frame->getParamPointer(*P);
+
+  llvm_unreachable("One of the ifs before should've worked.");
+}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltinConstantP.h b/clang/lib/AST/ByteCode/InterpBuiltinConstantP.h
new file mode 100644
index 00000000000000..fcdd36b450a4bc
--- /dev/null
+++ b/clang/lib/AST/ByteCode/InterpBuiltinConstantP.h
@@ -0,0 +1,77 @@
+//===-------------------- InterpBuiltinConstantP.h --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This is an implementation of __builtin_constant_p.
+//
+// __builtin_constant_p is a GCC extension that is supposed to return 1 if the
+// given expression can be evaluated at compile time. This is a problem for our
+// byte code approach, however.
+//
+// 1) We cannot just keep the expression unevaluated and create a new
+//    Compiler<EvalEmitter> when evaluating the bcp call. This doesn't work
+//    because the expression can refer to variables from the current InterpFrame
+//    or parameters from the function, etc.
+//
+// 2) We have no mechanism to suppress diagnostics and side-effects and to
+//    eventually just record whether the evaluation of an expression was
+//    successful or not, in byte code. If the evaluation fails, it
+//    fails--and will never reach the end of the bcp call. This COULD be
+//    changed, but that means changing how byte code is interpreted
+//    everywhere, just because of one builtin.
+//
+// So, here we implement our own Visitor that basically implements a subset of
+// working operations for the expression passed to __builtin_constant_p.
+//
+// While it is easy to come up with examples where we don't behave correctly,
+// __builtin_constant_p is usually used to check whether a single parameter
+// or variable is known at compile time, so the expressions used in reality
+// are very simple.
+
+#ifndef LLVM_CLANG_AST_INTERP_BUILTIN_CONSTANT_P_H
+#define LLVM_CLANG_AST_INTERP_BUILTIN_CONSTANT_P_H
+
+#include "InterpState.h"
+#include "Pointer.h"
+#include "clang/AST/DynamicRecursiveASTVisitor.h"
+
+namespace clang {
+namespace interp {
+
+class BCPVisitor final : public DynamicRecursiveASTVisitor {
+public:
+  BCPVisitor(InterpState &S) : Frame(S.Current), S(S) {}
+
+  bool VisitStmt(Stmt *S) override;
+  bool VisitDeclRefExpr(DeclRefExpr *E) override;
+  bool VisitUnaryOperator(UnaryOperator *E) override;
+  bool VisitBinaryOperator(BinaryOperator *E) override;
+  bool VisitCastExpr(CastExpr *E) override;
+  bool VisitCallExpr(CallExpr *E) override;
+  bool VisitConditionalOperator(ConditionalOperator *E) override;
+  bool VisitMemberExpr(MemberExpr *E) override;
+
+  bool Result = true;
+
+private:
+  InterpFrame *Frame;
+  InterpState &S;
+
+  std::optional<unsigned> findLocal(const ValueDecl *D) const;
+  std::optional<unsigned> findParam(const ValueDecl *D) const;
+  bool pointerChainIsLive(const Pointer &P) const;
+  Pointer getPointer(const ValueDecl *D) const;
+
+  bool Fail() {
+    Result = false;
+    return false;
+  }
+};
+
+} // namespace interp
+} // namespace clang
+#endif
diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td
index 4b0c902ab29268..45343762d51a29 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -854,3 +854,9 @@ def BitCast : Opcode;
 def GetTypeid : Opcode { let Args = [ArgTypePtr, ArgTypePtr]; }
 def GetTypeidPtr : Opcode { let Args = [ArgTypePtr]; }
 def DiagTypeid : Opcode;
+
+def BCP : Opcode {
+  let Types = [IntegerTypeClass];
+  let Args = [ArgExpr];
+  let HasGroup = 1;
+}
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index cb13c5225b713b..e126b5931168d3 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -85,6 +85,7 @@ add_clang_library(clangAST
   ByteCode/InterpFrame.cpp
   ByteCode/InterpStack.cpp
   ByteCode/InterpState.cpp
+  ByteCode/InterpBuiltinConstantP.cpp
   ByteCode/Pointer.cpp
   ByteCode/PrimType.cpp
   ByteCode/Program.cpp
diff --git a/clang/test/AST/ByteCode/builtin-constant-p.cpp b/clang/test/AST/ByteCode/builtin-constant-p.cpp
index 62899b60064c28..3f55091b80889f 100644
--- a/clang/test/AST/ByteCode/builtin-constant-p.cpp
+++ b/clang/test/AST/ByteCode/builtin-constant-p.cpp
@@ -1,6 +1,7 @@
-// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s
-// RUN: %clang_cc1 -verify=ref,both %s
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both -std=c++20 %s
+// RUN: %clang_cc1 -verify=ref,both -std=c++20 %s
 
+using intptr_t = __INTPTR_TYPE__;
 
 static_assert(__builtin_constant_p(12), "");
 static_assert(__builtin_constant_p(1.0), "");
@@ -18,3 +19,67 @@ constexpr int foo(int &a) {
   return __builtin_constant_p(a);
 }
 static_assert(!foo(z));
+
+constexpr bool Local() {
+  int z = 10;
+  return __builtin_constant_p(z);
+}
+static_assert(Local());
+
+constexpr bool Local2() {
+  int z = 10;
+  return __builtin_constant_p(&z);
+}
+static_assert(!Local2());
+
+constexpr bool Parameter(int a) {
+  return __builtin_constant_p(a);
+}
+static_assert(Parameter(10));
+
+constexpr bool InvalidLocal() {
+  int *z;
+  {
+    int b = 10;
+    z = &b;
+  }
+  return __builtin_constant_p(z);
+}
+static_assert(!InvalidLocal());
+
+template<typename T> constexpr bool bcp(T t) {
+  return __builtin_constant_p(t);
+}
+
+constexpr intptr_t ptr_to_int(const void *p) {
+  return __builtin_constant_p(1) ? (intptr_t)p : (intptr_t)p; // expected-note {{cast that performs the conversions of a reinterpret_cast}}
+}
+
+/// This is from test/SemaCXX/builtin-constant-p.cpp, but it makes no sense.
+/// ptr_to_int is called before bcp(), so it fails. GCC does not accept this either.
+static_assert(bcp(ptr_to_int("foo"))); // expected-error {{not an integral constant expression}} \
+                                       // expected-note {{in call to}}
+
+constexpr bool AndFold(const int &a, const int &b) {
+  return __builtin_constant_p(a && b);
+}
+
+static_assert(AndFold(10, 20));
+static_assert(!AndFold(z, 10));
+static_assert(!AndFold(10, z));
+
+
+struct F {
+  int a;
+};
+
+constexpr F f{12};
+static_assert(__builtin_constant_p(f.a));
+
+constexpr bool Member() {
+  F f;
+  return __builtin_constant_p(f.a);
+}
+static_assert(!Member());
+
+
diff --git a/clang/test/CodeGenCXX/builtin-constant-p.cpp b/clang/test/CodeGenCXX/builtin-constant-p.cpp
index 866faa5ec9761e..39416b94375fe5 100644
--- a/clang/test/CodeGenCXX/builtin-constant-p.cpp
+++ b/clang/test/CodeGenCXX/builtin-constant-p.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
 
 // Don't crash if the argument to __builtin_constant_p isn't scalar.
 template <typename T>



More information about the cfe-commits mailing list