[clang] [clang][Interp] Implement __builtin_classify_type (PR #71972)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 10 10:43:19 PST 2023


https://github.com/tbaederr created https://github.com/llvm/llvm-project/pull/71972

This adds some infrastructure for unevaluated builtin calls, but the implementation is almost entirely copied from `ExprConstant.cpp`. I'm open for suggestions on how to share the code.

>From 01e541c726de7bd2aca290f51224e2cafcb4494d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 10 Nov 2023 19:33:21 +0100
Subject: [PATCH] [clang][Interp] Implement __builtin_classify_type

---
 clang/lib/AST/Interp/ByteCodeEmitter.cpp     |  15 +-
 clang/lib/AST/Interp/ByteCodeExprGen.cpp     |  10 +-
 clang/lib/AST/Interp/Function.cpp            |   7 +-
 clang/lib/AST/Interp/Function.h              |   5 +-
 clang/lib/AST/Interp/Interp.cpp              |   3 +-
 clang/lib/AST/Interp/InterpBuiltin.cpp       | 193 +++++++++++++++++++
 clang/test/Sema/builtin-classify-type.c      |   1 +
 clang/test/SemaCXX/builtin-classify-type.cpp |   1 +
 8 files changed, 222 insertions(+), 13 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeEmitter.cpp b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
index c8abb7c17a38ba2..89b7708c0c2a12f 100644
--- a/clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -14,6 +14,7 @@
 #include "Program.h"
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/Basic/Builtins.h"
 #include <type_traits>
 
 using namespace clang;
@@ -84,10 +85,16 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
 
   // Create a handle over the emitted code.
   Function *Func = P.getFunction(FuncDecl);
-  if (!Func)
-    Func = P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
-                            std::move(ParamDescriptors),
-                            std::move(ParamOffsets), HasThisPointer, HasRVO);
+  if (!Func) {
+    bool IsUnevaluatedBuiltin = false;
+    if (unsigned BI = FuncDecl->getBuiltinID())
+      IsUnevaluatedBuiltin = Ctx.getASTContext().BuiltinInfo.isUnevaluated(BI);
+
+    Func =
+        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
+                         std::move(ParamDescriptors), std::move(ParamOffsets),
+                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
+  }
 
   assert(Func);
   // For not-yet-defined functions, we only create a Function instance and
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 15a717089660337..f1aa1d22315f658 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -2226,10 +2226,12 @@ bool ByteCodeExprGen<Emitter>::VisitBuiltinCallExpr(const CallExpr *E) {
   if (!Func)
     return false;
 
-  // Put arguments on the stack.
-  for (const auto *Arg : E->arguments()) {
-    if (!this->visit(Arg))
-      return false;
+  if (!Func->isUnevaluatedBuiltin()) {
+    // Put arguments on the stack.
+    for (const auto *Arg : E->arguments()) {
+      if (!this->visit(Arg))
+        return false;
+    }
   }
 
   if (!this->emitCallBI(Func, E, E))
diff --git a/clang/lib/AST/Interp/Function.cpp b/clang/lib/AST/Interp/Function.cpp
index 357aff7fe6229b9..784e6a2df1d8806 100644
--- a/clang/lib/AST/Interp/Function.cpp
+++ b/clang/lib/AST/Interp/Function.cpp
@@ -20,11 +20,12 @@ Function::Function(Program &P, const FunctionDecl *F, unsigned ArgSize,
                    llvm::SmallVectorImpl<PrimType> &&ParamTypes,
                    llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
                    llvm::SmallVectorImpl<unsigned> &&ParamOffsets,
-                   bool HasThisPointer, bool HasRVO)
+                   bool HasThisPointer, bool HasRVO, bool UnevaluatedBuiltin)
     : P(P), Loc(F->getBeginLoc()), F(F), ArgSize(ArgSize),
       ParamTypes(std::move(ParamTypes)), Params(std::move(Params)),
       ParamOffsets(std::move(ParamOffsets)), HasThisPointer(HasThisPointer),
-      HasRVO(HasRVO), Variadic(F->isVariadic()) {}
+      HasRVO(HasRVO), Variadic(F->isVariadic()),
+      IsUnevaluatedBuiltin(UnevaluatedBuiltin) {}
 
 Function::ParamDescriptor Function::getParamDescriptor(unsigned Offset) const {
   auto It = Params.find(Offset);
@@ -50,7 +51,7 @@ bool Function::isVirtual() const {
 }
 
 bool Function::needsRuntimeArgPop(const ASTContext &Ctx) const {
-  if (!isBuiltin())
+  if (!isBuiltin() || isUnevaluatedBuiltin())
     return false;
   return Ctx.BuiltinInfo.hasCustomTypechecking(getBuiltinID());
 }
diff --git a/clang/lib/AST/Interp/Function.h b/clang/lib/AST/Interp/Function.h
index be9b1733635f725..cbd17d90ada3944 100644
--- a/clang/lib/AST/Interp/Function.h
+++ b/clang/lib/AST/Interp/Function.h
@@ -179,6 +179,8 @@ class Function final {
 
   bool isBuiltin() const { return F->getBuiltinID() != 0; }
 
+  bool isUnevaluatedBuiltin() const { return IsUnevaluatedBuiltin; }
+
   /// Does this function need its arguments to be classified at runtime
   /// rather than at bytecode-compile-time?
   bool needsRuntimeArgPop(const ASTContext &Ctx) const;
@@ -195,7 +197,7 @@ class Function final {
            llvm::SmallVectorImpl<PrimType> &&ParamTypes,
            llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
            llvm::SmallVectorImpl<unsigned> &&ParamOffsets, bool HasThisPointer,
-           bool HasRVO);
+           bool HasRVO, bool UnevaluatedBuiltin);
 
   /// Sets the code of a function.
   void setCode(unsigned NewFrameSize, std::vector<std::byte> &&NewCode,
@@ -254,6 +256,7 @@ class Function final {
   bool HasBody = false;
   bool Defined = false;
   bool Variadic = false;
+  bool IsUnevaluatedBuiltin = false;
 
 public:
   /// Dumps the disassembled bytecode to \c llvm::errs().
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 144b674451e353c..9c96eff46171dba 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -145,7 +145,8 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) {
     return;
   }
 
-  if (S.Current->Caller && CurFunc->isVariadic()) {
+  if (S.Current->Caller && CurFunc->isVariadic() &&
+      !CurFunc->isUnevaluatedBuiltin()) {
     // CallExpr we're look for is at the return PC of the current function, i.e.
     // in the caller.
     // This code path should be executed very rarely.
diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp
index f26d298f5b60045..40e6057618f7109 100644
--- a/clang/lib/AST/Interp/InterpBuiltin.cpp
+++ b/clang/lib/AST/Interp/InterpBuiltin.cpp
@@ -439,6 +439,194 @@ static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+// Values returned by __builtin_classify_type, chosen to match the values
+/// produced by GCC's builtin.
+enum class GCCTypeClass {
+  None = -1,
+  Void = 0,
+  Integer = 1,
+  // GCC reserves 2 for character types, but instead classifies them as
+  // integers.
+  Enum = 3,
+  Bool = 4,
+  Pointer = 5,
+  // GCC reserves 6 for references, but appears to never use it (because
+  // expressions never have reference type, presumably).
+  PointerToDataMember = 7,
+  RealFloat = 8,
+  Complex = 9,
+  // GCC reserves 10 for functions, but does not use it since GCC version 6 due
+  // to decay to pointer. (Prior to version 6 it was only used in C++ mode).
+  // GCC claims to reserve 11 for pointers to member functions, but *actually*
+  // uses 12 for that purpose, same as for a class or struct. Maybe it
+  // internally implements a pointer to member as a struct?  Who knows.
+  PointerToMemberFunction = 12, // Not a bug, see above.
+  ClassOrStruct = 12,
+  Union = 13,
+  // GCC reserves 14 for arrays, but does not use it since GCC version 6 due to
+  // decay to pointer. (Prior to version 6 it was only used in C++ mode).
+  // GCC reserves 15 for strings, but actually uses 5 (pointer) for string
+  // literals.
+};
+
+/// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way
+/// as GCC.
+static GCCTypeClass EvaluateBuiltinClassifyType(QualType T,
+                                                const LangOptions &LangOpts) {
+  assert(!T->isDependentType() && "unexpected dependent type");
+
+  QualType CanTy = T.getCanonicalType();
+
+  switch (CanTy->getTypeClass()) {
+#define TYPE(ID, BASE)
+#define DEPENDENT_TYPE(ID, BASE) case Type::ID:
+#define NON_CANONICAL_TYPE(ID, BASE) case Type::ID:
+#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(ID, BASE) case Type::ID:
+#include "clang/AST/TypeNodes.inc"
+  case Type::Auto:
+  case Type::DeducedTemplateSpecialization:
+    llvm_unreachable("unexpected non-canonical or dependent type");
+
+  case Type::Builtin:
+    switch (cast<BuiltinType>(CanTy)->getKind()) {
+#define BUILTIN_TYPE(ID, SINGLETON_ID)
+#define SIGNED_TYPE(ID, SINGLETON_ID)                                          \
+  case BuiltinType::ID:                                                        \
+    return GCCTypeClass::Integer;
+#define FLOATING_TYPE(ID, SINGLETON_ID)                                        \
+  case BuiltinType::ID:                                                        \
+    return GCCTypeClass::RealFloat;
+#define PLACEHOLDER_TYPE(ID, SINGLETON_ID)                                     \
+  case BuiltinType::ID:                                                        \
+    break;
+#include "clang/AST/BuiltinTypes.def"
+    case BuiltinType::Void:
+      return GCCTypeClass::Void;
+
+    case BuiltinType::Bool:
+      return GCCTypeClass::Bool;
+
+    case BuiltinType::Char_U:
+    case BuiltinType::UChar:
+    case BuiltinType::WChar_U:
+    case BuiltinType::Char8:
+    case BuiltinType::Char16:
+    case BuiltinType::Char32:
+    case BuiltinType::UShort:
+    case BuiltinType::UInt:
+    case BuiltinType::ULong:
+    case BuiltinType::ULongLong:
+    case BuiltinType::UInt128:
+      return GCCTypeClass::Integer;
+
+    case BuiltinType::UShortAccum:
+    case BuiltinType::UAccum:
+    case BuiltinType::ULongAccum:
+    case BuiltinType::UShortFract:
+    case BuiltinType::UFract:
+    case BuiltinType::ULongFract:
+    case BuiltinType::SatUShortAccum:
+    case BuiltinType::SatUAccum:
+    case BuiltinType::SatULongAccum:
+    case BuiltinType::SatUShortFract:
+    case BuiltinType::SatUFract:
+    case BuiltinType::SatULongFract:
+      return GCCTypeClass::None;
+
+    case BuiltinType::NullPtr:
+
+    case BuiltinType::ObjCId:
+    case BuiltinType::ObjCClass:
+    case BuiltinType::ObjCSel:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix)                   \
+  case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) case BuiltinType::Id:
+#include "clang/Basic/OpenCLExtensionTypes.def"
+    case BuiltinType::OCLSampler:
+    case BuiltinType::OCLEvent:
+    case BuiltinType::OCLClkEvent:
+    case BuiltinType::OCLQueue:
+    case BuiltinType::OCLReserveID:
+#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/AArch64SVEACLETypes.def"
+#define PPC_VECTOR_TYPE(Name, Id, Size) case BuiltinType::Id:
+#include "clang/Basic/PPCTypes.def"
+#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/RISCVVTypes.def"
+#define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/WebAssemblyReferenceTypes.def"
+      return GCCTypeClass::None;
+
+    case BuiltinType::Dependent:
+      llvm_unreachable("unexpected dependent type");
+    };
+    llvm_unreachable("unexpected placeholder type");
+
+  case Type::Enum:
+    return LangOpts.CPlusPlus ? GCCTypeClass::Enum : GCCTypeClass::Integer;
+
+  case Type::Pointer:
+  case Type::ConstantArray:
+  case Type::VariableArray:
+  case Type::IncompleteArray:
+  case Type::FunctionNoProto:
+  case Type::FunctionProto:
+    return GCCTypeClass::Pointer;
+
+  case Type::MemberPointer:
+    return CanTy->isMemberDataPointerType()
+               ? GCCTypeClass::PointerToDataMember
+               : GCCTypeClass::PointerToMemberFunction;
+
+  case Type::Complex:
+    return GCCTypeClass::Complex;
+
+  case Type::Record:
+    return CanTy->isUnionType() ? GCCTypeClass::Union
+                                : GCCTypeClass::ClassOrStruct;
+
+  case Type::Atomic:
+    // GCC classifies _Atomic T the same as T.
+    return EvaluateBuiltinClassifyType(
+        CanTy->castAs<AtomicType>()->getValueType(), LangOpts);
+
+  case Type::BlockPointer:
+  case Type::Vector:
+  case Type::ExtVector:
+  case Type::ConstantMatrix:
+  case Type::ObjCObject:
+  case Type::ObjCInterface:
+  case Type::ObjCObjectPointer:
+  case Type::Pipe:
+  case Type::BitInt:
+    // GCC classifies vectors as None. We follow its lead and classify all
+    // other types that don't fit into the regular classification the same way.
+    return GCCTypeClass::None;
+
+  case Type::LValueReference:
+  case Type::RValueReference:
+    llvm_unreachable("invalid type for expression");
+  }
+
+  llvm_unreachable("unexpected type class");
+}
+
+static bool interp__builtin_classify_type(InterpState &S, CodePtr OpPC,
+                                          const InterpFrame *Frame,
+                                          const Function *Func,
+                                          const CallExpr *Call) {
+  // This is an unevaluated call, so there are no arguments on the stack.
+  assert(Call->getNumArgs() == 1);
+  const Expr *Arg = Call->getArg(0);
+
+  GCCTypeClass ResultClass =
+      EvaluateBuiltinClassifyType(Arg->getType(), S.getLangOpts());
+  int32_t ReturnVal = static_cast<int32_t>(ResultClass);
+  pushInt(S, ReturnVal);
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
                       const CallExpr *Call) {
   InterpFrame *Frame = S.Current;
@@ -576,6 +764,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
       return retInt(S, OpPC, Dummy);
     break;
 
+  case Builtin::BI__builtin_classify_type:
+    if (interp__builtin_classify_type(S, OpPC, Frame, F, Call))
+      return retInt(S, OpPC, Dummy);
+    break;
+
   default:
     return false;
   }
diff --git a/clang/test/Sema/builtin-classify-type.c b/clang/test/Sema/builtin-classify-type.c
index a222ac8af0e32fd..ea96785550c9c72 100644
--- a/clang/test/Sema/builtin-classify-type.c
+++ b/clang/test/Sema/builtin-classify-type.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -fblocks
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fblocks -fexperimental-new-constant-interpreter
 
 // expected-no-diagnostics
 
diff --git a/clang/test/SemaCXX/builtin-classify-type.cpp b/clang/test/SemaCXX/builtin-classify-type.cpp
index ebc81425e401f11..f8b2ca2b4807cbb 100644
--- a/clang/test/SemaCXX/builtin-classify-type.cpp
+++ b/clang/test/SemaCXX/builtin-classify-type.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -fblocks %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fblocks %s -fexperimental-new-constant-interpreter
 
 // expected-no-diagnostics
 



More information about the cfe-commits mailing list