[clang] [clang][Interp] Implement __builtin_classify_type (PR #71972)
Timm Baeder via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 10 10:43:19 PST 2023
https://github.com/tbaederr created https://github.com/llvm/llvm-project/pull/71972
This adds some infrastructure for unevaluated builtin calls, but the implementation of the classification itself is almost entirely copied from `ExprConstant.cpp`. I'm open to suggestions on how to share the code.
>From 01e541c726de7bd2aca290f51224e2cafcb4494d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 10 Nov 2023 19:33:21 +0100
Subject: [PATCH] [clang][Interp] Implement __builtin_classify_type
---
clang/lib/AST/Interp/ByteCodeEmitter.cpp | 15 +-
clang/lib/AST/Interp/ByteCodeExprGen.cpp | 10 +-
clang/lib/AST/Interp/Function.cpp | 7 +-
clang/lib/AST/Interp/Function.h | 5 +-
clang/lib/AST/Interp/Interp.cpp | 3 +-
clang/lib/AST/Interp/InterpBuiltin.cpp | 193 +++++++++++++++++++
clang/test/Sema/builtin-classify-type.c | 1 +
clang/test/SemaCXX/builtin-classify-type.cpp | 1 +
8 files changed, 222 insertions(+), 13 deletions(-)
diff --git a/clang/lib/AST/Interp/ByteCodeEmitter.cpp b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
index c8abb7c17a38ba2..89b7708c0c2a12f 100644
--- a/clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -14,6 +14,7 @@
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/DeclCXX.h"
+#include "clang/Basic/Builtins.h"
#include <type_traits>
using namespace clang;
@@ -84,10 +85,16 @@ ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
// Create a handle over the emitted code.
Function *Func = P.getFunction(FuncDecl);
- if (!Func)
- Func = P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
- std::move(ParamDescriptors),
- std::move(ParamOffsets), HasThisPointer, HasRVO);
+ if (!Func) {
+ bool IsUnevaluatedBuiltin = false;
+ if (unsigned BI = FuncDecl->getBuiltinID())
+ IsUnevaluatedBuiltin = Ctx.getASTContext().BuiltinInfo.isUnevaluated(BI);
+
+ Func =
+ P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
+ std::move(ParamDescriptors), std::move(ParamOffsets),
+ HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
+ }
assert(Func);
// For not-yet-defined functions, we only create a Function instance and
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 15a717089660337..f1aa1d22315f658 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -2226,10 +2226,12 @@ bool ByteCodeExprGen<Emitter>::VisitBuiltinCallExpr(const CallExpr *E) {
if (!Func)
return false;
- // Put arguments on the stack.
- for (const auto *Arg : E->arguments()) {
- if (!this->visit(Arg))
- return false;
+ if (!Func->isUnevaluatedBuiltin()) {
+ // Put arguments on the stack.
+ for (const auto *Arg : E->arguments()) {
+ if (!this->visit(Arg))
+ return false;
+ }
}
if (!this->emitCallBI(Func, E, E))
diff --git a/clang/lib/AST/Interp/Function.cpp b/clang/lib/AST/Interp/Function.cpp
index 357aff7fe6229b9..784e6a2df1d8806 100644
--- a/clang/lib/AST/Interp/Function.cpp
+++ b/clang/lib/AST/Interp/Function.cpp
@@ -20,11 +20,12 @@ Function::Function(Program &P, const FunctionDecl *F, unsigned ArgSize,
llvm::SmallVectorImpl<PrimType> &&ParamTypes,
llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
llvm::SmallVectorImpl<unsigned> &&ParamOffsets,
- bool HasThisPointer, bool HasRVO)
+ bool HasThisPointer, bool HasRVO, bool UnevaluatedBuiltin)
: P(P), Loc(F->getBeginLoc()), F(F), ArgSize(ArgSize),
ParamTypes(std::move(ParamTypes)), Params(std::move(Params)),
ParamOffsets(std::move(ParamOffsets)), HasThisPointer(HasThisPointer),
- HasRVO(HasRVO), Variadic(F->isVariadic()) {}
+ HasRVO(HasRVO), Variadic(F->isVariadic()),
+ IsUnevaluatedBuiltin(UnevaluatedBuiltin) {}
Function::ParamDescriptor Function::getParamDescriptor(unsigned Offset) const {
auto It = Params.find(Offset);
@@ -50,7 +51,7 @@ bool Function::isVirtual() const {
}
bool Function::needsRuntimeArgPop(const ASTContext &Ctx) const {
- if (!isBuiltin())
+ if (!isBuiltin() || isUnevaluatedBuiltin())
return false;
return Ctx.BuiltinInfo.hasCustomTypechecking(getBuiltinID());
}
diff --git a/clang/lib/AST/Interp/Function.h b/clang/lib/AST/Interp/Function.h
index be9b1733635f725..cbd17d90ada3944 100644
--- a/clang/lib/AST/Interp/Function.h
+++ b/clang/lib/AST/Interp/Function.h
@@ -179,6 +179,8 @@ class Function final {
bool isBuiltin() const { return F->getBuiltinID() != 0; }
+ bool isUnevaluatedBuiltin() const { return IsUnevaluatedBuiltin; }
+
/// Does this function need its arguments to be classified at runtime
/// rather than at bytecode-compile-time?
bool needsRuntimeArgPop(const ASTContext &Ctx) const;
@@ -195,7 +197,7 @@ class Function final {
llvm::SmallVectorImpl<PrimType> &&ParamTypes,
llvm::DenseMap<unsigned, ParamDescriptor> &&Params,
llvm::SmallVectorImpl<unsigned> &&ParamOffsets, bool HasThisPointer,
- bool HasRVO);
+ bool HasRVO, bool UnevaluatedBuiltin);
/// Sets the code of a function.
void setCode(unsigned NewFrameSize, std::vector<std::byte> &&NewCode,
@@ -254,6 +256,7 @@ class Function final {
bool HasBody = false;
bool Defined = false;
bool Variadic = false;
+ bool IsUnevaluatedBuiltin = false;
public:
/// Dumps the disassembled bytecode to \c llvm::errs().
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 144b674451e353c..9c96eff46171dba 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -145,7 +145,8 @@ void cleanupAfterFunctionCall(InterpState &S, CodePtr OpPC) {
return;
}
- if (S.Current->Caller && CurFunc->isVariadic()) {
+ if (S.Current->Caller && CurFunc->isVariadic() &&
+ !CurFunc->isUnevaluatedBuiltin()) {
// CallExpr we're look for is at the return PC of the current function, i.e.
// in the caller.
// This code path should be executed very rarely.
diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp
index f26d298f5b60045..40e6057618f7109 100644
--- a/clang/lib/AST/Interp/InterpBuiltin.cpp
+++ b/clang/lib/AST/Interp/InterpBuiltin.cpp
@@ -439,6 +439,194 @@ static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC,
return true;
}
+/// Values returned by __builtin_classify_type, chosen to match the values
+/// produced by GCC's builtin.
+enum class GCCTypeClass {
+ None = -1,
+ Void = 0,
+ Integer = 1,
+ // GCC reserves 2 for character types, but instead classifies them as
+ // integers.
+ Enum = 3,
+ Bool = 4,
+ Pointer = 5,
+ // GCC reserves 6 for references, but appears to never use it (because
+ // expressions never have reference type, presumably).
+ PointerToDataMember = 7,
+ RealFloat = 8,
+ Complex = 9,
+ // GCC reserves 10 for functions, but does not use it since GCC version 6 due
+ // to decay to pointer. (Prior to version 6 it was only used in C++ mode).
+ // GCC claims to reserve 11 for pointers to member functions, but *actually*
+ // uses 12 for that purpose, same as for a class or struct. Maybe it
+ // internally implements a pointer to member as a struct? Who knows.
+ PointerToMemberFunction = 12, // Not a bug, see above.
+ ClassOrStruct = 12,
+ Union = 13,
+ // GCC reserves 14 for arrays, but does not use it since GCC version 6 due to
+ // decay to pointer. (Prior to version 6 it was only used in C++ mode).
+ // GCC reserves 15 for strings, but actually uses 5 (pointer) for string
+ // literals.
+};
+
+/// Classify the non-dependent type \p T the same way GCC's
+/// __builtin_classify_type does; \p LangOpts decides how enums are classified.
+static GCCTypeClass EvaluateBuiltinClassifyType(QualType T,
+ const LangOptions &LangOpts) {
+ assert(!T->isDependentType() && "unexpected dependent type");
+
+ QualType CanTy = T.getCanonicalType();
+
+ switch (CanTy->getTypeClass()) {
+#define TYPE(ID, BASE)
+#define DEPENDENT_TYPE(ID, BASE) case Type::ID:
+#define NON_CANONICAL_TYPE(ID, BASE) case Type::ID:
+#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(ID, BASE) case Type::ID:
+#include "clang/AST/TypeNodes.inc"
+ case Type::Auto:
+ case Type::DeducedTemplateSpecialization:
+ llvm_unreachable("unexpected non-canonical or dependent type");
+
+ case Type::Builtin:
+ switch (cast<BuiltinType>(CanTy)->getKind()) {
+#define BUILTIN_TYPE(ID, SINGLETON_ID)
+#define SIGNED_TYPE(ID, SINGLETON_ID) \
+ case BuiltinType::ID: \
+ return GCCTypeClass::Integer;
+#define FLOATING_TYPE(ID, SINGLETON_ID) \
+ case BuiltinType::ID: \
+ return GCCTypeClass::RealFloat;
+#define PLACEHOLDER_TYPE(ID, SINGLETON_ID) \
+ case BuiltinType::ID: \
+ break;
+#include "clang/AST/BuiltinTypes.def"
+ case BuiltinType::Void:
+ return GCCTypeClass::Void;
+
+ case BuiltinType::Bool:
+ return GCCTypeClass::Bool;
+
+ case BuiltinType::Char_U:
+ case BuiltinType::UChar:
+ case BuiltinType::WChar_U:
+ case BuiltinType::Char8:
+ case BuiltinType::Char16:
+ case BuiltinType::Char32:
+ case BuiltinType::UShort:
+ case BuiltinType::UInt:
+ case BuiltinType::ULong:
+ case BuiltinType::ULongLong:
+ case BuiltinType::UInt128:
+ return GCCTypeClass::Integer;
+
+ case BuiltinType::UShortAccum:
+ case BuiltinType::UAccum:
+ case BuiltinType::ULongAccum:
+ case BuiltinType::UShortFract:
+ case BuiltinType::UFract:
+ case BuiltinType::ULongFract:
+ case BuiltinType::SatUShortAccum:
+ case BuiltinType::SatUAccum:
+ case BuiltinType::SatULongAccum:
+ case BuiltinType::SatUShortFract:
+ case BuiltinType::SatUFract:
+ case BuiltinType::SatULongFract:
+ return GCCTypeClass::None;
+
+ case BuiltinType::NullPtr:
+ // NullPtr falls through: it and the builtin kinds below all yield None.
+ case BuiltinType::ObjCId:
+ case BuiltinType::ObjCClass:
+ case BuiltinType::ObjCSel:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) case BuiltinType::Id:
+#include "clang/Basic/OpenCLExtensionTypes.def"
+ case BuiltinType::OCLSampler:
+ case BuiltinType::OCLEvent:
+ case BuiltinType::OCLClkEvent:
+ case BuiltinType::OCLQueue:
+ case BuiltinType::OCLReserveID:
+#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/AArch64SVEACLETypes.def"
+#define PPC_VECTOR_TYPE(Name, Id, Size) case BuiltinType::Id:
+#include "clang/Basic/PPCTypes.def"
+#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/RISCVVTypes.def"
+#define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/WebAssemblyReferenceTypes.def"
+ return GCCTypeClass::None;
+
+ case BuiltinType::Dependent:
+ llvm_unreachable("unexpected dependent type");
+ };
+ llvm_unreachable("unexpected placeholder type");
+
+ case Type::Enum:
+ return LangOpts.CPlusPlus ? GCCTypeClass::Enum : GCCTypeClass::Integer;
+
+ case Type::Pointer:
+ case Type::ConstantArray:
+ case Type::VariableArray:
+ case Type::IncompleteArray:
+ case Type::FunctionNoProto:
+ case Type::FunctionProto:
+ return GCCTypeClass::Pointer;
+
+ case Type::MemberPointer:
+ return CanTy->isMemberDataPointerType()
+ ? GCCTypeClass::PointerToDataMember
+ : GCCTypeClass::PointerToMemberFunction;
+
+ case Type::Complex:
+ return GCCTypeClass::Complex;
+
+ case Type::Record:
+ return CanTy->isUnionType() ? GCCTypeClass::Union
+ : GCCTypeClass::ClassOrStruct;
+
+ case Type::Atomic:
+ // GCC classifies _Atomic T the same as T.
+ return EvaluateBuiltinClassifyType(
+ CanTy->castAs<AtomicType>()->getValueType(), LangOpts);
+
+ case Type::BlockPointer:
+ case Type::Vector:
+ case Type::ExtVector:
+ case Type::ConstantMatrix:
+ case Type::ObjCObject:
+ case Type::ObjCInterface:
+ case Type::ObjCObjectPointer:
+ case Type::Pipe:
+ case Type::BitInt:
+ // GCC classifies vectors as None. We follow its lead and classify all
+ // other types that don't fit into the regular classification the same way.
+ return GCCTypeClass::None;
+
+ case Type::LValueReference:
+ case Type::RValueReference:
+ llvm_unreachable("invalid type for expression");
+ }
+
+ llvm_unreachable("unexpected type class");
+}
+
+static bool interp__builtin_classify_type(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const Function *Func,
+ const CallExpr *Call) {
+ // This is an unevaluated call, so there are no arguments on the stack; the result is computed from the type of the call's single argument expression.
+ assert(Call->getNumArgs() == 1);
+ const Expr *Arg = Call->getArg(0);
+
+ GCCTypeClass ResultClass =
+ EvaluateBuiltinClassifyType(Arg->getType(), S.getLangOpts());
+ int32_t ReturnVal = static_cast<int32_t>(ResultClass);
+ pushInt(S, ReturnVal);
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
const CallExpr *Call) {
InterpFrame *Frame = S.Current;
@@ -576,6 +764,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
return retInt(S, OpPC, Dummy);
break;
+ case Builtin::BI__builtin_classify_type:
+ if (interp__builtin_classify_type(S, OpPC, Frame, F, Call))
+ return retInt(S, OpPC, Dummy);
+ break;
+
default:
return false;
}
diff --git a/clang/test/Sema/builtin-classify-type.c b/clang/test/Sema/builtin-classify-type.c
index a222ac8af0e32fd..ea96785550c9c72 100644
--- a/clang/test/Sema/builtin-classify-type.c
+++ b/clang/test/Sema/builtin-classify-type.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s -fblocks
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fblocks -fexperimental-new-constant-interpreter
// expected-no-diagnostics
diff --git a/clang/test/SemaCXX/builtin-classify-type.cpp b/clang/test/SemaCXX/builtin-classify-type.cpp
index ebc81425e401f11..f8b2ca2b4807cbb 100644
--- a/clang/test/SemaCXX/builtin-classify-type.cpp
+++ b/clang/test/SemaCXX/builtin-classify-type.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -verify -fblocks %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fblocks %s -fexperimental-new-constant-interpreter
// expected-no-diagnostics
More information about the cfe-commits
mailing list