[clang] [CIR] Upstream a basic version of class LexicalScope (PR #131945)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 18 17:55:00 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clangir
Author: David Olsen (dkolsen-pgi)
<details>
<summary>Changes</summary>
Upstream the parts of class `CIRGenFunction::LexicalScope` that implement function return values. There is a bit of other functionality in here, such as the implicit `cir.yield` at the end of a non-function scope, but this is mostly about function returns.
The parts of `LexicalScope` that handle calling destructors, switch statements, ternary expressions, and exception handling still need to be upstreamed.
There is a change in the generated ClangIR (which is why many of the tests needed updating). Return values are stored in the compiler-generated variable `__retval` rather than being passed to the `cir.return` op directly. But there should be no change in the behavior of the generated code.
---
Patch is 47.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131945.diff
12 Files Affected:
- (modified) clang/include/clang/CIR/Dialect/IR/CIROps.td (+16)
- (modified) clang/include/clang/CIR/MissingFeatures.h (+1)
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.cpp (+178-41)
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+147-9)
- (modified) clang/lib/CIR/CodeGen/CIRGenStmt.cpp (+16-17)
- (modified) clang/test/CIR/CodeGen/basic.c (+38-8)
- (modified) clang/test/CIR/CodeGen/basic.cpp (+16-4)
- (modified) clang/test/CIR/CodeGen/cast.cpp (+7-2)
- (modified) clang/test/CIR/CodeGen/unary.cpp (+15)
- (modified) clang/test/CIR/Lowering/basic.cpp (+16-4)
- (modified) clang/test/CIR/Lowering/func-simple.cpp (+36-15)
- (modified) clang/test/CIR/func-simple.cpp (+58-16)
``````````diff
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2a5caf1bf1f63..352d72ff31a8a 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -832,6 +832,22 @@ def FuncOp : CIR_Op<"func", [
let hasVerifier = 1;
}
+//===----------------------------------------------------------------------===//
+// UnreachableOp
+//===----------------------------------------------------------------------===//
+
+def UnreachableOp : CIR_Op<"unreachable", [Terminator]> {
+ let summary = "invoke immediate undefined behavior";
+ let description = [{
+ If the program control flow reaches a `cir.unreachable` operation, the
+ program exhibits undefined behavior immediately. This operation is useful
+ in cases where the unreachability of a program point needs to be explicitly
+ marked.
+ }];
+
+ let assemblyFormat = "attr-dict";
+}
+
//===----------------------------------------------------------------------===//
// TrapOp
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 6adff30f5c91a..e37bff1f548c9 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -89,6 +89,7 @@ struct MissingFeatures {
static bool astVarDeclInterface() { return false; }
static bool stackSaveOp() { return false; }
static bool aggValueSlot() { return false; }
+ static bool generateDebugInfo() { return false; }
static bool fpConstraints() { return false; }
static bool sanitizers() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 2338ec9cd952a..5685339c9e637 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -135,6 +135,13 @@ mlir::Location CIRGenFunction::getLoc(mlir::Location lhs, mlir::Location rhs) {
return mlir::FusedLoc::get(locs, metadata, &getMLIRContext());
}
+void CIRGenFunction::emitAndUpdateRetAlloca(QualType type, mlir::Location loc,
+ CharUnits alignment) {
+ if (!type->isVoidType()) {
+ fnRetAlloca = emitAlloca("__retval", convertType(type), loc, alignment);
+ }
+}
+
void CIRGenFunction::declare(mlir::Value addrVal, const Decl *var, QualType ty,
mlir::Location loc, CharUnits alignment,
bool isParam) {
@@ -149,6 +156,118 @@ void CIRGenFunction::declare(mlir::Value addrVal, const Decl *var, QualType ty,
allocaOp.setConstantAttr(mlir::UnitAttr::get(&getMLIRContext()));
}
+void CIRGenFunction::LexicalScope::cleanup() {
+ CIRGenBuilderTy &builder = cgf.builder;
+ LexicalScope *localScope = cgf.currLexScope;
+
+ if (returnBlock != nullptr) {
+ // Write out the return block, which loads the value from `__retval` and
+ // issues the `cir.return`.
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointToEnd(returnBlock);
+ (void)emitReturn(*returnLoc);
+ }
+
+ mlir::Block *currBlock = builder.getBlock();
+ if (isGlobalInit() && !currBlock)
+ return;
+ if (currBlock->mightHaveTerminator() && currBlock->getTerminator())
+ return;
+
+ // Get rid of any empty block at the end of the scope.
+ bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
+ if (!entryBlock && currBlock->empty()) {
+ currBlock->erase();
+ if (returnBlock != nullptr && returnBlock->getUses().empty())
+ returnBlock->erase();
+ return;
+ }
+
+ // Reached the end of the scope.
+ {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointToEnd(currBlock);
+
+ if (localScope->depth == 0) {
+ // Reached the end of the function.
+ if (returnBlock != nullptr) {
+ if (returnBlock->getUses().empty())
+ returnBlock->erase();
+ else {
+ builder.create<cir::BrOp>(*returnLoc, returnBlock);
+ return;
+ }
+ }
+ emitImplicitReturn();
+ return;
+ }
+ // Reached the end of a non-function scope. Some scopes, such as those
+ // used with the ?: operator, can return a value.
+ if (!localScope->isTernary() && !currBlock->mightHaveTerminator()) {
+ !retVal ? builder.create<cir::YieldOp>(localScope->endLoc)
+ : builder.create<cir::YieldOp>(localScope->endLoc, retVal);
+ }
+ }
+}
+
+cir::ReturnOp CIRGenFunction::LexicalScope::emitReturn(mlir::Location loc) {
+ CIRGenBuilderTy &builder = cgf.getBuilder();
+
+ if (!cgf.curFn.getFunctionType().hasVoidReturn()) {
+ // Load the value from `__retval` and return it via the `cir.return` op.
+ auto value = builder.create<cir::LoadOp>(
+ loc, cgf.curFn.getFunctionType().getReturnType(), *cgf.fnRetAlloca);
+ return builder.create<cir::ReturnOp>(loc,
+ llvm::ArrayRef(value.getResult()));
+ }
+ return builder.create<cir::ReturnOp>(loc);
+}
+
+// This is copyied from CodeGenModule::MayDropFunctionReturn. This is a
+// candidate for sharing between CIRGen and CodeGen.
+static bool mayDropFunctionReturn(const ASTContext &astContext,
+ QualType returnType) {
+ // We can't just discard the return value for a record type with a complex
+ // destructor or a non-trivially copyable type.
+ if (const RecordType *recordType =
+ returnType.getCanonicalType()->getAs<RecordType>()) {
+ if (const auto *classDecl = dyn_cast<CXXRecordDecl>(recordType->getDecl()))
+ return classDecl->hasTrivialDestructor();
+ }
+ return returnType.isTriviallyCopyableType(astContext);
+}
+
+void CIRGenFunction::LexicalScope::emitImplicitReturn() {
+ CIRGenBuilderTy &builder = cgf.getBuilder();
+ LexicalScope *localScope = cgf.currLexScope;
+
+ const auto *fd = cast<clang::FunctionDecl>(cgf.curGD.getDecl());
+
+ // In C++, flowing off the end of a non-void function is always undefined
+ // behavior. In C, flowing off the end of a non-void function is undefined
+ // behavior only if the non-existent return value is used by the caller.
+ // That influences whether the terminating op is trap, unreachable, or
+ // return.
+ if (cgf.getLangOpts().CPlusPlus && !fd->hasImplicitReturnZero() &&
+ !cgf.sawAsmBlock && !fd->getReturnType()->isVoidType() &&
+ builder.getInsertionBlock()) {
+ bool shouldEmitUnreachable =
+ cgf.cgm.getCodeGenOpts().StrictReturn ||
+ !mayDropFunctionReturn(fd->getASTContext(), fd->getReturnType());
+
+ if (shouldEmitUnreachable) {
+ if (cgf.cgm.getCodeGenOpts().OptimizationLevel == 0)
+ builder.create<cir::TrapOp>(localScope->endLoc);
+ else
+ builder.create<cir::UnreachableOp>(localScope->endLoc);
+ builder.clearInsertionPoint();
+ return;
+ }
+ }
+
+ (void)emitReturn(localScope->endLoc);
+}
+
void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType,
cir::FuncOp fn, cir::FuncType funcType,
FunctionArgList args, SourceLocation loc,
@@ -156,7 +275,6 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType,
assert(!curFn &&
"CIRGenFunction can only be used for one function at a time");
- fnRetTy = returnType;
curFn = fn;
const auto *fd = dyn_cast_or_null<FunctionDecl>(gd.getDecl());
@@ -194,6 +312,12 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType,
builder.CIRBaseBuilderTy::createStore(fnBodyBegin, paramVal, addrVal);
}
assert(builder.getInsertionBlock() && "Should be valid");
+
+ // When the current function is not void, create an address to store the
+ // result value.
+ if (!returnType->isVoidType())
+ emitAndUpdateRetAlloca(returnType, getLoc(fd->getBody()->getEndLoc()),
+ getContext().getTypeAlignInChars(returnType));
}
void CIRGenFunction::finishFunction(SourceLocation endLoc) {}
@@ -208,9 +332,24 @@ mlir::LogicalResult CIRGenFunction::emitFunctionBody(const clang::Stmt *body) {
return result;
}
+static void eraseEmptyAndUnusedBlocks(cir::FuncOp func) {
+ // Remove any leftover blocks that are unreachable and empty, since they do
+ // not represent unreachable code useful for warnings nor anything deemed
+ // useful in general.
+ SmallVector<mlir::Block *> blocksToDelete;
+ for (mlir::Block &block : func.getBlocks()) {
+ if (block.empty() && block.getUses().empty())
+ blocksToDelete.push_back(&block);
+ }
+ for (mlir::Block *block : blocksToDelete)
+ block->erase();
+}
+
cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
cir::FuncType funcType) {
const auto funcDecl = cast<FunctionDecl>(gd.getDecl());
+ curGD = gd;
+
SourceLocation loc = funcDecl->getLocation();
Stmt *body = funcDecl->getBody();
SourceRange bodyRange =
@@ -219,55 +358,53 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
SourceLocRAIIObject fnLoc{*this, loc.isValid() ? getLoc(loc)
: builder.getUnknownLoc()};
- // This will be used once more code is upstreamed.
- [[maybe_unused]] mlir::Block *entryBB = fn.addEntryBlock();
+ auto validMLIRLoc = [&](clang::SourceLocation clangLoc) {
+ return clangLoc.isValid() ? getLoc(clangLoc) : builder.getUnknownLoc();
+ };
+ const mlir::Location fusedLoc = mlir::FusedLoc::get(
+ &getMLIRContext(),
+ {validMLIRLoc(bodyRange.getBegin()), validMLIRLoc(bodyRange.getEnd())});
+ mlir::Block *entryBB = fn.addEntryBlock();
FunctionArgList args;
QualType retTy = buildFunctionArgList(gd, args);
- startFunction(gd, retTy, fn, funcType, args, loc, bodyRange.getBegin());
-
- if (isa<CXXDestructorDecl>(funcDecl))
- getCIRGenModule().errorNYI(bodyRange, "C++ destructor definition");
- else if (isa<CXXConstructorDecl>(funcDecl))
- getCIRGenModule().errorNYI(bodyRange, "C++ constructor definition");
- else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
- funcDecl->hasAttr<CUDAGlobalAttr>())
- getCIRGenModule().errorNYI(bodyRange, "CUDA kernel");
- else if (isa<CXXMethodDecl>(funcDecl) &&
- cast<CXXMethodDecl>(funcDecl)->isLambdaStaticInvoker())
- getCIRGenModule().errorNYI(bodyRange, "Lambda static invoker");
- else if (funcDecl->isDefaulted() && isa<CXXMethodDecl>(funcDecl) &&
- (cast<CXXMethodDecl>(funcDecl)->isCopyAssignmentOperator() ||
- cast<CXXMethodDecl>(funcDecl)->isMoveAssignmentOperator()))
- getCIRGenModule().errorNYI(bodyRange, "Default assignment operator");
- else if (body) {
- if (mlir::failed(emitFunctionBody(body))) {
- fn.erase();
- return nullptr;
- }
- } else
- llvm_unreachable("no definition for normal function");
-
- // This code to insert a cir.return or cir.trap at the end of the function is
- // temporary until the function return code, including
- // CIRGenFunction::LexicalScope::emitImplicitReturn(), is upstreamed.
- mlir::Block &lastBlock = fn.getRegion().back();
- if (lastBlock.empty() || !lastBlock.mightHaveTerminator() ||
- !lastBlock.getTerminator()->hasTrait<mlir::OpTrait::IsTerminator>()) {
- builder.setInsertionPointToEnd(&lastBlock);
- if (mlir::isa<cir::VoidType>(funcType.getReturnType())) {
- builder.create<cir::ReturnOp>(getLoc(bodyRange.getEnd()));
+ {
+ LexicalScope lexScope(*this, fusedLoc, entryBB);
+
+ startFunction(gd, retTy, fn, funcType, args, loc, bodyRange.getBegin());
+
+ if (isa<CXXDestructorDecl>(funcDecl))
+ getCIRGenModule().errorNYI(bodyRange, "C++ destructor definition");
+ else if (isa<CXXConstructorDecl>(funcDecl))
+ getCIRGenModule().errorNYI(bodyRange, "C++ constructor definition");
+ else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
+ funcDecl->hasAttr<CUDAGlobalAttr>())
+ getCIRGenModule().errorNYI(bodyRange, "CUDA kernel");
+ else if (isa<CXXMethodDecl>(funcDecl) &&
+ cast<CXXMethodDecl>(funcDecl)->isLambdaStaticInvoker())
+ getCIRGenModule().errorNYI(bodyRange, "Lambda static invoker");
+ else if (funcDecl->isDefaulted() && isa<CXXMethodDecl>(funcDecl) &&
+ (cast<CXXMethodDecl>(funcDecl)->isCopyAssignmentOperator() ||
+ cast<CXXMethodDecl>(funcDecl)->isMoveAssignmentOperator()))
+ getCIRGenModule().errorNYI(bodyRange, "Default assignment operator");
+ else if (body) {
+ if (mlir::failed(emitFunctionBody(body))) {
+ fn.erase();
+ return nullptr;
+ }
} else {
- builder.create<cir::TrapOp>(getLoc(bodyRange.getEnd()));
+ // Anything without a body should have been handled above.
+ llvm_unreachable("no definition for normal function");
}
- }
- if (mlir::failed(fn.verifyBody()))
- return nullptr;
+ if (mlir::failed(fn.verifyBody()))
+ return nullptr;
- finishFunction(bodyRange.getEnd());
+ finishFunction(bodyRange.getEnd());
+ }
+ eraseEmptyAndUnusedBlocks(fn);
return fn;
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index f931da32d51db..b52f5ec734f70 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -49,11 +49,15 @@ class CIRGenFunction : public CIRGenTypeCache {
CIRGenBuilderTy &builder;
public:
- clang::QualType fnRetTy;
+ /// The GlobalDecl for the current function being compiled or the global
+ /// variable currently being initialized.
+ clang::GlobalDecl curGD;
- /// This is the current function or global initializer that is generated code
- /// for.
- mlir::Operation *curFn = nullptr;
+ /// The compiler-generated variable that holds the return value.
+ std::optional<mlir::Value> fnRetAlloca;
+
+ /// The function for which code is currently being generated.
+ cir::FuncOp curFn;
using DeclMapTy = llvm::DenseMap<const clang::Decl *, Address>;
/// This keeps track of the CIR allocas or globals for local C
@@ -67,15 +71,15 @@ class CIRGenFunction : public CIRGenTypeCache {
CIRGenModule &getCIRGenModule() { return cgm; }
const CIRGenModule &getCIRGenModule() const { return cgm; }
- mlir::Block *getCurFunctionEntryBlock() {
- auto fn = mlir::dyn_cast<cir::FuncOp>(curFn);
- assert(fn && "other callables NYI");
- return &fn.getRegion().front();
- }
+ mlir::Block *getCurFunctionEntryBlock() { return &curFn.getRegion().front(); }
/// Sanitizers enabled for this function.
clang::SanitizerSet sanOpts;
+ /// Whether or not a Microsoft-style asm block has been processed within
+ /// this fuction. These can potentially set the return value.
+ bool sawAsmBlock = false;
+
mlir::Type convertTypeForMem(QualType T);
mlir::Type convertType(clang::QualType T);
@@ -131,6 +135,9 @@ class CIRGenFunction : public CIRGenTypeCache {
~VarDeclContext() { restore(); }
};
+ void emitAndUpdateRetAlloca(clang::QualType type, mlir::Location loc,
+ clang::CharUnits alignment);
+
public:
/// Use to track source locations across nested visitor traversals.
/// Always use a `SourceLocRAIIObject` to change currSrcLoc.
@@ -330,9 +337,140 @@ class CIRGenFunction : public CIRGenTypeCache {
FunctionArgList args, clang::SourceLocation loc,
clang::SourceLocation startLoc);
+ /// Represents a scope, including function bodies, compound statements, and
+ /// the substatements of if/while/do/for/switch/try statements. This class
+ /// handles any automatic cleanup, along with the return value.
+ struct LexicalScope {
+ private:
+ // TODO(CIR): This will live in the base class RunCleanupScope once that
+ // class is upstreamed.
+ CIRGenFunction &cgf;
+
+ // Block containing cleanup code for things initialized in this lexical
+ // context (scope).
+ mlir::Block *cleanupBlock = nullptr;
+
+ // Points to the scope entry block. This is useful, for instance, for
+ // helping to insert allocas before finalizing any recursive CodeGen from
+ // switches.
+ mlir::Block *entryBlock;
+
+ LexicalScope *parentScope = nullptr;
+
+ // Only Regular is used at the moment. Support for other kinds will be
+ // added as the relevant statements/expressions are upstreamed.
+ enum Kind {
+ Regular, // cir.if, cir.scope, if_regions
+ Ternary, // cir.ternary
+ Switch, // cir.switch
+ Try, // cir.try
+ GlobalInit // cir.global initialization code
+ };
+ Kind scopeKind = Kind::Regular;
+
+ // The scope return value.
+ mlir::Value retVal = nullptr;
+
+ mlir::Location beginLoc;
+ mlir::Location endLoc;
+
+ public:
+ unsigned depth = 0;
+
+ LexicalScope(CIRGenFunction &cgf, mlir::Location loc, mlir::Block *eb)
+ : cgf(cgf), entryBlock(eb), parentScope(cgf.currLexScope),
+ beginLoc(loc), endLoc(loc) {
+
+ assert(entryBlock && "LexicalScope requires an entry block");
+ cgf.currLexScope = this;
+ if (parentScope)
+ ++depth;
+
+ if (const auto fusedLoc = mlir::dyn_cast<mlir::FusedLoc>(loc)) {
+ assert(fusedLoc.getLocations().size() == 2 && "too many locations");
+ beginLoc = fusedLoc.getLocations()[0];
+ endLoc = fusedLoc.getLocations()[1];
+ }
+ }
+
+ void setRetVal(mlir::Value v) { retVal = v; }
+
+ void cleanup();
+ void restore() { cgf.currLexScope = parentScope; }
+
+ ~LexicalScope() {
+ assert(!cir::MissingFeatures::generateDebugInfo());
+ cleanup();
+ restore();
+ }
+
+ // ---
+ // Kind
+ // ---
+ bool isGlobalInit() { return scopeKind == Kind::GlobalInit; }
+ bool isRegular() { return scopeKind == Kind::Regular; }
+ bool isSwitch() { return scopeKind == Kind::Switch; }
+ bool isTernary() { return scopeKind == Kind::Ternary; }
+ bool isTry() { return scopeKind == Kind::Try; }
+
+ void setAsGlobalInit() { scopeKind = Kind::GlobalInit; }
+ void setAsSwitch() { scopeKind = Kind::Switch; }
+ void setAsTernary() { scopeKind = Kind::Ternary; }
+
+ // ---
+ // Return handling.
+ // ---
+
+ private:
+ // `returnBlock`, `returnLoc`, and all the functions that deal with them
+ // will change and become more complicated when `switch` statements are
+ // upstreamed. `case` statements within the `switch` are in the same scope
+ // but have their own regions. Therefore the LexicalScope will need to
+ // keep track of multiple return blocks.
+ mlir::Block *returnBlock = nullptr;
+ std::optional<mlir::Location> returnLoc;
+
+ // See the comment on `getOrCreateRetBlock`.
+ mlir::Block *createRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
+ assert(returnBlock == nullptr && "only one return block per scope");
+ // Create the cleanup block but don't hook it up just yet.
+ mlir::OpBuilder::InsertionGuard guard(cgf.builder);
+ returnBlock =
+ cgf.builder.createBlock(cgf.builder.getBlock()->getParent());
+ updateRetLoc(returnBlock, loc);
+ return returnBlock;
+ }
+
+ cir::ReturnOp emitReturn(mlir::Location loc);
+ void emitImplicitReturn();
+
+ public:
+ mlir::Block *getRetBlock() { return returnBlock; }
+ mlir::Location getRetLoc(mlir::Block *b) { return *returnLoc; }
+ void updateRetLoc(mlir::Block *b, mlir::Location loc) { returnLoc = loc; }
+
+ // Create the return block for this scope, or return the existing one.
+ // This get-or-create logic is necessary to handle multiple return
+ // statements within the same scope, which can happen if some of them are
+ // dead code or if there is a `goto` into the middle of the scope.
+ mlir::Block *getOrCreateRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
+ if (returnBlock == nullptr) {
+ returnBlock = createRetBlock(cgf, loc);
+ return returnBlock;
+ }
+ updateRetLoc(returnBlock, loc);
+ return returnBlock;
+ }
+
+ mlir::Block *getEntryBlock() { return entryBlock; }
+ };
+
+ LexicalScope *currLexScope = nullptr;
+
Address createTempAlloca(mlir::Type ty, CharUnits align, mlir::Location loc,
const Tw...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/131945
More information about the cfe-commits
mailing list