[Mlir-commits] [mlir] 5acd6e0 - [AsyncToLLVM] Align frames to 64 bytes
Benjamin Kramer
llvmlistbot at llvm.org
Mon Jan 17 09:52:27 PST 2022
Author: Benjamin Kramer
Date: 2022-01-17T18:51:42+01:00
New Revision: 5acd6e05221574643feb8477ad07e89fd766ec53
URL: https://github.com/llvm/llvm-project/commit/5acd6e05221574643feb8477ad07e89fd766ec53
DIFF: https://github.com/llvm/llvm-project/commit/5acd6e05221574643feb8477ad07e89fd766ec53.diff
LOG: [AsyncToLLVM] Align frames to 64 bytes
Coroutine lowering always takes the natural alignment when spilling to
the frame (issue #53148), so using AVX2 or AVX512 in a coroutine doesn't
work. Always overalign to 64 bytes to avoid this issue until we have a
better solution.
Differential Revision: https://reviews.llvm.org/D117501
Added:
Modified:
mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
index a3ed0e15b6849..7d504709082de 100644
--- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
+++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
@@ -14,6 +14,7 @@
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Async/IR/Async.h"
+#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h"
@@ -227,37 +228,6 @@ static void addAsyncRuntimeApiDeclarations(ModuleOp module) {
AsyncAPI::awaitAllAndExecuteFunctionType(ctx));
}
-//===----------------------------------------------------------------------===//
-// Add malloc/free declarations to the module.
-//===----------------------------------------------------------------------===//
-
-static constexpr const char *kMalloc = "malloc";
-static constexpr const char *kFree = "free";
-
-static void addLLVMFuncDecl(ModuleOp module, ImplicitLocOpBuilder &builder,
- StringRef name, Type ret, ArrayRef<Type> params) {
- if (module.lookupSymbol(name))
- return;
- Type type = LLVM::LLVMFunctionType::get(ret, params);
- builder.create<LLVM::LLVMFuncOp>(name, type);
-}
-
-/// Adds malloc/free declarations to the module.
-static void addCRuntimeDeclarations(ModuleOp module) {
- using namespace mlir::LLVM;
-
- MLIRContext *ctx = module.getContext();
- auto builder =
- ImplicitLocOpBuilder::atBlockEnd(module.getLoc(), module.getBody());
-
- auto voidTy = LLVMVoidType::get(ctx);
- auto i64 = IntegerType::get(ctx, 64);
- auto i8Ptr = LLVMPointerType::get(IntegerType::get(ctx, 8));
-
- addLLVMFuncDecl(module, builder, kMalloc, i8Ptr, {i64});
- addLLVMFuncDecl(module, builder, kFree, voidTy, {i8Ptr});
-}
-
//===----------------------------------------------------------------------===//
// Coroutine resume function wrapper.
//===----------------------------------------------------------------------===//
@@ -365,11 +335,18 @@ class CoroBeginOpConversion : public OpConversionPattern<CoroBeginOp> {
// Get coroutine frame size: @llvm.coro.size.i64.
auto coroSize =
rewriter.create<LLVM::CoroSizeOp>(loc, rewriter.getI64Type());
+ // The coroutine lowering doesn't properly account for alignment of the
+ // frame, so align everything to 64 bytes which ought to be enough for
+ // everyone. https://llvm.org/PR53148
+ auto coroAlign = rewriter.create<LLVM::ConstantOp>(
+ op->getLoc(), rewriter.getI64Type(), rewriter.getI64IntegerAttr(64));
// Allocate memory for the coroutine frame.
+ auto allocFuncOp = LLVM::lookupOrCreateAlignedAllocFn(
+ op->getParentOfType<ModuleOp>(), rewriter.getI64Type());
auto coroAlloc = rewriter.create<LLVM::CallOp>(
- loc, i8Ptr, SymbolRefAttr::get(rewriter.getContext(), kMalloc),
- ValueRange(coroSize.getResult()));
+ loc, i8Ptr, SymbolRefAttr::get(allocFuncOp),
+ ValueRange{coroAlign, coroSize.getResult()});
// Begin a coroutine: @llvm.coro.begin.
auto coroId = CoroBeginOpAdaptor(adaptor.getOperands()).id();
@@ -401,9 +378,11 @@ class CoroFreeOpConversion : public OpConversionPattern<CoroFreeOp> {
rewriter.create<LLVM::CoroFreeOp>(loc, i8Ptr, adaptor.getOperands());
// Free the memory.
- rewriter.replaceOpWithNewOp<LLVM::CallOp>(
- op, TypeRange(), SymbolRefAttr::get(rewriter.getContext(), kFree),
- ValueRange(coroMem.getResult()));
+ auto freeFuncOp =
+ LLVM::lookupOrCreateFreeFn(op->getParentOfType<ModuleOp>());
+ rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, TypeRange(),
+ SymbolRefAttr::get(freeFuncOp),
+ ValueRange(coroMem.getResult()));
return success();
}
@@ -968,7 +947,6 @@ void ConvertAsyncToLLVMPass::runOnOperation() {
// We delay adding the resume function until it's needed because it currently
// fails to compile unless '-O0' is specified.
addAsyncRuntimeApiDeclarations(module);
- addCRuntimeDeclarations(module);
// Lower async.runtime and async.coro operations to Async Runtime API and
// LLVM coroutine intrinsics.
diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
index 7e47448e7f3ba..1377854bf3ab4 100644
--- a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
@@ -14,7 +14,8 @@ func @coro_begin() {
// CHECK: %[[ID:.*]] = llvm.intr.coro.id
%0 = async.coro.id
// CHECK: %[[SIZE:.*]] = llvm.intr.coro.size : i64
- // CHECK: %[[ALLOC:.*]] = llvm.call @malloc(%[[SIZE]])
+ // CHECK: %[[ALIGN:.*]] = llvm.mlir.constant(64 : i64) : i64
+ // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[SIZE]])
// CHECK: %[[HDL:.*]] = llvm.intr.coro.begin %[[ID]], %[[ALLOC]]
%1 = async.coro.begin %0
return
More information about the Mlir-commits mailing list