[Mlir-commits] [mlir] [mlir][sparse] fix stack overflow due to memref.alloca in loops (PR #69786)
Peiming Liu
llvmlistbot at llvm.org
Fri Oct 20 14:48:44 PDT 2023
https://github.com/PeimingLiu updated https://github.com/llvm/llvm-project/pull/69786
>From e93c52708b543b688fa19d9fb18c3003ba78f7fc Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 19:59:20 +0000
Subject: [PATCH 1/4] [mlir][sparse] fix stack overflow due to alloca
instruction inside loops
---
mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 2 +-
.../Transforms/SparseTensorConversion.cpp | 11 ++++++++++-
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index 6ac0912f6f706c5..d079b1e5d92eece 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -120,7 +120,7 @@ def ExecuteRegionOp : SCF_Op<"execute_region", [
//===----------------------------------------------------------------------===//
def ForOp : SCF_Op<"for",
- [AutomaticAllocationScope, DeclareOpInterfaceMethods<LoopLikeOpInterface,
+ [DeclareOpInterfaceMethods<LoopLikeOpInterface,
["getInits", "getSingleInductionVar", "getSingleLowerBound",
"getSingleStep", "getSingleUpperBound", "getYieldedValues",
"promoteIfSingleIteration", "replaceWithAdditionalYields"]>,
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 73f5e3eeb7d512e..64874c49f6641fb 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -589,6 +589,13 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
const auto stt = getSparseTensorType(op.getTensor());
const auto elemTp = stt.getElementType();
const Level lvlRank = stt.getLvlRank();
+ // We need a alloc scope here as the InsertOp is always generated inside a
+ // loop, which lead to stack overflow.
+ auto allocaScope = rewriter.create<memref::AllocaScopeOp>(
+ loc, adaptor.getTensor().getType());
+ Block *scopeBlock = rewriter.createBlock(&allocaScope.getBodyRegion());
+ rewriter.setInsertionPointToStart(scopeBlock);
+
auto lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
auto vref = genAllocaScalar(rewriter, loc, elemTp);
storeAll(rewriter, loc, lvlCoords, adaptor.getLvlCoords());
@@ -596,7 +603,9 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)};
createFuncCall(rewriter, loc, name, {},
{adaptor.getTensor(), lvlCoords, vref}, EmitCInterface::On);
- rewriter.replaceOp(op, adaptor.getTensor());
+ rewriter.create<memref::AllocaScopeReturnOp>(loc, adaptor.getTensor());
+ rewriter.setInsertionPointAfter(allocaScope);
+ rewriter.replaceOp(op, allocaScope.getResult(0));
return success();
}
};
>From c96dd30a2a8ed9981dbc285d4334b762c53bcaf6 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 20:04:03 +0000
Subject: [PATCH 2/4] address comments
---
.../Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 64874c49f6641fb..c55751e6a4d8135 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -589,7 +589,7 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
const auto stt = getSparseTensorType(op.getTensor());
const auto elemTp = stt.getElementType();
const Level lvlRank = stt.getLvlRank();
- // We need a alloc scope here as the InsertOp is always generated inside a
+ // We need an alloca scope here as the InsertOp is always generated inside a
// loop, which lead to stack overflow.
auto allocaScope = rewriter.create<memref::AllocaScopeOp>(
loc, adaptor.getTensor().getType());
>From 4652da2f44221b0bba2c8577b971cb8dc96b5268 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 21:41:54 +0000
Subject: [PATCH 3/4] hosit alloca outside the loop instead
---
mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 2 +-
.../Transforms/SparseTensorConversion.cpp | 23 +++++++++----------
2 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index d079b1e5d92eece..6ac0912f6f706c5 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -120,7 +120,7 @@ def ExecuteRegionOp : SCF_Op<"execute_region", [
//===----------------------------------------------------------------------===//
def ForOp : SCF_Op<"for",
- [DeclareOpInterfaceMethods<LoopLikeOpInterface,
+ [AutomaticAllocationScope, DeclareOpInterfaceMethods<LoopLikeOpInterface,
["getInits", "getSingleInductionVar", "getSingleLowerBound",
"getSingleStep", "getSingleUpperBound", "getYieldedValues",
"promoteIfSingleIteration", "replaceWithAdditionalYields"]>,
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index c55751e6a4d8135..ba85da140039cab 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -589,23 +589,22 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
const auto stt = getSparseTensorType(op.getTensor());
const auto elemTp = stt.getElementType();
const Level lvlRank = stt.getLvlRank();
- // We need an alloca scope here as the InsertOp is always generated inside a
- // loop, which lead to stack overflow.
- auto allocaScope = rewriter.create<memref::AllocaScopeOp>(
- loc, adaptor.getTensor().getType());
- Block *scopeBlock = rewriter.createBlock(&allocaScope.getBodyRegion());
- rewriter.setInsertionPointToStart(scopeBlock);
-
- auto lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
- auto vref = genAllocaScalar(rewriter, loc, elemTp);
+ Value lvlCoords, vref;
+ {
+ OpBuilder::InsertionGuard guard(rewriter);
+ auto loop = op->getParentOfType<LoopLikeOpInterface>();
+ // Hoists alloca outside the loop to avoid stack overflow.
+ rewriter.setInsertionPoint(loop);
+ lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
+ vref = genAllocaScalar(rewriter, loc, elemTp);
+ }
storeAll(rewriter, loc, lvlCoords, adaptor.getLvlCoords());
rewriter.create<memref::StoreOp>(loc, adaptor.getValue(), vref);
SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)};
createFuncCall(rewriter, loc, name, {},
{adaptor.getTensor(), lvlCoords, vref}, EmitCInterface::On);
- rewriter.create<memref::AllocaScopeReturnOp>(loc, adaptor.getTensor());
- rewriter.setInsertionPointAfter(allocaScope);
- rewriter.replaceOp(op, allocaScope.getResult(0));
+
+ rewriter.replaceOp(op, adaptor.getTensor());
return success();
}
};
>From 531ac92a023204bc52196e4e584247a3dee705e0 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 21:48:29 +0000
Subject: [PATCH 4/4] fix bugs
---
.../SparseTensor/Transforms/SparseTensorConversion.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index ba85da140039cab..a711a84938b43ee 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -593,8 +593,10 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
{
OpBuilder::InsertionGuard guard(rewriter);
auto loop = op->getParentOfType<LoopLikeOpInterface>();
- // Hoists alloca outside the loop to avoid stack overflow.
- rewriter.setInsertionPoint(loop);
+ if (loop) {
+ // Hoists alloca outside the loop to avoid stack overflow.
+ rewriter.setInsertionPoint(loop);
+ }
lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
vref = genAllocaScalar(rewriter, loc, elemTp);
}
More information about the Mlir-commits
mailing list