[Mlir-commits] [mlir] [mlir][sparse] fix stack overflow due to memref.alloca in loops (PR #69786)

Peiming Liu llvmlistbot at llvm.org
Fri Oct 20 14:50:02 PDT 2023


https://github.com/PeimingLiu updated https://github.com/llvm/llvm-project/pull/69786

>From e93c52708b543b688fa19d9fb18c3003ba78f7fc Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 19:59:20 +0000
Subject: [PATCH 1/5] [mlir][sparse] fix stack overflow due to alloca
 instruction inside loops

---
 mlir/include/mlir/Dialect/SCF/IR/SCFOps.td            |  2 +-
 .../Transforms/SparseTensorConversion.cpp             | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index 6ac0912f6f706c5..d079b1e5d92eece 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -120,7 +120,7 @@ def ExecuteRegionOp : SCF_Op<"execute_region", [
 //===----------------------------------------------------------------------===//
 
 def ForOp : SCF_Op<"for",
-      [AutomaticAllocationScope, DeclareOpInterfaceMethods<LoopLikeOpInterface,
+      [DeclareOpInterfaceMethods<LoopLikeOpInterface,
        ["getInits", "getSingleInductionVar", "getSingleLowerBound",
         "getSingleStep", "getSingleUpperBound", "getYieldedValues",
         "promoteIfSingleIteration", "replaceWithAdditionalYields"]>,
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 73f5e3eeb7d512e..64874c49f6641fb 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -589,6 +589,13 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
     const auto stt = getSparseTensorType(op.getTensor());
     const auto elemTp = stt.getElementType();
     const Level lvlRank = stt.getLvlRank();
+    // We need a alloc scope here as the InsertOp is always generated inside a
+    // loop, which lead to stack overflow.
+    auto allocaScope = rewriter.create<memref::AllocaScopeOp>(
+        loc, adaptor.getTensor().getType());
+    Block *scopeBlock = rewriter.createBlock(&allocaScope.getBodyRegion());
+    rewriter.setInsertionPointToStart(scopeBlock);
+
     auto lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
     auto vref = genAllocaScalar(rewriter, loc, elemTp);
     storeAll(rewriter, loc, lvlCoords, adaptor.getLvlCoords());
@@ -596,7 +603,9 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
     SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)};
     createFuncCall(rewriter, loc, name, {},
                    {adaptor.getTensor(), lvlCoords, vref}, EmitCInterface::On);
-    rewriter.replaceOp(op, adaptor.getTensor());
+    rewriter.create<memref::AllocaScopeReturnOp>(loc, adaptor.getTensor());
+    rewriter.setInsertionPointAfter(allocaScope);
+    rewriter.replaceOp(op, allocaScope.getResult(0));
     return success();
   }
 };

>From c96dd30a2a8ed9981dbc285d4334b762c53bcaf6 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 20:04:03 +0000
Subject: [PATCH 2/5] address comments

---
 .../Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 64874c49f6641fb..c55751e6a4d8135 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -589,7 +589,7 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
     const auto stt = getSparseTensorType(op.getTensor());
     const auto elemTp = stt.getElementType();
     const Level lvlRank = stt.getLvlRank();
-    // We need a alloc scope here as the InsertOp is always generated inside a
+    // We need an alloca scope here as the InsertOp is always generated inside a
     // loop, which lead to stack overflow.
     auto allocaScope = rewriter.create<memref::AllocaScopeOp>(
         loc, adaptor.getTensor().getType());

>From 4652da2f44221b0bba2c8577b971cb8dc96b5268 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 21:41:54 +0000
Subject: [PATCH 3/5] hosit alloca outside the loop instead

---
 mlir/include/mlir/Dialect/SCF/IR/SCFOps.td    |  2 +-
 .../Transforms/SparseTensorConversion.cpp     | 23 +++++++++----------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
index d079b1e5d92eece..6ac0912f6f706c5 100644
--- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
+++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -120,7 +120,7 @@ def ExecuteRegionOp : SCF_Op<"execute_region", [
 //===----------------------------------------------------------------------===//
 
 def ForOp : SCF_Op<"for",
-      [DeclareOpInterfaceMethods<LoopLikeOpInterface,
+      [AutomaticAllocationScope, DeclareOpInterfaceMethods<LoopLikeOpInterface,
        ["getInits", "getSingleInductionVar", "getSingleLowerBound",
         "getSingleStep", "getSingleUpperBound", "getYieldedValues",
         "promoteIfSingleIteration", "replaceWithAdditionalYields"]>,
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index c55751e6a4d8135..ba85da140039cab 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -589,23 +589,22 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
     const auto stt = getSparseTensorType(op.getTensor());
     const auto elemTp = stt.getElementType();
     const Level lvlRank = stt.getLvlRank();
-    // We need an alloca scope here as the InsertOp is always generated inside a
-    // loop, which lead to stack overflow.
-    auto allocaScope = rewriter.create<memref::AllocaScopeOp>(
-        loc, adaptor.getTensor().getType());
-    Block *scopeBlock = rewriter.createBlock(&allocaScope.getBodyRegion());
-    rewriter.setInsertionPointToStart(scopeBlock);
-
-    auto lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
-    auto vref = genAllocaScalar(rewriter, loc, elemTp);
+    Value lvlCoords, vref;
+    {
+      OpBuilder::InsertionGuard guard(rewriter);
+      auto loop = op->getParentOfType<LoopLikeOpInterface>();
+      // Hoists alloca outside the loop to avoid stack overflow.
+      rewriter.setInsertionPoint(loop);
+      lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
+      vref = genAllocaScalar(rewriter, loc, elemTp);
+    }
     storeAll(rewriter, loc, lvlCoords, adaptor.getLvlCoords());
     rewriter.create<memref::StoreOp>(loc, adaptor.getValue(), vref);
     SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)};
     createFuncCall(rewriter, loc, name, {},
                    {adaptor.getTensor(), lvlCoords, vref}, EmitCInterface::On);
-    rewriter.create<memref::AllocaScopeReturnOp>(loc, adaptor.getTensor());
-    rewriter.setInsertionPointAfter(allocaScope);
-    rewriter.replaceOp(op, allocaScope.getResult(0));
+
+    rewriter.replaceOp(op, adaptor.getTensor());
     return success();
   }
 };

>From 531ac92a023204bc52196e4e584247a3dee705e0 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 21:48:29 +0000
Subject: [PATCH 4/5] fix bugs

---
 .../SparseTensor/Transforms/SparseTensorConversion.cpp      | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index ba85da140039cab..a711a84938b43ee 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -593,8 +593,10 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
     {
       OpBuilder::InsertionGuard guard(rewriter);
       auto loop = op->getParentOfType<LoopLikeOpInterface>();
-      // Hoists alloca outside the loop to avoid stack overflow.
-      rewriter.setInsertionPoint(loop);
+      if (loop) {
+        // Hoists alloca outside the loop to avoid stack overflow.
+        rewriter.setInsertionPoint(loop);
+      }
       lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
       vref = genAllocaScalar(rewriter, loc, elemTp);
     }

>From 425834b3eadd4e2d50d0dbe805b83669e2c75378 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Fri, 20 Oct 2023 21:49:48 +0000
Subject: [PATCH 5/5] revert unintended change

---
 .../Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp   | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index a711a84938b43ee..a1f725333530d90 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -605,7 +605,6 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
     SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)};
     createFuncCall(rewriter, loc, name, {},
                    {adaptor.getTensor(), lvlCoords, vref}, EmitCInterface::On);
-
     rewriter.replaceOp(op, adaptor.getTensor());
     return success();
   }



More information about the Mlir-commits mailing list