[Mlir-commits] [mlir] [MLIR][LLVMIR] Fix inline byval alloca hoisting out of allocation scope (PR #185399)

Berke Ates llvmlistbot at llvm.org
Mon Mar 9 06:09:16 PDT 2026


https://github.com/Berke-Ates updated https://github.com/llvm/llvm-project/pull/185399

>From 17da49376c712303d3f87137d037cfb9609787c3 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 13:12:18 +0100
Subject: [PATCH 1/2] [MLIR][LLVMIR] Fix inline byval alloca hoisting out of
 allocation scope

---
 .../Transforms/InlinerInterfaceImpl.cpp       | 22 ++++++++++++---
 mlir/test/Dialect/LLVMIR/inlining.mlir        | 27 +++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index 0e43480e82926..baaae51ef5a21 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -603,10 +603,26 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
   // Allocate the new value on the stack.
   Value allocaOp;
   {
-    // Since this is a static alloca, we can put it directly in the entry block,
-    // so they can be absorbed into the prologue/epilogue at code generation.
+    // Walk up from the call site to find the innermost AutomaticAllocationScope
+    // (e.g. an llvm.func or scf.forall). Placing the alloca at the entry block
+    // of that scope keeps it inside parallel regions rather than hoisting it
+    // out, while still landing at the function entry block for the common
+    // non-parallel case.
     OpBuilder::InsertionGuard insertionGuard(builder);
-    Block *entryBlock = &(*argument.getParentRegion()->begin());
+    Block *entryBlock = nullptr;
+    Block *cursor = builder.getInsertionBlock();
+    while (cursor) {
+      Operation *parentOp = cursor->getParentOp();
+      if (!parentOp)
+        break;
+      if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>()) {
+        entryBlock = &cursor->getParent()->front();
+        break;
+      }
+      cursor = parentOp->getBlock();
+    }
+    if (!entryBlock)
+      entryBlock = &(*argument.getParentRegion()->begin());
     builder.setInsertionPointToStart(entryBlock);
     Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                                          builder.getI64IntegerAttr(1));
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index 70ce7ca20986b..e84a4a45ca45b 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -570,6 +570,33 @@ llvm.func @test_byval_global() {
 
 // -----
 
+// Check that inlining does not hoist byval allocas out of automatic allocation
+// scopes, such as parallel forall regions. Each parallel iteration must have
+// its own private copy of the byval argument.
+
+llvm.func @byval_in_parallel(%ptr : !llvm.ptr { llvm.byval = f32 }) {
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_in_parallel_region
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+llvm.func @test_byval_in_parallel_region(%ptr : !llvm.ptr) {
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c1 = arith.constant 1 : index
+  // Verify the alloca is not hoisted out of the parallel region.
+  // CHECK-NOT: llvm.alloca
+  // CHECK: scf.forall
+  scf.forall (%i) = (%c0) to (%c4) step (%c1) {
+    // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f32
+    // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+    llvm.call @byval_in_parallel(%ptr) : (!llvm.ptr) -> ()
+  }
+  llvm.return
+}
+
+// -----
+
 llvm.func @ignored_attrs(%ptr : !llvm.ptr { llvm.inreg, llvm.nocapture, llvm.nofree, llvm.preallocated = i32, llvm.returned, llvm.alignstack = 32 : i64, llvm.writeonly, llvm.noundef, llvm.nonnull }, %x : i32 { llvm.zeroext }) -> (!llvm.ptr { llvm.noundef, llvm.inreg, llvm.nonnull }) {
   llvm.return %ptr : !llvm.ptr
 }

>From 5e44e2c7337b94174c445cb7043f452539f28566 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 14:09:04 +0100
Subject: [PATCH 2/2] addressed PR comments

---
 .../LLVMIR/Transforms/InlinerInterfaceImpl.cpp | 18 ++++--------------
 mlir/test/Dialect/LLVMIR/inlining.mlir         |  9 ++++-----
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index baaae51ef5a21..b61138ad4678b 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -609,20 +609,10 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
     // out, while still landing at the function entry block for the common
     // non-parallel case.
     OpBuilder::InsertionGuard insertionGuard(builder);
-    Block *entryBlock = nullptr;
-    Block *cursor = builder.getInsertionBlock();
-    while (cursor) {
-      Operation *parentOp = cursor->getParentOp();
-      if (!parentOp)
-        break;
-      if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>()) {
-        entryBlock = &cursor->getParent()->front();
-        break;
-      }
-      cursor = parentOp->getBlock();
-    }
-    if (!entryBlock)
-      entryBlock = &(*argument.getParentRegion()->begin());
+    Operation *scope = builder.getInsertionBlock()->getParentOp();
+    if (!scope->mightHaveTrait<OpTrait::AutomaticAllocationScope>())
+      scope = scope->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+    Block *entryBlock = &scope->getRegion(0).front();
     builder.setInsertionPointToStart(entryBlock);
     Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                                          builder.getI64IntegerAttr(1));
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index e84a4a45ca45b..cc3600af431ea 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -582,15 +582,14 @@ llvm.func @byval_in_parallel(%ptr : !llvm.ptr { llvm.byval = f32 }) {
 // CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
 llvm.func @test_byval_in_parallel_region(%ptr : !llvm.ptr) {
   %c0 = arith.constant 0 : index
-  %c4 = arith.constant 4 : index
-  %c1 = arith.constant 1 : index
-  // Verify the alloca is not hoisted out of the parallel region.
+  // Verify the alloca is not hoisted out of the allocation scope.
   // CHECK-NOT: llvm.alloca
-  // CHECK: scf.forall
-  scf.forall (%i) = (%c0) to (%c4) step (%c1) {
+  // CHECK: test.alloca_scope_region
+  test.alloca_scope_region {
     // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f32
     // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
     llvm.call @byval_in_parallel(%ptr) : (!llvm.ptr) -> ()
+    test.region_yield %c0 : index
   }
   llvm.return
 }



More information about the Mlir-commits mailing list