[Mlir-commits] [mlir] [MLIR][LLVMIR] Preserve byval alignment in memcpy after inlining (PR #185433)

Berke Ates llvmlistbot at llvm.org
Mon Mar 9 07:54:03 PDT 2026


https://github.com/Berke-Ates updated https://github.com/llvm/llvm-project/pull/185433

>From 17da49376c712303d3f87137d037cfb9609787c3 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 13:12:18 +0100
Subject: [PATCH 1/4] [MLIR][LLVMIR] Fix inline byval alloca hoisting out of
 allocation scope

---
 .../Transforms/InlinerInterfaceImpl.cpp       | 22 ++++++++++++---
 mlir/test/Dialect/LLVMIR/inlining.mlir        | 27 +++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index 0e43480e82926..baaae51ef5a21 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -603,10 +603,26 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
   // Allocate the new value on the stack.
   Value allocaOp;
   {
-    // Since this is a static alloca, we can put it directly in the entry block,
-    // so they can be absorbed into the prologue/epilogue at code generation.
+    // Walk up from the call site to find the innermost AutomaticAllocationScope
+    // (e.g. an llvm.func or scf.forall). Placing the alloca at the entry block
+    // of that scope keeps it inside parallel regions rather than hoisting it
+    // out, while still landing at the function entry block for the common
+    // non-parallel case.
     OpBuilder::InsertionGuard insertionGuard(builder);
-    Block *entryBlock = &(*argument.getParentRegion()->begin());
+    Block *entryBlock = nullptr;
+    Block *cursor = builder.getInsertionBlock();
+    while (cursor) {
+      Operation *parentOp = cursor->getParentOp();
+      if (!parentOp)
+        break;
+      if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>()) {
+        entryBlock = &cursor->getParent()->front();
+        break;
+      }
+      cursor = parentOp->getBlock();
+    }
+    if (!entryBlock)
+      entryBlock = &(*argument.getParentRegion()->begin());
     builder.setInsertionPointToStart(entryBlock);
     Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                                          builder.getI64IntegerAttr(1));
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index 70ce7ca20986b..e84a4a45ca45b 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -570,6 +570,33 @@ llvm.func @test_byval_global() {
 
 // -----
 
+// Check that inlining does not hoist byval allocas out of automatic allocation
+// scopes, such as parallel forall regions. Each parallel iteration must have
+// its own private copy of the byval argument.
+
+llvm.func @byval_in_parallel(%ptr : !llvm.ptr { llvm.byval = f32 }) {
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_in_parallel_region
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+llvm.func @test_byval_in_parallel_region(%ptr : !llvm.ptr) {
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c1 = arith.constant 1 : index
+  // Verify the alloca is not hoisted out of the parallel region.
+  // CHECK-NOT: llvm.alloca
+  // CHECK: scf.forall
+  scf.forall (%i) = (%c0) to (%c4) step (%c1) {
+    // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f32
+    // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+    llvm.call @byval_in_parallel(%ptr) : (!llvm.ptr) -> ()
+  }
+  llvm.return
+}
+
+// -----
+
 llvm.func @ignored_attrs(%ptr : !llvm.ptr { llvm.inreg, llvm.nocapture, llvm.nofree, llvm.preallocated = i32, llvm.returned, llvm.alignstack = 32 : i64, llvm.writeonly, llvm.noundef, llvm.nonnull }, %x : i32 { llvm.zeroext }) -> (!llvm.ptr { llvm.noundef, llvm.inreg, llvm.nonnull }) {
   llvm.return %ptr : !llvm.ptr
 }

>From 5e44e2c7337b94174c445cb7043f452539f28566 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 14:09:04 +0100
Subject: [PATCH 2/4] addressed PR comments

---
 .../LLVMIR/Transforms/InlinerInterfaceImpl.cpp | 18 ++++--------------
 mlir/test/Dialect/LLVMIR/inlining.mlir         |  9 ++++-----
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index baaae51ef5a21..b61138ad4678b 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -609,20 +609,10 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
     // out, while still landing at the function entry block for the common
     // non-parallel case.
     OpBuilder::InsertionGuard insertionGuard(builder);
-    Block *entryBlock = nullptr;
-    Block *cursor = builder.getInsertionBlock();
-    while (cursor) {
-      Operation *parentOp = cursor->getParentOp();
-      if (!parentOp)
-        break;
-      if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>()) {
-        entryBlock = &cursor->getParent()->front();
-        break;
-      }
-      cursor = parentOp->getBlock();
-    }
-    if (!entryBlock)
-      entryBlock = &(*argument.getParentRegion()->begin());
+    Operation *scope = builder.getInsertionBlock()->getParentOp();
+    if (!scope->mightHaveTrait<OpTrait::AutomaticAllocationScope>())
+      scope = scope->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+    Block *entryBlock = &scope->getRegion(0).front();
     builder.setInsertionPointToStart(entryBlock);
     Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                                          builder.getI64IntegerAttr(1));
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index e84a4a45ca45b..cc3600af431ea 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -582,15 +582,14 @@ llvm.func @byval_in_parallel(%ptr : !llvm.ptr { llvm.byval = f32 }) {
 // CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
 llvm.func @test_byval_in_parallel_region(%ptr : !llvm.ptr) {
   %c0 = arith.constant 0 : index
-  %c4 = arith.constant 4 : index
-  %c1 = arith.constant 1 : index
-  // Verify the alloca is not hoisted out of the parallel region.
+  // Verify the alloca is not hoisted out of the allocation scope.
   // CHECK-NOT: llvm.alloca
-  // CHECK: scf.forall
-  scf.forall (%i) = (%c0) to (%c4) step (%c1) {
+  // CHECK: test.alloca_scope_region
+  test.alloca_scope_region {
     // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f32
     // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
     llvm.call @byval_in_parallel(%ptr) : (!llvm.ptr) -> ()
+    test.region_yield %c0 : index
   }
   llvm.return
 }

>From cd0776e82542eb753c0faab30482fd970bb18845 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 15:46:38 +0100
Subject: [PATCH 3/4] [MLIR][LLVMIR] Preserve byval alignment in memcpy after
 inlining

---
 .../Transforms/InlinerInterfaceImpl.cpp       | 13 +++++++++++-
 mlir/test/Dialect/LLVMIR/inlining.mlir        | 20 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index b61138ad4678b..c9b6359ecc433 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -623,8 +623,19 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
   Value copySize =
       LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                                builder.getI64IntegerAttr(elementTypeSize));
+  // Preserve the alignment of the destination (alloca) in the memcpy's
+  // arg_attrs.
+  MLIRContext *ctx = builder.getContext();
+  NamedAttribute dstAlignAttr =
+      builder.getNamedAttr(LLVM::LLVMDialect::getAlignAttrName(),
+                           builder.getI64IntegerAttr(targetAlignment));
+  ArrayAttr argAttrs =
+      builder.getArrayAttr({DictionaryAttr::get(ctx, {dstAlignAttr})});
   LLVM::MemcpyOp::create(builder, loc, allocaOp, argument, copySize,
-                         /*isVolatile=*/false);
+                         /*isVolatile=*/false,
+                         /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
+                         /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr, argAttrs,
+                         /*res_attrs=*/nullptr);
   return allocaOp;
 }
 
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index cc3600af431ea..e2c60da47e180 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -570,6 +570,26 @@ llvm.func @test_byval_global() {
 
 // -----
 
+// Check that alignment information is preserved in the memcpy when inlining
+// byval arguments.
+
+llvm.func @byval_aligned_arg(%ptr : !llvm.ptr { llvm.byval = i32, llvm.align = 16 }) {
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_memcpy_alignment
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+llvm.func @test_byval_memcpy_alignment(%ptr : !llvm.ptr) {
+  // Verify the memcpy carries the alignment info from the byval attribute.
+  // CHECK: %[[ALLOCA:.+]] = llvm.alloca{{.+}}alignment = 16
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+  // CHECK-SAME: {llvm.align = 16 : i64}
+  llvm.call @byval_aligned_arg(%ptr) : (!llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
 // Check that inlining does not hoist byval allocas out of automatic allocation
 // scopes, such as parallel forall regions. Each parallel iteration must have
 // its own private copy of the byval argument.

>From b77efbb379adab25b03f00d9d6ff28e3c5237c22 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 15:46:38 +0100
Subject: [PATCH 4/4] [MLIR][LLVMIR] Preserve byval alignment in memcpy after
 inlining

---
 .../Transforms/InlinerInterfaceImpl.cpp       | 13 +++++++++++-
 mlir/test/Dialect/LLVMIR/inlining.mlir        | 20 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index b61138ad4678b..c9b6359ecc433 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -623,8 +623,19 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
   Value copySize =
       LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
                                builder.getI64IntegerAttr(elementTypeSize));
+  // Preserve the alignment of the destination (alloca) in the memcpy's
+  // arg_attrs.
+  MLIRContext *ctx = builder.getContext();
+  NamedAttribute dstAlignAttr =
+      builder.getNamedAttr(LLVM::LLVMDialect::getAlignAttrName(),
+                           builder.getI64IntegerAttr(targetAlignment));
+  ArrayAttr argAttrs =
+      builder.getArrayAttr({DictionaryAttr::get(ctx, {dstAlignAttr})});
   LLVM::MemcpyOp::create(builder, loc, allocaOp, argument, copySize,
-                         /*isVolatile=*/false);
+                         /*isVolatile=*/false,
+                         /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
+                         /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr, argAttrs,
+                         /*res_attrs=*/nullptr);
   return allocaOp;
 }
 
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index cc3600af431ea..e2c60da47e180 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -570,6 +570,26 @@ llvm.func @test_byval_global() {
 
 // -----
 
+// Check that alignment information is preserved in the memcpy when inlining
+// byval arguments.
+
+llvm.func @byval_aligned_arg(%ptr : !llvm.ptr { llvm.byval = i32, llvm.align = 16 }) {
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_memcpy_alignment
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+llvm.func @test_byval_memcpy_alignment(%ptr : !llvm.ptr) {
+  // Verify the memcpy carries the alignment info from the byval attribute.
+  // CHECK: %[[ALLOCA:.+]] = llvm.alloca{{.+}}alignment = 16
+  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+  // CHECK-SAME: {llvm.align = 16 : i64}
+  llvm.call @byval_aligned_arg(%ptr) : (!llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
 // Check that inlining does not hoist byval allocas out of automatic allocation
 // scopes, such as parallel forall regions. Each parallel iteration must have
 // its own private copy of the byval argument.



More information about the Mlir-commits mailing list