[Mlir-commits] [mlir] [MLIR][LLVMIR] Preserve byval alignment in memcpy after inlining (PR #185433)
Berke Ates
llvmlistbot at llvm.org
Mon Mar 9 07:49:08 PDT 2026
https://github.com/Berke-Ates created https://github.com/llvm/llvm-project/pull/185433
This PR attaches an `llvm.align` argument attribute, carrying the alignment of the destination alloca, to the memcpy intrinsic that is generated when inlining functions with byval arguments.
>From 17da49376c712303d3f87137d037cfb9609787c3 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 13:12:18 +0100
Subject: [PATCH 1/3] [MLIR][LLVMIR] Fix inline byval alloca hoisting out of
allocation scope
---
.../Transforms/InlinerInterfaceImpl.cpp | 22 ++++++++++++---
mlir/test/Dialect/LLVMIR/inlining.mlir | 27 +++++++++++++++++++
2 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index 0e43480e82926..baaae51ef5a21 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -603,10 +603,26 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
// Allocate the new value on the stack.
Value allocaOp;
{
- // Since this is a static alloca, we can put it directly in the entry block,
- // so they can be absorbed into the prologue/epilogue at code generation.
+ // Walk up from the call site to find the innermost AutomaticAllocationScope
+ // (e.g. an llvm.func or scf.forall). Placing the alloca at the entry block
+ // of that scope keeps it inside parallel regions rather than hoisting it
+ // out, while still landing at the function entry block for the common
+ // non-parallel case.
OpBuilder::InsertionGuard insertionGuard(builder);
- Block *entryBlock = &(*argument.getParentRegion()->begin());
+ Block *entryBlock = nullptr;
+ Block *cursor = builder.getInsertionBlock();
+ while (cursor) {
+ Operation *parentOp = cursor->getParentOp();
+ if (!parentOp)
+ break;
+ if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>()) {
+ entryBlock = &cursor->getParent()->front();
+ break;
+ }
+ cursor = parentOp->getBlock();
+ }
+ if (!entryBlock)
+ entryBlock = &(*argument.getParentRegion()->begin());
builder.setInsertionPointToStart(entryBlock);
Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
builder.getI64IntegerAttr(1));
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index 70ce7ca20986b..e84a4a45ca45b 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -570,6 +570,33 @@ llvm.func @test_byval_global() {
// -----
+// Check that inlining does not hoist byval allocas out of automatic allocation
+// scopes, such as parallel forall regions. Each parallel iteration must have
+// its own private copy of the byval argument.
+
+llvm.func @byval_in_parallel(%ptr : !llvm.ptr { llvm.byval = f32 }) {
+ llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_in_parallel_region
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+llvm.func @test_byval_in_parallel_region(%ptr : !llvm.ptr) {
+ %c0 = arith.constant 0 : index
+ %c4 = arith.constant 4 : index
+ %c1 = arith.constant 1 : index
+ // Verify the alloca is not hoisted out of the parallel region.
+ // CHECK-NOT: llvm.alloca
+ // CHECK: scf.forall
+ scf.forall (%i) = (%c0) to (%c4) step (%c1) {
+ // CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f32
+ // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+ llvm.call @byval_in_parallel(%ptr) : (!llvm.ptr) -> ()
+ }
+ llvm.return
+}
+
+// -----
+
llvm.func @ignored_attrs(%ptr : !llvm.ptr { llvm.inreg, llvm.nocapture, llvm.nofree, llvm.preallocated = i32, llvm.returned, llvm.alignstack = 32 : i64, llvm.writeonly, llvm.noundef, llvm.nonnull }, %x : i32 { llvm.zeroext }) -> (!llvm.ptr { llvm.noundef, llvm.inreg, llvm.nonnull }) {
llvm.return %ptr : !llvm.ptr
}
>From 5e44e2c7337b94174c445cb7043f452539f28566 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 14:09:04 +0100
Subject: [PATCH 2/3] addressed PR comments
---
.../LLVMIR/Transforms/InlinerInterfaceImpl.cpp | 18 ++++--------------
mlir/test/Dialect/LLVMIR/inlining.mlir | 9 ++++-----
2 files changed, 8 insertions(+), 19 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index baaae51ef5a21..b61138ad4678b 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -609,20 +609,10 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
// out, while still landing at the function entry block for the common
// non-parallel case.
OpBuilder::InsertionGuard insertionGuard(builder);
- Block *entryBlock = nullptr;
- Block *cursor = builder.getInsertionBlock();
- while (cursor) {
- Operation *parentOp = cursor->getParentOp();
- if (!parentOp)
- break;
- if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>()) {
- entryBlock = &cursor->getParent()->front();
- break;
- }
- cursor = parentOp->getBlock();
- }
- if (!entryBlock)
- entryBlock = &(*argument.getParentRegion()->begin());
+ Operation *scope = builder.getInsertionBlock()->getParentOp();
+ if (!scope->mightHaveTrait<OpTrait::AutomaticAllocationScope>())
+ scope = scope->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+ Block *entryBlock = &scope->getRegion(0).front();
builder.setInsertionPointToStart(entryBlock);
Value one = LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
builder.getI64IntegerAttr(1));
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index e84a4a45ca45b..cc3600af431ea 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -582,15 +582,14 @@ llvm.func @byval_in_parallel(%ptr : !llvm.ptr { llvm.byval = f32 }) {
// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
llvm.func @test_byval_in_parallel_region(%ptr : !llvm.ptr) {
%c0 = arith.constant 0 : index
- %c4 = arith.constant 4 : index
- %c1 = arith.constant 1 : index
- // Verify the alloca is not hoisted out of the parallel region.
+ // Verify the alloca is not hoisted out of the allocation scope.
// CHECK-NOT: llvm.alloca
- // CHECK: scf.forall
- scf.forall (%i) = (%c0) to (%c4) step (%c1) {
+ // CHECK: test.alloca_scope_region
+ test.alloca_scope_region {
// CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f32
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
llvm.call @byval_in_parallel(%ptr) : (!llvm.ptr) -> ()
+ test.region_yield %c0 : index
}
llvm.return
}
>From cd0776e82542eb753c0faab30482fd970bb18845 Mon Sep 17 00:00:00 2001
From: Berke-Ates <berke at ates.ch>
Date: Mon, 9 Mar 2026 15:46:38 +0100
Subject: [PATCH 3/3] [MLIR][LLVMIR] Preserve byval alignment in memcpy after
inlining
---
.../Transforms/InlinerInterfaceImpl.cpp | 13 +++++++++++-
mlir/test/Dialect/LLVMIR/inlining.mlir | 20 +++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index b61138ad4678b..c9b6359ecc433 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -623,8 +623,19 @@ static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
Value copySize =
LLVM::ConstantOp::create(builder, loc, builder.getI64Type(),
builder.getI64IntegerAttr(elementTypeSize));
+ // Preserve the alignment of the destination (alloca) in the memcpy's
+ // arg_attrs.
+ MLIRContext *ctx = builder.getContext();
+ NamedAttribute dstAlignAttr =
+ builder.getNamedAttr(LLVM::LLVMDialect::getAlignAttrName(),
+ builder.getI64IntegerAttr(targetAlignment));
+ ArrayAttr argAttrs =
+ builder.getArrayAttr({DictionaryAttr::get(ctx, {dstAlignAttr})});
LLVM::MemcpyOp::create(builder, loc, allocaOp, argument, copySize,
- /*isVolatile=*/false);
+ /*isVolatile=*/false,
+ /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
+ /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr, argAttrs,
+ /*res_attrs=*/nullptr);
return allocaOp;
}
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index cc3600af431ea..e2c60da47e180 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -570,6 +570,26 @@ llvm.func @test_byval_global() {
// -----
+// Check that alignment information is preserved in the memcpy when inlining
+// byval arguments.
+
+llvm.func @byval_aligned_arg(%ptr : !llvm.ptr { llvm.byval = i32, llvm.align = 16 }) {
+ llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_memcpy_alignment
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+llvm.func @test_byval_memcpy_alignment(%ptr : !llvm.ptr) {
+ // Verify the memcpy carries the alignment info from the byval attribute.
+ // CHECK: %[[ALLOCA:.+]] = llvm.alloca{{.+}}alignment = 16
+ // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+ // CHECK-SAME: {llvm.align = 16 : i64}
+ llvm.call @byval_aligned_arg(%ptr) : (!llvm.ptr) -> ()
+ llvm.return
+}
+
+// -----
+
// Check that inlining does not hoist byval allocas out of automatic allocation
// scopes, such as parallel forall regions. Each parallel iteration must have
// its own private copy of the byval argument.
More information about the Mlir-commits
mailing list