[Mlir-commits] [mlir] [MLIR][SROA] Replace pattern based approach with a one-shot one (PR #85437)
Christian Ulmann
llvmlistbot at llvm.org
Sun Mar 17 23:52:19 PDT 2024
https://github.com/Dinistro updated https://github.com/llvm/llvm-project/pull/85437
>From aa6570a01aa55111708b5d87a3cda3672bdb979f Mon Sep 17 00:00:00 2001
From: Christian Ulmann <christian.ulmann at nextsilicon.com>
Date: Fri, 15 Mar 2024 07:57:47 +0000
Subject: [PATCH 1/4] [MLIR][SROA] Replace pattern based approach with a
one-shot one
This commit changes MLIR's SROA implementation back from being pattern
based into a full pass. This is beneficial for upcoming changes that
rely more heavily on the data layout.
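For reviewers skimming the diff: the new driver boils down to the loop sketched
below. This is a condensed, illustration-only restatement assembled from the
hunks that follow (including the region.walk variant from the follow-up
commit); the destructureAllRegions helper name is made up here, and the real
code lives inline in the pass's runOnOperation, where it additionally updates
pass statistics and analysis preservation.

  #include "mlir/IR/Builders.h"
  #include "mlir/IR/PatternMatch.h"
  #include "mlir/Interfaces/MemorySlotInterfaces.h"
  #include "mlir/Transforms/SROA.h"
  #include "llvm/ADT/SmallVector.h"

  using namespace mlir;

  /// Illustration-only sketch: runs slot destructuring to a fixpoint on every
  /// region of `scopeOp`. Returns true if any slot was destructured.
  static bool destructureAllRegions(Operation *scopeOp, SROAStatistics stats) {
    bool changed = false;
    for (Region &region : scopeOp->getRegions()) {
      if (region.getBlocks().empty())
        continue;
      OpBuilder builder(&region.front(), region.front().begin());
      IRRewriter rewriter(builder);
      // Destructuring one slot may expose further destructurable slots, so
      // keep iterating until a full round makes no progress.
      while (true) {
        SmallVector<DestructurableAllocationOpInterface> allocators;
        region.walk([&](DestructurableAllocationOpInterface allocator) {
          allocators.emplace_back(allocator);
        });
        if (failed(tryToDestructureMemorySlots(allocators, rewriter, stats)))
          break;
        changed = true;
      }
    }
    return changed;
  }

Collecting the allocators up front and retrying until a round fails keeps the
pass independent of the greedy pattern driver, which is what the upcoming
data-layout-reliant changes mentioned above appear to need.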
---
mlir/include/mlir/Transforms/SROA.h | 20 ----
mlir/lib/Transforms/SROA.cpp | 41 ++++---
mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir | 104 ++++++++++--------
3 files changed, 85 insertions(+), 80 deletions(-)
diff --git a/mlir/include/mlir/Transforms/SROA.h b/mlir/include/mlir/Transforms/SROA.h
index 0b3e72400c2873..1af1fe930723f1 100644
--- a/mlir/include/mlir/Transforms/SROA.h
+++ b/mlir/include/mlir/Transforms/SROA.h
@@ -9,11 +9,9 @@
#ifndef MLIR_TRANSFORMS_SROA_H
#define MLIR_TRANSFORMS_SROA_H
-#include "mlir/IR/PatternMatch.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"
#include "mlir/Support/LogicalResult.h"
#include "llvm/ADT/Statistic.h"
-#include <variant>
namespace mlir {
@@ -29,24 +27,6 @@ struct SROAStatistics {
llvm::Statistic *maxSubelementAmount = nullptr;
};
-/// Pattern applying SROA to the regions of the operations on which it
-/// matches.
-class SROAPattern
- : public OpInterfaceRewritePattern<DestructurableAllocationOpInterface> {
-public:
- using OpInterfaceRewritePattern::OpInterfaceRewritePattern;
-
- SROAPattern(MLIRContext *context, SROAStatistics statistics = {},
- PatternBenefit benefit = 1)
- : OpInterfaceRewritePattern(context, benefit), statistics(statistics) {}
-
- LogicalResult matchAndRewrite(DestructurableAllocationOpInterface allocator,
- PatternRewriter &rewriter) const override;
-
-private:
- SROAStatistics statistics;
-};
-
/// Attempts to destructure the slots of destructurable allocators. Returns
/// failure if no slot was destructured.
LogicalResult tryToDestructureMemorySlots(
diff --git a/mlir/lib/Transforms/SROA.cpp b/mlir/lib/Transforms/SROA.cpp
index 3ceda51e1c894c..977714e75373cf 100644
--- a/mlir/lib/Transforms/SROA.cpp
+++ b/mlir/lib/Transforms/SROA.cpp
@@ -9,7 +9,6 @@
#include "mlir/Transforms/SROA.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
namespace mlir {
@@ -205,13 +204,6 @@ LogicalResult mlir::tryToDestructureMemorySlots(
return success(destructuredAny);
}
-LogicalResult
-SROAPattern::matchAndRewrite(DestructurableAllocationOpInterface allocator,
- PatternRewriter &rewriter) const {
- hasBoundedRewriteRecursion();
- return tryToDestructureMemorySlots({allocator}, rewriter, statistics);
-}
-
namespace {
struct SROA : public impl::SROABase<SROA> {
@@ -223,12 +215,35 @@ struct SROA : public impl::SROABase<SROA> {
SROAStatistics statistics{&destructuredAmount, &slotsWithMemoryBenefit,
&maxSubelementAmount};
- RewritePatternSet rewritePatterns(&getContext());
- rewritePatterns.add<SROAPattern>(&getContext(), statistics);
- FrozenRewritePatternSet frozen(std::move(rewritePatterns));
+ bool changed = false;
+
+ for (Region &region : scopeOp->getRegions()) {
+ if (region.getBlocks().empty())
+ continue;
- if (failed(applyPatternsAndFoldGreedily(scopeOp, frozen)))
- signalPassFailure();
+ OpBuilder builder(&region.front(), region.front().begin());
+ IRRewriter rewriter(builder);
+
+ // Destructuring a slot can enable the destructuring of other slots, so
+ // destructuring is retried until no further attempt succeeds.
+ while (true) {
+ SmallVector<DestructurableAllocationOpInterface> allocators;
+ // Build a list of allocators to attempt to destructure the slots of.
+ for (Block &block : region)
+ for (Operation &op : block.getOperations())
+ if (auto allocator =
+ dyn_cast<DestructurableAllocationOpInterface>(op))
+ allocators.emplace_back(allocator);
+
+ if (failed(
+ tryToDestructureMemorySlots(allocators, rewriter, statistics)))
+ break;
+
+ changed = true;
+ }
+ }
+ if (!changed)
+ markAllAnalysesPreserved();
}
};
diff --git a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
index c2e3458134ba4b..04c4af1a3e79a8 100644
--- a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
@@ -146,12 +146,10 @@ llvm.func @invalid_indirect_memset() -> i32 {
// CHECK-LABEL: llvm.func @memset_double_use
llvm.func @memset_double_use() -> i32 {
- // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
- // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
- // CHECK-DAG: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
- // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
- // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
- // CHECK-DAG: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+ // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
%memset_value = llvm.mlir.constant(42 : i8) : i8
@@ -159,8 +157,11 @@ llvm.func @memset_double_use() -> i32 {
%memset_len = llvm.mlir.constant(8 : i32) : i32
// We expect two generated memsets, one for each field.
// CHECK-NOT: "llvm.intr.memset"
- // CHECK-DAG: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
- // CHECK-DAG: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
+ // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+ // CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[MEMSET_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN]]) <{isVolatile = false}>
// CHECK-NOT: "llvm.intr.memset"
"llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
%2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
@@ -208,13 +209,10 @@ llvm.func @memset_considers_alignment() -> i32 {
// CHECK-LABEL: llvm.func @memset_considers_packing
llvm.func @memset_considers_packing() -> i32 {
- // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
- // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
- // CHECK-DAG: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
- // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
- // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set.
- // CHECK-DAG: %[[MEMSET_LEN_WHOLE:.*]] = llvm.mlir.constant(4 : i32) : i32
- // CHECK-DAG: %[[MEMSET_LEN_PARTIAL:.*]] = llvm.mlir.constant(3 : i32) : i32
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+ // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
%memset_value = llvm.mlir.constant(42 : i8) : i8
@@ -222,7 +220,10 @@ llvm.func @memset_considers_packing() -> i32 {
%memset_len = llvm.mlir.constant(8 : i32) : i32
// Now all fields are touched by the memset.
// CHECK-NOT: "llvm.intr.memset"
+ // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set.
+ // CHECK: %[[MEMSET_LEN_WHOLE:.*]] = llvm.mlir.constant(4 : i32) : i32
// CHECK: "llvm.intr.memset"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_WHOLE]]) <{isVolatile = false}>
+ // CHECK: %[[MEMSET_LEN_PARTIAL:.*]] = llvm.mlir.constant(3 : i32) : i32
// CHECK: "llvm.intr.memset"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]], %[[MEMSET_LEN_PARTIAL]]) <{isVolatile = false}>
// CHECK-NOT: "llvm.intr.memset"
"llvm.intr.memset"(%1, %memset_value, %memset_len) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
@@ -241,14 +242,14 @@ llvm.func @memset_considers_packing() -> i32 {
// CHECK-LABEL: llvm.func @memcpy_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
- // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
- // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
- // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
- // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
%memcpy_len = llvm.mlir.constant(40 : i32) : i32
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+ // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+ // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMCPY_LEN]]) <{isVolatile = false}>
"llvm.intr.memcpy"(%1, %other_array, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
@@ -261,9 +262,8 @@ llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
// CHECK-LABEL: llvm.func @memcpy_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
- // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
// After SROA, only one i32 will be actually used, so only 4 bytes will be set.
- // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
// CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
@@ -271,14 +271,18 @@ llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
// Unfortunately because of FileCheck limitations it is not possible to check which slot gets read from.
// We can only check that the amount of operations and allocated slots is correct, which should be sufficient
// as unused slots are not generated.
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memcpy"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
"llvm.intr.memcpy"(%other_array, %1, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
%3 = llvm.load %2 : !llvm.ptr -> i32
@@ -289,14 +293,18 @@ llvm.func @memcpy_src(%other_array: !llvm.ptr) -> i32 {
// CHECK-LABEL: llvm.func @memcpy_double
llvm.func @memcpy_double() -> i32 {
- // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
- // CHECK-DAG: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.mlir.constant(1 : i32) : i32
- // CHECK-COUNT-2: = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK: = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // TODO: This should also disappear.
+ // CHECK: = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<1 x i32>
%1 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
%2 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
+ // Match the dead constant, to avoid collision with the newly created one.
+ // CHECK: llvm.mlir.constant
%memcpy_len = llvm.mlir.constant(4 : i32) : i32
// CHECK-NOT: "llvm.intr.memcpy"
+ // CHECK: %[[MEMCPY_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %[[MEMCPY_LEN]]) <{isVolatile = false}>
// CHECK-NOT: "llvm.intr.memcpy"
"llvm.intr.memcpy"(%1, %2, %memcpy_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
@@ -346,14 +354,14 @@ llvm.func @memcpy_no_volatile(%other_array: !llvm.ptr) -> i32 {
// CHECK-LABEL: llvm.func @memmove_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memmove_dest(%other_array: !llvm.ptr) -> i32 {
- // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
- // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
- // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
- // CHECK-DAG: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x !llvm.array<10 x i32> : (i32) -> !llvm.ptr
%memmove_len = llvm.mlir.constant(40 : i32) : i32
// CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+ // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+ // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
// CHECK: "llvm.intr.memmove"(%[[ALLOCA]], %[[SLOT_IN_OTHER]], %[[MEMMOVE_LEN]]) <{isVolatile = false}>
"llvm.intr.memmove"(%1, %other_array, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
@@ -366,9 +374,7 @@ llvm.func @memmove_dest(%other_array: !llvm.ptr) -> i32 {
// CHECK-LABEL: llvm.func @memmove_src
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memmove_src(%other_array: !llvm.ptr) -> i32 {
- // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
- // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
- // CHECK-DAG: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK-COUNT-4: = llvm.alloca %[[ALLOCA_LEN]] x i32
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i32) -> !llvm.ptr
@@ -376,14 +382,18 @@ llvm.func @memmove_src(%other_array: !llvm.ptr) -> i32 {
// Unfortunately because of FileCheck limitations it is not possible to check which slot gets read from.
// We can only check that the amount of operations and allocated slots is correct, which should be sufficient
// as unused slots are not generated.
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
- // CHECK-DAG: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
- // CHECK-DAG: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
+ // CHECK: %[[SLOT_IN_OTHER:.*]] = llvm.getelementptr %[[OTHER_ARRAY]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
+ // CHECK: %[[MEMMOVE_LEN:.*]] = llvm.mlir.constant(4 : i32) : i32
+ // CHECK: "llvm.intr.memmove"(%[[SLOT_IN_OTHER]], %{{.*}}, %[[MEMMOVE_LEN]]) <{isVolatile = false}>
"llvm.intr.memmove"(%other_array, %1, %memmove_len) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
%2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>
%3 = llvm.load %2 : !llvm.ptr -> i32
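To make the test churn above concrete: after this change the expected output
for @memset_double_use, hand-assembled purely from the CHECK lines (attribute
and type details approximated, not taken from an actual run), looks roughly
like the IR below. The length constants are now materialized immediately
before each memset, which is what the switch from CHECK-DAG to ordered CHECK
lines captures.

  llvm.func @memset_double_use() -> i32 {
    %alloca_len = llvm.mlir.constant(1 : i32) : i32
    %alloca_float = llvm.alloca %alloca_len x f32 : (i32) -> !llvm.ptr
    %alloca_int = llvm.alloca %alloca_len x i32 : (i32) -> !llvm.ptr
    %memset_value = llvm.mlir.constant(42 : i8) : i8
    // Each struct field gets its own 4-byte memset on its scalar alloca.
    %len0 = llvm.mlir.constant(4 : i32) : i32
    "llvm.intr.memset"(%alloca_int, %memset_value, %len0) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
    %len1 = llvm.mlir.constant(4 : i32) : i32
    "llvm.intr.memset"(%alloca_float, %memset_value, %len1) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
    // ... remainder of the function as in the test file ...
  }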
>From d9e6c170642d66412895bcb784b2447fd194d1d6 Mon Sep 17 00:00:00 2001
From: Christian Ulmann <christian.ulmann at nextsilicon.com>
Date: Sat, 16 Mar 2024 14:48:48 +0000
Subject: [PATCH 2/4] fix region walk
---
mlir/lib/Transforms/SROA.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/mlir/lib/Transforms/SROA.cpp b/mlir/lib/Transforms/SROA.cpp
index 977714e75373cf..c5fa3b2e1db37b 100644
--- a/mlir/lib/Transforms/SROA.cpp
+++ b/mlir/lib/Transforms/SROA.cpp
@@ -229,11 +229,9 @@ struct SROA : public impl::SROABase<SROA> {
while (true) {
SmallVector<DestructurableAllocationOpInterface> allocators;
// Build a list of allocators to attempt to destructure the slots of.
- for (Block &block : region)
- for (Operation &op : block.getOperations())
- if (auto allocator =
- dyn_cast<DestructurableAllocationOpInterface>(op))
- allocators.emplace_back(allocator);
+ region.walk([&](DestructurableAllocationOpInterface allocator) {
+ allocators.emplace_back(allocator);
+ });
if (failed(
tryToDestructureMemorySlots(allocators, rewriter, statistics)))
>From 38d1631a7557bf1155779e580406863492b6a8ab Mon Sep 17 00:00:00 2001
From: Christian Ulmann <christian.ulmann at nextsilicon.com>
Date: Sat, 16 Mar 2024 15:12:36 +0000
Subject: [PATCH 3/4] improve TODO comment
---
mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
index 04c4af1a3e79a8..ba73025814cc05 100644
--- a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
@@ -296,7 +296,8 @@ llvm.func @memcpy_double() -> i32 {
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.mlir.constant(1 : i32) : i32
// CHECK: = llvm.alloca %[[ALLOCA_LEN]] x i32
- // TODO: This should also disappear.
+ // TODO: This should also disappear as a GEP with all zero indices should be
+ // ignored.
// CHECK: = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<1 x i32>
%1 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
%2 = llvm.alloca %0 x !llvm.array<1 x i32> : (i32) -> !llvm.ptr
>From 205f6b6d063faa688d18b02886ce8c97a739485b Mon Sep 17 00:00:00 2001
From: Christian Ulmann <christian.ulmann at nextsilicon.com>
Date: Mon, 18 Mar 2024 06:52:05 +0000
Subject: [PATCH 4/4] improve TODO comment
---
mlir/lib/Transforms/SROA.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mlir/lib/Transforms/SROA.cpp b/mlir/lib/Transforms/SROA.cpp
index c5fa3b2e1db37b..6111489bdebefd 100644
--- a/mlir/lib/Transforms/SROA.cpp
+++ b/mlir/lib/Transforms/SROA.cpp
@@ -229,6 +229,8 @@ struct SROA : public impl::SROABase<SROA> {
while (true) {
SmallVector<DestructurableAllocationOpInterface> allocators;
// Build a list of allocators to attempt to destructure the slots of.
+ // TODO: Update list on the fly to avoid repeated visiting of the same
+ // allocators.
region.walk([&](DestructurableAllocationOpInterface allocator) {
allocators.emplace_back(allocator);
});