[Mlir-commits] [mlir] [mlir][llvm] Add support for memset.inline (PR #115711)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Nov 12 03:13:48 PST 2024
https://github.com/PikachuHyA updated https://github.com/llvm/llvm-project/pull/115711
>From efa6488148c3056ecd7861a0b702126671eb6b6f Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Mon, 11 Nov 2024 20:44:47 +0800
Subject: [PATCH 01/13] [mlir][llvm] Add support for memset.inline
---
.../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 26 ++
mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp | 4 +
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 255 +++++++++++++-----
.../test/Target/LLVMIR/llvmir-intrinsics.mlir | 4 +
4 files changed, 224 insertions(+), 65 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index d07ebbacc60434..85785938405859 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -256,6 +256,32 @@ def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2],
];
}
+def LLVM_MemsetInlineOp : LLVM_ZeroResultIntrOp<"memset.inline", [0, 2],
+ [DeclareOpInterfaceMethods<PromotableMemOpInterface>,
+ DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>,
+ DeclareOpInterfaceMethods<SafeMemorySlotAccessOpInterface>],
+ /*requiresAccessGroup=*/1, /*requiresAliasAnalysis=*/1,
+ /*requiresOpBundles=*/0, /*immArgPositions=*/[2, 3],
+ /*immArgAttrNames=*/["len", "isVolatile"]> {
+ dag args = (ins Arg<LLVM_AnyPointer,"",[MemWrite]>:$dst,
+ I8:$val, APIntAttr:$len, I1Attr:$isVolatile);
+ // Append the alias attributes defined by LLVM_IntrOpBase.
+ let arguments = !con(args, aliasAttrs);
+ let builders = [
+ OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len,
+ "bool":$isVolatile), [{
+ build($_builder, $_state, dst, val, len,
+ $_builder.getBoolAttr(isVolatile));
+ }]>,
+ OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len,
+ "IntegerAttr":$isVolatile), [{
+ build($_builder, $_state, dst, val, len, isVolatile,
+ /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
+ /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
+ }]>
+ ];
+}
+
def LLVM_NoAliasScopeDeclOp
: LLVM_ZeroResultIntrOp<"experimental.noalias.scope.decl"> {
let arguments = (ins LLVM_AliasScopeAttr:$scope);
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
index cff16afc73af3f..a59900745d026e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
@@ -94,6 +94,10 @@ SmallVector<Value> mlir::LLVM::MemsetOp::getAccessedOperands() {
return {getDst()};
}
+SmallVector<Value> mlir::LLVM::MemsetInlineOp::getAccessedOperands() {
+ return {getDst()};
+}
+
SmallVector<Value> mlir::LLVM::CallOp::getAccessedOperands() {
return llvm::to_vector(
llvm::make_filter_range(getArgOperands(), [](Value arg) {
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 5dc506c14ef96f..e262964c9f348c 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -904,6 +904,14 @@ std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemcpyInlineOp op) {
return memIntrLen.getZExtValue();
}
+template <>
+std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemsetInlineOp op) {
+ APInt memIntrLen = op.getLen();
+ if (memIntrLen.getBitWidth() > 64)
+ return {};
+ return memIntrLen.getZExtValue();
+}
+
} // namespace
/// Returns whether one can be sure the memory intrinsic does not write outside
@@ -931,23 +939,37 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
}
//===----------------------------------------------------------------------===//
-// Interfaces for memset
+// Interfaces for memset && memset.inline
//===----------------------------------------------------------------------===//
-bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; }
+template <class MemsetLike>
+static bool memsetCanRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+ SmallPtrSetImpl<Attribute> &usedIndices,
+ SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+ const DataLayout &dataLayout) {
+ if (&slot.elemType.getDialect() != op.getOperation()->getDialect())
+ return false;
-bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
- return getDst() == slot.ptr;
+ if (op.getIsVolatile())
+ return false;
+
+ if (!cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap())
+ return false;
+
+ if (!areAllIndicesI32(slot))
+ return false;
+
+ return definitelyWritesOnlyWithinSlot(op, slot, dataLayout);
}
-Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
- Value reachingDef,
- const DataLayout &dataLayout) {
+template <class MemsetLike>
+static Value memsetGetStored(MemsetLike op, const MemorySlot &slot,
+ OpBuilder &builder) {
// TODO: Support non-integer types.
return TypeSwitch<Type, Value>(slot.elemType)
.Case([&](IntegerType intType) -> Value {
if (intType.getWidth() == 8)
- return getVal();
+ return op.getVal();
assert(intType.getWidth() % 8 == 0);
@@ -955,14 +977,14 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
// or-ing it with the previous value.
uint64_t coveredBits = 8;
Value currentValue =
- builder.create<LLVM::ZExtOp>(getLoc(), intType, getVal());
+ builder.create<LLVM::ZExtOp>(op.getLoc(), intType, op.getVal());
while (coveredBits < intType.getWidth()) {
- Value shiftBy =
- builder.create<LLVM::ConstantOp>(getLoc(), intType, coveredBits);
+ Value shiftBy = builder.create<LLVM::ConstantOp>(op.getLoc(), intType,
+ coveredBits);
Value shifted =
- builder.create<LLVM::ShlOp>(getLoc(), currentValue, shiftBy);
+ builder.create<LLVM::ShlOp>(op.getLoc(), currentValue, shiftBy);
currentValue =
- builder.create<LLVM::OrOp>(getLoc(), currentValue, shifted);
+ builder.create<LLVM::OrOp>(op.getLoc(), currentValue, shifted);
coveredBits *= 2;
}
@@ -974,10 +996,12 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
});
}
-bool LLVM::MemsetOp::canUsesBeRemoved(
- const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
- SmallVectorImpl<OpOperand *> &newBlockingUses,
- const DataLayout &dataLayout) {
+template <class MemsetLike>
+static bool
+memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
+ const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses,
+ const DataLayout &dataLayout) {
// TODO: Support non-integer types.
bool canConvertType =
TypeSwitch<Type, bool>(slot.elemType)
@@ -988,62 +1012,74 @@ bool LLVM::MemsetOp::canUsesBeRemoved(
if (!canConvertType)
return false;
- if (getIsVolatile())
+ if (op.getIsVolatile())
return false;
- return getStaticMemIntrLen(*this) == dataLayout.getTypeSize(slot.elemType);
+ return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType);
}
-
-DeletionKind LLVM::MemsetOp::removeBlockingUses(
- const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
- OpBuilder &builder, Value reachingDefinition,
- const DataLayout &dataLayout) {
- return DeletionKind::Delete;
+namespace {
+template <class MemsetLike>
+void createMemsetLenAttr(MemsetLike op, IntegerAttr &memsetLenAttr) {
+ bool successfulMatch =
+ matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
+ (void)successfulMatch;
+ assert(successfulMatch);
}
-
-LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses(
- const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
- const DataLayout &dataLayout) {
- return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+template <>
+void createMemsetLenAttr(LLVM::MemsetInlineOp op, IntegerAttr &memsetLenAttr) {
+ memsetLenAttr = op.getLenAttr();
+}
+template <class MemsetLike>
+void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
+ IntegerAttr memsetLenAttr,
+ uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index) {
+
+ Value newMemsetSizeValue =
+ builder
+ .create<LLVM::ConstantOp>(
+ toReplace.getLen().getLoc(),
+ IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
+ .getResult();
+
+ builder.create<LLVM::MemsetOp>(toReplace.getLoc(), subslots.at(index).ptr,
+ toReplace.getVal(), newMemsetSizeValue,
+ toReplace.getIsVolatile());
}
+template <>
+void createMemsetLikeToReplace(OpBuilder &builder,
+ LLVM::MemsetInlineOp toReplace,
+ IntegerAttr memsetLenAttr,
+ uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index) {
-bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot,
- SmallPtrSetImpl<Attribute> &usedIndices,
- SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
- const DataLayout &dataLayout) {
- if (&slot.elemType.getDialect() != getOperation()->getDialect())
- return false;
-
- if (getIsVolatile())
- return false;
-
- if (!cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap())
- return false;
-
- if (!areAllIndicesI32(slot))
- return false;
+ auto newMemsetSizeValue =
+ IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
- return definitelyWritesOnlyWithinSlot(*this, slot, dataLayout);
+ builder.create<LLVM::MemsetInlineOp>(
+ toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(),
+ newMemsetSizeValue, toReplace.getIsVolatile());
}
+} // namespace
+template <class MemsetLike>
+static DeletionKind
+memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+ DenseMap<Attribute, MemorySlot> &subslots, OpBuilder &builder,
+ const DataLayout &dataLayout) {
-DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
- DenseMap<Attribute, MemorySlot> &subslots,
- OpBuilder &builder,
- const DataLayout &dataLayout) {
std::optional<DenseMap<Attribute, Type>> types =
cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap();
IntegerAttr memsetLenAttr;
- bool successfulMatch =
- matchPattern(getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
- (void)successfulMatch;
- assert(successfulMatch);
+ createMemsetLenAttr(op, memsetLenAttr);
bool packed = false;
if (auto structType = dyn_cast<LLVM::LLVMStructType>(slot.elemType))
packed = structType.isPacked();
- Type i32 = IntegerType::get(getContext(), 32);
+ Type i32 = IntegerType::get(op.getContext(), 32);
uint64_t memsetLen = memsetLenAttr.getValue().getZExtValue();
uint64_t covered = 0;
for (size_t i = 0; i < types->size(); i++) {
@@ -1063,16 +1099,8 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
// Otherwise, only compute its offset within the original memset.
if (subslots.contains(index)) {
uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
-
- Value newMemsetSizeValue =
- builder
- .create<LLVM::ConstantOp>(
- getLen().getLoc(),
- IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
- .getResult();
-
- builder.create<LLVM::MemsetOp>(getLoc(), subslots.at(index).ptr, getVal(),
- newMemsetSizeValue, getIsVolatile());
+ createMemsetLikeToReplace(builder, op, memsetLenAttr, newMemsetSize,
+ subslots, index);
}
covered += typeSize;
@@ -1081,6 +1109,103 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
return DeletionKind::Delete;
}
+bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
+ return getDst() == slot.ptr;
+}
+
+Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
+ Value reachingDef,
+ const DataLayout &dataLayout) {
+ return memsetGetStored(*this, slot, builder);
+}
+
+bool LLVM::MemsetOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses,
+ const DataLayout &dataLayout) {
+ return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses,
+ dataLayout);
+}
+
+DeletionKind LLVM::MemsetOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder, Value reachingDefinition,
+ const DataLayout &dataLayout) {
+ return DeletionKind::Delete;
+}
+
+LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses(
+ const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+ const DataLayout &dataLayout) {
+ return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+}
+
+bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot,
+ SmallPtrSetImpl<Attribute> &usedIndices,
+ SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+ const DataLayout &dataLayout) {
+ return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed,
+ dataLayout);
+}
+
+DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ OpBuilder &builder,
+ const DataLayout &dataLayout) {
+ return memsetRewire(*this, slot, subslots, builder, dataLayout);
+}
+
+bool LLVM::MemsetInlineOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+bool LLVM::MemsetInlineOp::storesTo(const MemorySlot &slot) {
+ return getDst() == slot.ptr;
+}
+
+Value LLVM::MemsetInlineOp::getStored(const MemorySlot &slot,
+ OpBuilder &builder, Value reachingDef,
+ const DataLayout &dataLayout) {
+ return memsetGetStored(*this, slot, builder);
+}
+
+bool LLVM::MemsetInlineOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses,
+ const DataLayout &dataLayout) {
+ return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses,
+ dataLayout);
+}
+
+DeletionKind LLVM::MemsetInlineOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder, Value reachingDefinition,
+ const DataLayout &dataLayout) {
+ return DeletionKind::Delete;
+}
+
+LogicalResult LLVM::MemsetInlineOp::ensureOnlySafeAccesses(
+ const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+ const DataLayout &dataLayout) {
+ return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+}
+
+bool LLVM::MemsetInlineOp::canRewire(
+ const DestructurableMemorySlot &slot,
+ SmallPtrSetImpl<Attribute> &usedIndices,
+ SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+ const DataLayout &dataLayout) {
+ return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed,
+ dataLayout);
+}
+
+DeletionKind
+LLVM::MemsetInlineOp::rewire(const DestructurableMemorySlot &slot,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ OpBuilder &builder, const DataLayout &dataLayout) {
+ return memsetRewire(*this, slot, subslots, builder, dataLayout);
+}
+
//===----------------------------------------------------------------------===//
// Interfaces for memcpy/memmove
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index cb712eb4e1262d..9d45f219cf746e 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -533,6 +533,10 @@ llvm.func @memset_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: i8) {
%i1 = llvm.mlir.constant(false) : i1
// CHECK: call void @llvm.memset.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 %{{.*}}, i1 false
"llvm.intr.memset"(%arg2, %arg3, %arg0) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
+ // CHECK: call void @llvm.memset.inline.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 10, i1 true
+ "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i32}> : (!llvm.ptr, i8) -> ()
+ // CHECK: call void @llvm.memset.inline.p0.i64(ptr %{{.*}}, i8 %{{.*}}, i64 10, i1 true
+ "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i64}> : (!llvm.ptr, i8) -> ()
llvm.return
}
>From 128be764f40b54bb856686ce3cab386e17dd464b Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:50:43 +0800
Subject: [PATCH 02/13] change && to and
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index e262964c9f348c..bab64cce815dad 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -939,7 +939,7 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
}
//===----------------------------------------------------------------------===//
-// Interfaces for memset && memset.inline
+// Interfaces for memset and memset.inline
//===----------------------------------------------------------------------===//
template <class MemsetLike>
>From 99827b93c21ef2ab0c91c45f5957127c3c9ec011 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:52:20 +0800
Subject: [PATCH 03/13] refactor createMemsetLenAttr
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index bab64cce815dad..7823c8c5b24810 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1019,15 +1019,17 @@ memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
}
namespace {
template <class MemsetLike>
-void createMemsetLenAttr(MemsetLike op, IntegerAttr &memsetLenAttr) {
+IntegerAttr createMemsetLenAttr(MemsetLike op) {
+ IntegerAttr memsetLenAttr;
bool successfulMatch =
matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
(void)successfulMatch;
assert(successfulMatch);
+ return memsetLenAttr;
}
template <>
-void createMemsetLenAttr(LLVM::MemsetInlineOp op, IntegerAttr &memsetLenAttr) {
- memsetLenAttr = op.getLenAttr();
+IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
+ return op.getLenAttr();
}
template <class MemsetLike>
void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
>From 774bd17acfe3ef41d11b45e2d5876cd7ac8ea6c5 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:55:30 +0800
Subject: [PATCH 04/13] clean newline
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 7823c8c5b24810..a72ae457e1336b 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1017,6 +1017,7 @@ memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType);
}
+
namespace {
template <class MemsetLike>
IntegerAttr createMemsetLenAttr(MemsetLike op) {
@@ -1037,7 +1038,6 @@ void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
uint64_t newMemsetSize,
DenseMap<Attribute, MemorySlot> &subslots,
Attribute index) {
-
Value newMemsetSizeValue =
builder
.create<LLVM::ConstantOp>(
@@ -1065,6 +1065,7 @@ void createMemsetLikeToReplace(OpBuilder &builder,
newMemsetSizeValue, toReplace.getIsVolatile());
}
} // namespace
+
template <class MemsetLike>
static DeletionKind
memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,
>From d64e0b0f6e215597933f8f665f5f3c1a0c9ee63f Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:57:14 +0800
Subject: [PATCH 05/13] rename MemsetLike -> MemsetIntr
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index a72ae457e1336b..80b65a82d9b4b9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -942,8 +942,8 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
// Interfaces for memset and memset.inline
//===----------------------------------------------------------------------===//
-template <class MemsetLike>
-static bool memsetCanRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+template <class MemsetIntr>
+static bool memsetCanRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
SmallPtrSetImpl<Attribute> &usedIndices,
SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
const DataLayout &dataLayout) {
@@ -962,8 +962,8 @@ static bool memsetCanRewire(MemsetLike op, const DestructurableMemorySlot &slot,
return definitelyWritesOnlyWithinSlot(op, slot, dataLayout);
}
-template <class MemsetLike>
-static Value memsetGetStored(MemsetLike op, const MemorySlot &slot,
+template <class MemsetIntr>
+static Value memsetGetStored(MemsetIntr op, const MemorySlot &slot,
OpBuilder &builder) {
// TODO: Support non-integer types.
return TypeSwitch<Type, Value>(slot.elemType)
@@ -996,9 +996,9 @@ static Value memsetGetStored(MemsetLike op, const MemorySlot &slot,
});
}
-template <class MemsetLike>
+template <class MemsetIntr>
static bool
-memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
+memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot,
const SmallPtrSetImpl<OpOperand *> &blockingUses,
SmallVectorImpl<OpOperand *> &newBlockingUses,
const DataLayout &dataLayout) {
@@ -1019,8 +1019,8 @@ memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
}
namespace {
-template <class MemsetLike>
-IntegerAttr createMemsetLenAttr(MemsetLike op) {
+template <class MemsetIntr>
+IntegerAttr createMemsetLenAttr(MemsetIntr op) {
IntegerAttr memsetLenAttr;
bool successfulMatch =
matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
@@ -1032,8 +1032,8 @@ template <>
IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
return op.getLenAttr();
}
-template <class MemsetLike>
-void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
+template <class MemsetIntr>
+void createMemsetIntrToReplace(OpBuilder &builder, MemsetIntr toReplace,
IntegerAttr memsetLenAttr,
uint64_t newMemsetSize,
DenseMap<Attribute, MemorySlot> &subslots,
@@ -1050,7 +1050,7 @@ void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
toReplace.getIsVolatile());
}
template <>
-void createMemsetLikeToReplace(OpBuilder &builder,
+void createMemsetIntrToReplace(OpBuilder &builder,
LLVM::MemsetInlineOp toReplace,
IntegerAttr memsetLenAttr,
uint64_t newMemsetSize,
@@ -1066,9 +1066,9 @@ void createMemsetLikeToReplace(OpBuilder &builder,
}
} // namespace
-template <class MemsetLike>
+template <class MemsetIntr>
static DeletionKind
-memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
DenseMap<Attribute, MemorySlot> &subslots, OpBuilder &builder,
const DataLayout &dataLayout) {
@@ -1102,7 +1102,7 @@ memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,
// Otherwise, only compute its offset within the original memset.
if (subslots.contains(index)) {
uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
- createMemsetLikeToReplace(builder, op, memsetLenAttr, newMemsetSize,
+ createMemsetIntrToReplace(builder, op, memsetLenAttr, newMemsetSize,
subslots, index);
}
>From 81546f5d7b2c22584555b44592330cf1b2fc901c Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 15:15:46 +0800
Subject: [PATCH 06/13] fix call createMemsetLenAttr
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 80b65a82d9b4b9..5d7d213c2cc408 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1075,8 +1075,7 @@ memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
std::optional<DenseMap<Attribute, Type>> types =
cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap();
- IntegerAttr memsetLenAttr;
- createMemsetLenAttr(op, memsetLenAttr);
+ IntegerAttr memsetLenAttr = createMemsetLenAttr(op);
bool packed = false;
if (auto structType = dyn_cast<LLVM::LLVMStructType>(slot.elemType))
>From 41db2825fc9adc6c895fe19fc99b12876a66e54e Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 15:16:25 +0800
Subject: [PATCH 07/13] add test llvm.intr.memset.inline in intrinsic.ll
---
mlir/test/Target/LLVMIR/Import/intrinsic.ll | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index 606b11175f572f..e857e252ff0839 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -505,6 +505,10 @@ define void @memmove_test(i32 %0, ptr %1, ptr %2) {
define void @memset_test(i32 %0, ptr %1, i8 %2) {
; CHECK: "llvm.intr.memset"(%{{.*}}, %{{.*}}, %{{.*}}) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
call void @llvm.memset.p0.i32(ptr %1, i8 %2, i32 %0, i1 false)
+ ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i64}> : (!llvm.ptr, i8) -> ()
+ call void @llvm.memset.inline.p0.i64(ptr %1, i8 %2, i64 10, i1 false)
+ ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i32}> : (!llvm.ptr, i8) -> ()
+ call void @llvm.memset.inline.p0.i32(ptr %1, i8 %2, i32 10, i1 false)
ret void
}
>From bc756d974cca3772d4eaa9532ce4cbb3341cf165 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 16:42:32 +0800
Subject: [PATCH 08/13] add comment of getStaticMemIntrLen
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 5d7d213c2cc408..2c88c624c4a3a1 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -904,6 +904,10 @@ std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemcpyInlineOp op) {
return memIntrLen.getZExtValue();
}
+/// Returns the length of the given memory intrinsic in bytes if it can be known
+/// at compile-time on a best-effort basis, nothing otherwise.
+/// Because MemsetInlineOp has its length encoded as an attribute, this requires
+/// specialized handling.
template <>
std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemsetInlineOp op) {
APInt memIntrLen = op.getLen();
>From c0ad3c9fa4a557053665a234f0bd77adcf1c16a6 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:18:25 +0800
Subject: [PATCH 09/13] add tests in mem2reg-intrinsics.mlir
---
.../Dialect/LLVMIR/mem2reg-intrinsics.mlir | 132 ++++++++++++++++++
1 file changed, 132 insertions(+)
diff --git a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
index 4fc80a87f20df5..646667505a3732 100644
--- a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
@@ -23,6 +23,28 @@ llvm.func @basic_memset(%memset_value: i8) -> i32 {
// -----
+// CHECK-LABEL: llvm.func @basic_memset_inline
+// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
+llvm.func @basic_memset_inline(%memset_value: i8) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4 : i32}> : (!llvm.ptr, i8) -> ()
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i32
+ // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32
+ // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]]
+ // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]]
+ // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32
+ // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]]
+ // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]]
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[VALUE_32]] : i32
+ llvm.return %2 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @basic_memset_constant
llvm.func @basic_memset_constant() -> i32 {
%0 = llvm.mlir.constant(1 : i32) : i32
@@ -45,6 +67,27 @@ llvm.func @basic_memset_constant() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @basic_memset_inline_constant
+llvm.func @basic_memset_inline_constant() -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: %[[C42:.*]] = llvm.mlir.constant(42 : i8) : i8
+ // CHECK: %[[VALUE_42:.*]] = llvm.zext %[[C42]] : i8 to i32
+ // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32
+ // CHECK: %[[SHIFTED_42:.*]] = llvm.shl %[[VALUE_42]], %[[C8]] : i32
+ // CHECK: %[[OR0:.*]] = llvm.or %[[VALUE_42]], %[[SHIFTED_42]] : i32
+ // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32
+ // CHECK: %[[SHIFTED:.*]] = llvm.shl %[[OR0]], %[[C16]] : i32
+ // CHECK: %[[RES:..*]] = llvm.or %[[OR0]], %[[SHIFTED]] : i32
+ // CHECK: llvm.return %[[RES]] : i32
+ llvm.return %2 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @exotic_target_memset
// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
llvm.func @exotic_target_memset(%memset_value: i8) -> i40 {
@@ -71,6 +114,31 @@ llvm.func @exotic_target_memset(%memset_value: i8) -> i40 {
// -----
+// CHECK-LABEL: llvm.func @exotic_target_memset_inline
+// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
+llvm.func @exotic_target_memset_inline(%memset_value: i8) -> i40 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i40 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 5}> : (!llvm.ptr, i8) -> ()
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i40
+ // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i40) : i40
+ // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]]
+ // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]]
+ // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i40) : i40
+ // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]]
+ // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]]
+ // CHECK: %[[C32:.*]] = llvm.mlir.constant(32 : i40) : i40
+ // CHECK: %[[SHIFTED_COMPL:.*]] = llvm.shl %[[VALUE_32]], %[[C32]]
+ // CHECK: %[[VALUE_COMPL:.*]] = llvm.or %[[VALUE_32]], %[[SHIFTED_COMPL]]
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i40
+ // CHECK: llvm.return %[[VALUE_COMPL]] : i40
+ llvm.return %2 : i40
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @no_volatile_memset
llvm.func @no_volatile_memset() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -89,6 +157,22 @@ llvm.func @no_volatile_memset() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @no_volatile_memset_inline
+llvm.func @no_volatile_memset_inline() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 4 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 4}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.return %2 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @no_partial_memset
llvm.func @no_partial_memset() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -107,6 +191,22 @@ llvm.func @no_partial_memset() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @no_partial_memset_inline
+llvm.func @no_partial_memset_inline() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // Only 2 of the 4 bytes of the i32 are written, so the memset.inline and
+ // the alloca are expected to be left in place.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.return %2 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @no_overflowing_memset
llvm.func @no_overflowing_memset() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -125,6 +225,22 @@ llvm.func @no_overflowing_memset() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @no_overflowing_memset_inline
+llvm.func @no_overflowing_memset_inline() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // 6 bytes are written, which is more than the 4 bytes of the i32, so the
+ // memset.inline and the alloca are expected to be left in place.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.return %2 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset
llvm.func @only_byte_aligned_integers_memset() -> i10 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -143,6 +259,22 @@ llvm.func @only_byte_aligned_integers_memset() -> i10 {
// -----
+// CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset_inline
+llvm.func @only_byte_aligned_integers_memset_inline() -> i10 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i10
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i10 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // i10 is not a whole number of bytes wide, so the memset.inline and the
+ // alloca are expected to be left in place.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i10
+ llvm.return %2 : i10
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @basic_memcpy
// CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
llvm.func @basic_memcpy(%source: !llvm.ptr) -> i32 {
>From 2b889ac9884f112ab861df7d47e33e0fe6058522 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:46:33 +0800
Subject: [PATCH 10/13] add tests in sroa-intrinsics.mlir
---
mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir | 217 ++++++++++++++++++
1 file changed, 217 insertions(+)
diff --git a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
index ba73025814cc05..f0e671af6bf87f 100644
--- a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
@@ -21,6 +21,25 @@ llvm.func @memset() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline
+llvm.func @memset_inline() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ // After SROA, only one i32 is actually used, so only 4 bytes are set.
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // The original 16 bytes span the first 4 i32 entries of the array.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 16}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memset_partial
llvm.func @memset_partial() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -43,6 +62,26 @@ llvm.func @memset_partial() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline_partial
+llvm.func @memset_inline_partial() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ // After SROA, only the second i32 is actually used. As the original memset
+ // covers just the first half of it, only 2 bytes are set.
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // The original 6 bytes span the first i32 and half of the second i32.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memset_full
llvm.func @memset_full() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -64,6 +103,25 @@ llvm.func @memset_full() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline_full
+llvm.func @memset_inline_full() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ // After SROA, only one i32 is actually used, so only 4 bytes are set.
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // The original 40 bytes span the entire array.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 40}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memset_too_much
llvm.func @memset_too_much() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -84,6 +142,24 @@ llvm.func @memset_too_much() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline_too_much
+llvm.func @memset_inline_too_much() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // 41 bytes is one byte more than the 40-byte array, so the array alloca and
+ // the memset.inline are expected to be left unchanged.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 41 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 41}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memset_no_volatile
llvm.func @memset_no_volatile() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -103,6 +179,23 @@ llvm.func @memset_no_volatile() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline_no_volatile
+llvm.func @memset_inline_no_volatile() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // The memset.inline is volatile, so the array alloca and the memset.inline
+ // are expected to be left unchanged.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 16 : i64}>
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 16}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @indirect_memset
llvm.func @indirect_memset() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -123,6 +216,24 @@ llvm.func @indirect_memset() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @indirect_memset_inline
+llvm.func @indirect_memset_inline() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // This memset.inline goes through a GEP and covers exactly the selected
+ // element (the first i32 field), so it is expected to target that field's
+ // own i32 alloca.
+ %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+ "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @invalid_indirect_memset
llvm.func @invalid_indirect_memset() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -144,6 +255,25 @@ llvm.func @invalid_indirect_memset() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @invalid_indirect_memset_inline
+llvm.func @invalid_indirect_memset_inline() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)>
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // This memset.inline starts at the first i32 field but writes 6 bytes, i.e.
+ // it goes 2 bytes beyond that field. The struct alloca, the GEP and the
+ // memset.inline are therefore expected to be left unchanged.
+ // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0]
+ %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
+ // CHECK: "llvm.intr.memset.inline"(%[[GEP]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
+ "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memset_double_use
llvm.func @memset_double_use() -> i32 {
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -176,6 +306,35 @@ llvm.func @memset_double_use() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline_double_use
+llvm.func @memset_inline_double_use() -> i32 {
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+ // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // We expect two generated memsets, one for each field.
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ // After SROA, each field has its own 4-byte alloca, so each memset sets 4 bytes.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ // The original 8 bytes span both the i32 and the f32 field.
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
+ %5 = llvm.load %4 : !llvm.ptr -> f32
+ // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+ %6 = llvm.bitcast %5 : f32 to i32
+ %7 = llvm.add %3, %6 : i32
+ llvm.return %7 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memset_considers_alignment
llvm.func @memset_considers_alignment() -> i32 {
// CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -207,6 +366,35 @@ llvm.func @memset_considers_alignment() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline_considers_alignment
+llvm.func @memset_inline_considers_alignment() -> i32 {
+ // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ // After SROA, only 32-bit values are actually used, so only 4 bytes are set.
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // The original 8 bytes span the i8 and the i32 entries.
+ // Because of padding, the f32 entry is not touched.
+ // Even though both the i32 and the f32 are used, only one memset should be
+ // generated, as the f32 is not touched by the initial memset.
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
+ %5 = llvm.load %4 : !llvm.ptr -> f32
+ // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+ %6 = llvm.bitcast %5 : f32 to i32
+ %7 = llvm.add %3, %6 : i32
+ llvm.return %7 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memset_considers_packing
llvm.func @memset_considers_packing() -> i32 {
// CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -239,6 +427,35 @@ llvm.func @memset_considers_packing() -> i32 {
// -----
+// CHECK-LABEL: llvm.func @memset_inline_considers_packing
+llvm.func @memset_inline_considers_packing() -> i32 {
+ // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+ // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+ // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %memset_value = llvm.mlir.constant(42 : i8) : i8
+ // Now all fields are touched by the memset.
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ // After SROA, the i32 is fully covered (4 bytes) but only the first 3 bytes of the f32 are.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 3 : i64}>
+ // CHECK-NOT: "llvm.intr.memset.inline"
+ // The original 8 bytes reach into every field, because the packed struct has no padding.
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
+ %5 = llvm.load %4 : !llvm.ptr -> f32
+ // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+ %6 = llvm.bitcast %5 : f32 to i32
+ %7 = llvm.add %3, %6 : i32
+ llvm.return %7 : i32
+}
+
+// -----
+
// CHECK-LABEL: llvm.func @memcpy_dest
// CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {
>From 0e2f43f2fdf38040cf98bb08f0e8aeedb80fa727 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:48:37 +0800
Subject: [PATCH 11/13] rename createMemsetIntrToPlace -> createMemsetIntr
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 23 ++++++++-----------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 2c88c624c4a3a1..ed9100d6cc7235 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1037,11 +1037,10 @@ IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
return op.getLenAttr();
}
template <class MemsetIntr>
-void createMemsetIntrToReplace(OpBuilder &builder, MemsetIntr toReplace,
- IntegerAttr memsetLenAttr,
- uint64_t newMemsetSize,
- DenseMap<Attribute, MemorySlot> &subslots,
- Attribute index) {
+void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
+ IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index) {
Value newMemsetSizeValue =
builder
.create<LLVM::ConstantOp>(
@@ -1054,12 +1053,10 @@ void createMemsetIntrToReplace(OpBuilder &builder, MemsetIntr toReplace,
toReplace.getIsVolatile());
}
template <>
-void createMemsetIntrToReplace(OpBuilder &builder,
- LLVM::MemsetInlineOp toReplace,
- IntegerAttr memsetLenAttr,
- uint64_t newMemsetSize,
- DenseMap<Attribute, MemorySlot> &subslots,
- Attribute index) {
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace,
+ IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index) {
auto newMemsetSizeValue =
IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
@@ -1105,8 +1102,8 @@ memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
// Otherwise, only compute its offset within the original memset.
if (subslots.contains(index)) {
uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
- createMemsetIntrToReplace(builder, op, memsetLenAttr, newMemsetSize,
- subslots, index);
+ createMemsetIntr(builder, op, memsetLenAttr, newMemsetSize, subslots,
+ index);
}
covered += typeSize;
>From fb1518a164af93ee6c1f1ffa97f6d4afe31faf9c Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:50:35 +0800
Subject: [PATCH 12/13] specialize LLVM::MemsetOp
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index ed9100d6cc7235..e966e38d02e21d 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1038,6 +1038,11 @@ IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
}
template <class MemsetIntr>
void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
+ IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index);
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace,
IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
DenseMap<Attribute, MemorySlot> &subslots,
Attribute index) {
>From 3cf1dde520fdc103dd6de8344adb9dd8fc262e4d Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 18:39:26 +0800
Subject: [PATCH 13/13] move and add comments
---
mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 111 ++++++++++--------
1 file changed, 61 insertions(+), 50 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index e966e38d02e21d..b81f30f55ea4a9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -916,6 +916,67 @@ std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemsetInlineOp op) {
return memIntrLen.getZExtValue();
}
+/// Returns the length of a memset intrinsic as an integer attribute, obtained
+/// by matching the intrinsic's length operand against a constant. The match is
+/// asserted to succeed, so this must only be called when the length is known to
+/// be a constant.
+template <class MemsetIntr>
+IntegerAttr createMemsetLenAttr(MemsetIntr op) {
+ IntegerAttr memsetLenAttr;
+ bool successfulMatch =
+ matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
+ // Silences the unused-variable warning when the assert is compiled out.
+ (void)successfulMatch;
+ assert(successfulMatch);
+ return memsetLenAttr;
+}
+
+/// Returns the length of a memset.inline intrinsic as an integer attribute.
+/// Because MemsetInlineOp has its length encoded as an attribute, the attribute
+/// is returned directly and no constant matching is needed.
+template <>
+IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
+ return op.getLenAttr();
+}
+
+/// Creates a memset intrinsic of the same kind as `toReplace` that writes
+/// `newMemsetSize` bytes to the subslot registered under `index` in `subslots`.
+/// `memsetLenAttr` supplies the integer type used for the new length. This is
+/// only a declaration; the behavior comes from the explicit specializations.
+template <class MemsetIntr>
+void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
+ IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index);
+
+/// Creates a memset intrinsic writing `newMemsetSize` bytes to the subslot
+/// registered under `index`. This is the specialization for MemsetOp, whose
+/// length is an operand: a new constant holding the length is created first.
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace,
+ IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index) {
+ // The new length constant reuses the integer type of the original length and
+ // the location of the original length value.
+ Value newMemsetSizeValue =
+ builder
+ .create<LLVM::ConstantOp>(
+ toReplace.getLen().getLoc(),
+ IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
+ .getResult();
+
+ // The new memset keeps the value and volatility of the replaced one.
+ builder.create<LLVM::MemsetOp>(toReplace.getLoc(), subslots.at(index).ptr,
+ toReplace.getVal(), newMemsetSizeValue,
+ toReplace.getIsVolatile());
+}
+
+/// Creates a memset.inline intrinsic writing `newMemsetSize` bytes to the
+/// subslot registered under `index`. This is the specialization for
+/// MemsetInlineOp, whose length is an attribute: no constant op is created.
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace,
+ IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+ DenseMap<Attribute, MemorySlot> &subslots,
+ Attribute index) {
+
+ // The new length attribute reuses the integer type of the original length.
+ auto newMemsetSizeValue =
+ IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
+
+ // The new memset.inline keeps the value and volatility of the replaced one.
+ builder.create<LLVM::MemsetInlineOp>(
+ toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(),
+ newMemsetSizeValue, toReplace.getIsVolatile());
+}
+
} // namespace
/// Returns whether one can be sure the memory intrinsic does not write outside
@@ -1022,56 +1083,6 @@ memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot,
return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType);
}
-namespace {
-template <class MemsetIntr>
-IntegerAttr createMemsetLenAttr(MemsetIntr op) {
- IntegerAttr memsetLenAttr;
- bool successfulMatch =
- matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
- (void)successfulMatch;
- assert(successfulMatch);
- return memsetLenAttr;
-}
-template <>
-IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
- return op.getLenAttr();
-}
-template <class MemsetIntr>
-void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
- IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
- DenseMap<Attribute, MemorySlot> &subslots,
- Attribute index);
-template <>
-void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace,
- IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
- DenseMap<Attribute, MemorySlot> &subslots,
- Attribute index) {
- Value newMemsetSizeValue =
- builder
- .create<LLVM::ConstantOp>(
- toReplace.getLen().getLoc(),
- IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
- .getResult();
-
- builder.create<LLVM::MemsetOp>(toReplace.getLoc(), subslots.at(index).ptr,
- toReplace.getVal(), newMemsetSizeValue,
- toReplace.getIsVolatile());
-}
-template <>
-void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace,
- IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
- DenseMap<Attribute, MemorySlot> &subslots,
- Attribute index) {
-
- auto newMemsetSizeValue =
- IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
-
- builder.create<LLVM::MemsetInlineOp>(
- toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(),
- newMemsetSizeValue, toReplace.getIsVolatile());
-}
-} // namespace
-
template <class MemsetIntr>
static DeletionKind
memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
More information about the Mlir-commits
mailing list