[Mlir-commits] [mlir] [mlir][llvm] Add support for memset.inline (PR #115711)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Tue Nov 12 03:13:48 PST 2024


https://github.com/PikachuHyA updated https://github.com/llvm/llvm-project/pull/115711

>From efa6488148c3056ecd7861a0b702126671eb6b6f Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Mon, 11 Nov 2024 20:44:47 +0800
Subject: [PATCH 01/13] [mlir][llvm] Add support for memset.inline

---
 .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td   |  26 ++
 mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp |   4 +
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 255 +++++++++++++-----
 .../test/Target/LLVMIR/llvmir-intrinsics.mlir |   4 +
 4 files changed, 224 insertions(+), 65 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index d07ebbacc60434..85785938405859 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -256,6 +256,32 @@ def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2],
   ];
 }
 
+def LLVM_MemsetInlineOp : LLVM_ZeroResultIntrOp<"memset.inline", [0, 2],
+    [DeclareOpInterfaceMethods<PromotableMemOpInterface>,
+     DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>,
+     DeclareOpInterfaceMethods<SafeMemorySlotAccessOpInterface>],
+    /*requiresAccessGroup=*/1, /*requiresAliasAnalysis=*/1,
+    /*requiresOpBundles=*/0, /*immArgPositions=*/[2, 3],
+    /*immArgAttrNames=*/["len", "isVolatile"]> {
+  dag args = (ins Arg<LLVM_AnyPointer,"",[MemWrite]>:$dst,
+                  I8:$val, APIntAttr:$len, I1Attr:$isVolatile);
+  // Append the alias attributes defined by LLVM_IntrOpBase.
+  let arguments = !con(args, aliasAttrs);
+  let builders = [
+    OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len,
+                    "bool":$isVolatile), [{
+      build($_builder, $_state, dst, val, len,
+            $_builder.getBoolAttr(isVolatile));
+    }]>,
+    OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len,
+                    "IntegerAttr":$isVolatile), [{
+      build($_builder, $_state, dst, val, len, isVolatile,
+            /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
+            /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
+    }]>
+  ];
+}
+
 def LLVM_NoAliasScopeDeclOp
     : LLVM_ZeroResultIntrOp<"experimental.noalias.scope.decl"> {
   let arguments = (ins LLVM_AliasScopeAttr:$scope);
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
index cff16afc73af3f..a59900745d026e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
@@ -94,6 +94,10 @@ SmallVector<Value> mlir::LLVM::MemsetOp::getAccessedOperands() {
   return {getDst()};
 }
 
+SmallVector<Value> mlir::LLVM::MemsetInlineOp::getAccessedOperands() {
+  return {getDst()};
+}
+
 SmallVector<Value> mlir::LLVM::CallOp::getAccessedOperands() {
   return llvm::to_vector(
       llvm::make_filter_range(getArgOperands(), [](Value arg) {
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 5dc506c14ef96f..e262964c9f348c 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -904,6 +904,14 @@ std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemcpyInlineOp op) {
   return memIntrLen.getZExtValue();
 }
 
+template <>
+std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemsetInlineOp op) {
+  APInt memIntrLen = op.getLen();
+  if (memIntrLen.getBitWidth() > 64)
+    return {};
+  return memIntrLen.getZExtValue();
+}
+
 } // namespace
 
 /// Returns whether one can be sure the memory intrinsic does not write outside
@@ -931,23 +939,37 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
 }
 
 //===----------------------------------------------------------------------===//
-// Interfaces for memset
+// Interfaces for memset && memset.inline
 //===----------------------------------------------------------------------===//
 
-bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; }
+template <class MemsetLike>
+static bool memsetCanRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+                            SmallPtrSetImpl<Attribute> &usedIndices,
+                            SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+                            const DataLayout &dataLayout) {
+  if (&slot.elemType.getDialect() != op.getOperation()->getDialect())
+    return false;
 
-bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
-  return getDst() == slot.ptr;
+  if (op.getIsVolatile())
+    return false;
+
+  if (!cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap())
+    return false;
+
+  if (!areAllIndicesI32(slot))
+    return false;
+
+  return definitelyWritesOnlyWithinSlot(op, slot, dataLayout);
 }
 
-Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
-                                Value reachingDef,
-                                const DataLayout &dataLayout) {
+template <class MemsetLike>
+static Value memsetGetStored(MemsetLike op, const MemorySlot &slot,
+                             OpBuilder &builder) {
   // TODO: Support non-integer types.
   return TypeSwitch<Type, Value>(slot.elemType)
       .Case([&](IntegerType intType) -> Value {
         if (intType.getWidth() == 8)
-          return getVal();
+          return op.getVal();
 
         assert(intType.getWidth() % 8 == 0);
 
@@ -955,14 +977,14 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
         // or-ing it with the previous value.
         uint64_t coveredBits = 8;
         Value currentValue =
-            builder.create<LLVM::ZExtOp>(getLoc(), intType, getVal());
+            builder.create<LLVM::ZExtOp>(op.getLoc(), intType, op.getVal());
         while (coveredBits < intType.getWidth()) {
-          Value shiftBy =
-              builder.create<LLVM::ConstantOp>(getLoc(), intType, coveredBits);
+          Value shiftBy = builder.create<LLVM::ConstantOp>(op.getLoc(), intType,
+                                                           coveredBits);
           Value shifted =
-              builder.create<LLVM::ShlOp>(getLoc(), currentValue, shiftBy);
+              builder.create<LLVM::ShlOp>(op.getLoc(), currentValue, shiftBy);
           currentValue =
-              builder.create<LLVM::OrOp>(getLoc(), currentValue, shifted);
+              builder.create<LLVM::OrOp>(op.getLoc(), currentValue, shifted);
           coveredBits *= 2;
         }
 
@@ -974,10 +996,12 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
       });
 }
 
-bool LLVM::MemsetOp::canUsesBeRemoved(
-    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
-    SmallVectorImpl<OpOperand *> &newBlockingUses,
-    const DataLayout &dataLayout) {
+template <class MemsetLike>
+static bool
+memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
+                       const SmallPtrSetImpl<OpOperand *> &blockingUses,
+                       SmallVectorImpl<OpOperand *> &newBlockingUses,
+                       const DataLayout &dataLayout) {
   // TODO: Support non-integer types.
   bool canConvertType =
       TypeSwitch<Type, bool>(slot.elemType)
@@ -988,62 +1012,74 @@ bool LLVM::MemsetOp::canUsesBeRemoved(
   if (!canConvertType)
     return false;
 
-  if (getIsVolatile())
+  if (op.getIsVolatile())
     return false;
 
-  return getStaticMemIntrLen(*this) == dataLayout.getTypeSize(slot.elemType);
+  return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType);
 }
-
-DeletionKind LLVM::MemsetOp::removeBlockingUses(
-    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
-    OpBuilder &builder, Value reachingDefinition,
-    const DataLayout &dataLayout) {
-  return DeletionKind::Delete;
+namespace {
+template <class MemsetLike>
+void createMemsetLenAttr(MemsetLike op, IntegerAttr &memsetLenAttr) {
+  bool successfulMatch =
+      matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
+  (void)successfulMatch;
+  assert(successfulMatch);
 }
-
-LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses(
-    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
-    const DataLayout &dataLayout) {
-  return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+template <>
+void createMemsetLenAttr(LLVM::MemsetInlineOp op, IntegerAttr &memsetLenAttr) {
+  memsetLenAttr = op.getLenAttr();
+}
+template <class MemsetLike>
+void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
+                               IntegerAttr memsetLenAttr,
+                               uint64_t newMemsetSize,
+                               DenseMap<Attribute, MemorySlot> &subslots,
+                               Attribute index) {
+
+  Value newMemsetSizeValue =
+      builder
+          .create<LLVM::ConstantOp>(
+              toReplace.getLen().getLoc(),
+              IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
+          .getResult();
+
+  builder.create<LLVM::MemsetOp>(toReplace.getLoc(), subslots.at(index).ptr,
+                                 toReplace.getVal(), newMemsetSizeValue,
+                                 toReplace.getIsVolatile());
 }
+template <>
+void createMemsetLikeToReplace(OpBuilder &builder,
+                               LLVM::MemsetInlineOp toReplace,
+                               IntegerAttr memsetLenAttr,
+                               uint64_t newMemsetSize,
+                               DenseMap<Attribute, MemorySlot> &subslots,
+                               Attribute index) {
 
-bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot,
-                               SmallPtrSetImpl<Attribute> &usedIndices,
-                               SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
-                               const DataLayout &dataLayout) {
-  if (&slot.elemType.getDialect() != getOperation()->getDialect())
-    return false;
-
-  if (getIsVolatile())
-    return false;
-
-  if (!cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap())
-    return false;
-
-  if (!areAllIndicesI32(slot))
-    return false;
+  auto newMemsetSizeValue =
+      IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
 
-  return definitelyWritesOnlyWithinSlot(*this, slot, dataLayout);
+  builder.create<LLVM::MemsetInlineOp>(
+      toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(),
+      newMemsetSizeValue, toReplace.getIsVolatile());
 }
+} // namespace
+template <class MemsetLike>
+static DeletionKind
+memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+             DenseMap<Attribute, MemorySlot> &subslots, OpBuilder &builder,
+             const DataLayout &dataLayout) {
 
-DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
-                                    DenseMap<Attribute, MemorySlot> &subslots,
-                                    OpBuilder &builder,
-                                    const DataLayout &dataLayout) {
   std::optional<DenseMap<Attribute, Type>> types =
       cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap();
 
   IntegerAttr memsetLenAttr;
-  bool successfulMatch =
-      matchPattern(getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
-  (void)successfulMatch;
-  assert(successfulMatch);
+  createMemsetLenAttr(op, memsetLenAttr);
 
   bool packed = false;
   if (auto structType = dyn_cast<LLVM::LLVMStructType>(slot.elemType))
     packed = structType.isPacked();
 
-  Type i32 = IntegerType::get(getContext(), 32);
+  Type i32 = IntegerType::get(op.getContext(), 32);
   uint64_t memsetLen = memsetLenAttr.getValue().getZExtValue();
   uint64_t covered = 0;
   for (size_t i = 0; i < types->size(); i++) {
@@ -1063,16 +1099,8 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
     // Otherwise, only compute its offset within the original memset.
     if (subslots.contains(index)) {
       uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
-
-      Value newMemsetSizeValue =
-          builder
-              .create<LLVM::ConstantOp>(
-                  getLen().getLoc(),
-                  IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
-              .getResult();
-
-      builder.create<LLVM::MemsetOp>(getLoc(), subslots.at(index).ptr, getVal(),
-                                     newMemsetSizeValue, getIsVolatile());
+      createMemsetLikeToReplace(builder, op, memsetLenAttr, newMemsetSize,
+                                subslots, index);
     }
 
     covered += typeSize;
@@ -1081,6 +1109,103 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
   return DeletionKind::Delete;
 }
 
+bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
+  return getDst() == slot.ptr;
+}
+
+Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
+                                Value reachingDef,
+                                const DataLayout &dataLayout) {
+  return memsetGetStored(*this, slot, builder);
+}
+
+bool LLVM::MemsetOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses,
+                                dataLayout);
+}
+
+DeletionKind LLVM::MemsetOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder, Value reachingDefinition,
+    const DataLayout &dataLayout) {
+  return DeletionKind::Delete;
+}
+
+LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses(
+    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+    const DataLayout &dataLayout) {
+  return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+}
+
+bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot,
+                               SmallPtrSetImpl<Attribute> &usedIndices,
+                               SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+                               const DataLayout &dataLayout) {
+  return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed,
+                         dataLayout);
+}
+
+DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
+                                    DenseMap<Attribute, MemorySlot> &subslots,
+                                    OpBuilder &builder,
+                                    const DataLayout &dataLayout) {
+  return memsetRewire(*this, slot, subslots, builder, dataLayout);
+}
+
+bool LLVM::MemsetInlineOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+bool LLVM::MemsetInlineOp::storesTo(const MemorySlot &slot) {
+  return getDst() == slot.ptr;
+}
+
+Value LLVM::MemsetInlineOp::getStored(const MemorySlot &slot,
+                                      OpBuilder &builder, Value reachingDef,
+                                      const DataLayout &dataLayout) {
+  return memsetGetStored(*this, slot, builder);
+}
+
+bool LLVM::MemsetInlineOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses,
+                                dataLayout);
+}
+
+DeletionKind LLVM::MemsetInlineOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder, Value reachingDefinition,
+    const DataLayout &dataLayout) {
+  return DeletionKind::Delete;
+}
+
+LogicalResult LLVM::MemsetInlineOp::ensureOnlySafeAccesses(
+    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+    const DataLayout &dataLayout) {
+  return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+}
+
+bool LLVM::MemsetInlineOp::canRewire(
+    const DestructurableMemorySlot &slot,
+    SmallPtrSetImpl<Attribute> &usedIndices,
+    SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+    const DataLayout &dataLayout) {
+  return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed,
+                         dataLayout);
+}
+
+DeletionKind
+LLVM::MemsetInlineOp::rewire(const DestructurableMemorySlot &slot,
+                             DenseMap<Attribute, MemorySlot> &subslots,
+                             OpBuilder &builder, const DataLayout &dataLayout) {
+  return memsetRewire(*this, slot, subslots, builder, dataLayout);
+}
+
 //===----------------------------------------------------------------------===//
 // Interfaces for memcpy/memmove
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index cb712eb4e1262d..9d45f219cf746e 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -533,6 +533,10 @@ llvm.func @memset_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: i8) {
   %i1 = llvm.mlir.constant(false) : i1
   // CHECK: call void @llvm.memset.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 %{{.*}}, i1 false
   "llvm.intr.memset"(%arg2, %arg3, %arg0) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
+  // CHECK: call void @llvm.memset.inline.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 10, i1 true
+  "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i32}> : (!llvm.ptr, i8) -> ()
+  // CHECK: call void @llvm.memset.inline.p0.i64(ptr %{{.*}}, i8 %{{.*}}, i64 10, i1 true
+  "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i64}> : (!llvm.ptr, i8) -> ()
   llvm.return
 }
 

>From 128be764f40b54bb856686ce3cab386e17dd464b Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:50:43 +0800
Subject: [PATCH 02/13] change && to and

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index e262964c9f348c..bab64cce815dad 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -939,7 +939,7 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
 }
 
 //===----------------------------------------------------------------------===//
-// Interfaces for memset && memset.inline
+// Interfaces for memset and memset.inline
 //===----------------------------------------------------------------------===//
 
 template <class MemsetLike>

>From 99827b93c21ef2ab0c91c45f5957127c3c9ec011 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:52:20 +0800
Subject: [PATCH 03/13] refactor createMemsetLenAttr

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index bab64cce815dad..7823c8c5b24810 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1019,15 +1019,17 @@ memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
 }
 namespace {
 template <class MemsetLike>
-void createMemsetLenAttr(MemsetLike op, IntegerAttr &memsetLenAttr) {
+IntegerAttr createMemsetLenAttr(MemsetLike op) {
+  IntegerAttr memsetLenAttr;
   bool successfulMatch =
       matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
   (void)successfulMatch;
   assert(successfulMatch);
+  return memsetLenAttr;
 }
 template <>
-void createMemsetLenAttr(LLVM::MemsetInlineOp op, IntegerAttr &memsetLenAttr) {
-  memsetLenAttr = op.getLenAttr();
+IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
+  return op.getLenAttr();
 }
 template <class MemsetLike>
 void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,

>From 774bd17acfe3ef41d11b45e2d5876cd7ac8ea6c5 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:55:30 +0800
Subject: [PATCH 04/13] clean newline

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 7823c8c5b24810..a72ae457e1336b 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1017,6 +1017,7 @@ memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
 
   return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType);
 }
+
 namespace {
 template <class MemsetLike>
 IntegerAttr createMemsetLenAttr(MemsetLike op) {
@@ -1037,7 +1038,6 @@ void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
                                uint64_t newMemsetSize,
                                DenseMap<Attribute, MemorySlot> &subslots,
                                Attribute index) {
-
   Value newMemsetSizeValue =
       builder
           .create<LLVM::ConstantOp>(
@@ -1065,6 +1065,7 @@ void createMemsetLikeToReplace(OpBuilder &builder,
       newMemsetSizeValue, toReplace.getIsVolatile());
 }
 } // namespace
+
 template <class MemsetLike>
 static DeletionKind
 memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,

>From d64e0b0f6e215597933f8f665f5f3c1a0c9ee63f Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 14:57:14 +0800
Subject: [PATCH 05/13] rename MemsetLike -> MemsetIntr

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index a72ae457e1336b..80b65a82d9b4b9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -942,8 +942,8 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
 // Interfaces for memset and memset.inline
 //===----------------------------------------------------------------------===//
 
-template <class MemsetLike>
-static bool memsetCanRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+template <class MemsetIntr>
+static bool memsetCanRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
                             SmallPtrSetImpl<Attribute> &usedIndices,
                             SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
                             const DataLayout &dataLayout) {
@@ -962,8 +962,8 @@ static bool memsetCanRewire(MemsetLike op, const DestructurableMemorySlot &slot,
   return definitelyWritesOnlyWithinSlot(op, slot, dataLayout);
 }
 
-template <class MemsetLike>
-static Value memsetGetStored(MemsetLike op, const MemorySlot &slot,
+template <class MemsetIntr>
+static Value memsetGetStored(MemsetIntr op, const MemorySlot &slot,
                              OpBuilder &builder) {
   // TODO: Support non-integer types.
   return TypeSwitch<Type, Value>(slot.elemType)
@@ -996,9 +996,9 @@ static Value memsetGetStored(MemsetLike op, const MemorySlot &slot,
       });
 }
 
-template <class MemsetLike>
+template <class MemsetIntr>
 static bool
-memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
+memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot,
                        const SmallPtrSetImpl<OpOperand *> &blockingUses,
                        SmallVectorImpl<OpOperand *> &newBlockingUses,
                        const DataLayout &dataLayout) {
@@ -1019,8 +1019,8 @@ memsetCanUsesBeRemoved(MemsetLike op, const MemorySlot &slot,
 }
 
 namespace {
-template <class MemsetLike>
-IntegerAttr createMemsetLenAttr(MemsetLike op) {
+template <class MemsetIntr>
+IntegerAttr createMemsetLenAttr(MemsetIntr op) {
   IntegerAttr memsetLenAttr;
   bool successfulMatch =
       matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
@@ -1032,8 +1032,8 @@ template <>
 IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
   return op.getLenAttr();
 }
-template <class MemsetLike>
-void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
+template <class MemsetIntr>
+void createMemsetIntrToReplace(OpBuilder &builder, MemsetIntr toReplace,
                                IntegerAttr memsetLenAttr,
                                uint64_t newMemsetSize,
                                DenseMap<Attribute, MemorySlot> &subslots,
@@ -1050,7 +1050,7 @@ void createMemsetLikeToReplace(OpBuilder &builder, MemsetLike toReplace,
                                  toReplace.getIsVolatile());
 }
 template <>
-void createMemsetLikeToReplace(OpBuilder &builder,
+void createMemsetIntrToReplace(OpBuilder &builder,
                                LLVM::MemsetInlineOp toReplace,
                                IntegerAttr memsetLenAttr,
                                uint64_t newMemsetSize,
@@ -1066,9 +1066,9 @@ void createMemsetLikeToReplace(OpBuilder &builder,
 }
 } // namespace
 
-template <class MemsetLike>
+template <class MemsetIntr>
 static DeletionKind
-memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,
+memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
              DenseMap<Attribute, MemorySlot> &subslots, OpBuilder &builder,
              const DataLayout &dataLayout) {
 
@@ -1102,7 +1102,7 @@ memsetRewire(MemsetLike op, const DestructurableMemorySlot &slot,
     // Otherwise, only compute its offset within the original memset.
     if (subslots.contains(index)) {
       uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
-      createMemsetLikeToReplace(builder, op, memsetLenAttr, newMemsetSize,
+      createMemsetIntrToReplace(builder, op, memsetLenAttr, newMemsetSize,
                                 subslots, index);
     }
 

>From 81546f5d7b2c22584555b44592330cf1b2fc901c Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 15:15:46 +0800
Subject: [PATCH 06/13] fix call createMemsetLenAttr

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 80b65a82d9b4b9..5d7d213c2cc408 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1075,8 +1075,7 @@ memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
   std::optional<DenseMap<Attribute, Type>> types =
       cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap();
 
-  IntegerAttr memsetLenAttr;
-  createMemsetLenAttr(op, memsetLenAttr);
+  IntegerAttr memsetLenAttr = createMemsetLenAttr(op);
 
   bool packed = false;
   if (auto structType = dyn_cast<LLVM::LLVMStructType>(slot.elemType))

>From 41db2825fc9adc6c895fe19fc99b12876a66e54e Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 15:16:25 +0800
Subject: [PATCH 07/13] add test llvm.intr.memset.inline in intrinsic.ll

---
 mlir/test/Target/LLVMIR/Import/intrinsic.ll | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index 606b11175f572f..e857e252ff0839 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -505,6 +505,10 @@ define void @memmove_test(i32 %0, ptr %1, ptr %2) {
 define void @memset_test(i32 %0, ptr %1, i8 %2) {
   ; CHECK: "llvm.intr.memset"(%{{.*}}, %{{.*}}, %{{.*}}) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> ()
   call void @llvm.memset.p0.i32(ptr %1, i8 %2, i32 %0, i1 false)
+  ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i64}> : (!llvm.ptr, i8) -> ()
+  call void @llvm.memset.inline.p0.i64(ptr %1, i8 %2, i64 10, i1 false)
+  ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i32}> : (!llvm.ptr, i8) -> ()
+  call void @llvm.memset.inline.p0.i32(ptr %1, i8 %2, i32 10, i1 false)
   ret void
 }
 

>From bc756d974cca3772d4eaa9532ce4cbb3341cf165 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 16:42:32 +0800
Subject: [PATCH 08/13] add comment of getStaticMemIntrLen

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 5d7d213c2cc408..2c88c624c4a3a1 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -904,6 +904,10 @@ std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemcpyInlineOp op) {
   return memIntrLen.getZExtValue();
 }
 
+/// Returns the length of the given memory intrinsic in bytes if it can be known
+/// at compile-time on a best-effort basis, nothing otherwise.
+/// Because MemsetInlineOp has its length encoded as an attribute, this requires
+/// specialized handling.
 template <>
 std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemsetInlineOp op) {
   APInt memIntrLen = op.getLen();

>From c0ad3c9fa4a557053665a234f0bd77adcf1c16a6 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:18:25 +0800
Subject: [PATCH 09/13] add tests in mem2reg-intrinsics.mlir

---
 .../Dialect/LLVMIR/mem2reg-intrinsics.mlir    | 132 ++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
index 4fc80a87f20df5..646667505a3732 100644
--- a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
@@ -23,6 +23,28 @@ llvm.func @basic_memset(%memset_value: i8) -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @basic_memset_inline
+// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
+llvm.func @basic_memset_inline(%memset_value: i8) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4 : i32}> : (!llvm.ptr, i8) -> ()
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i32
+  // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]]
+  // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]]
+  // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32
+  // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]]
+  // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]]
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[VALUE_32]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @basic_memset_constant
 llvm.func @basic_memset_constant() -> i32 {
   %0 = llvm.mlir.constant(1 : i32) : i32
@@ -45,6 +67,27 @@ llvm.func @basic_memset_constant() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @basic_memset_inline_constant
+llvm.func @basic_memset_inline_constant() -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[C42:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: %[[VALUE_42:.*]] = llvm.zext %[[C42]] : i8 to i32
+  // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[SHIFTED_42:.*]] = llvm.shl %[[VALUE_42]], %[[C8]]  : i32
+  // CHECK: %[[OR0:.*]] = llvm.or %[[VALUE_42]], %[[SHIFTED_42]]  : i32
+  // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32
+  // CHECK: %[[SHIFTED:.*]] = llvm.shl %[[OR0]], %[[C16]]  : i32
+  // CHECK: %[[RES:.*]] = llvm.or %[[OR0]], %[[SHIFTED]]  : i32
+  // CHECK: llvm.return %[[RES]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @exotic_target_memset
 // CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
 llvm.func @exotic_target_memset(%memset_value: i8) -> i40 {
@@ -71,6 +114,31 @@ llvm.func @exotic_target_memset(%memset_value: i8) -> i40 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @exotic_target_memset_inline
+// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
+llvm.func @exotic_target_memset_inline(%memset_value: i8) -> i40 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i40 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 5}> : (!llvm.ptr, i8) -> ()
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i40
+  // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i40) : i40
+  // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]]
+  // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]]
+  // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i40) : i40
+  // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]]
+  // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]]
+  // CHECK: %[[C32:.*]] = llvm.mlir.constant(32 : i40) : i40
+  // CHECK: %[[SHIFTED_COMPL:.*]] = llvm.shl %[[VALUE_32]], %[[C32]]
+  // CHECK: %[[VALUE_COMPL:.*]] = llvm.or %[[VALUE_32]], %[[SHIFTED_COMPL]]
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i40
+  // CHECK: llvm.return %[[VALUE_COMPL]] : i40
+  llvm.return %2 : i40
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @no_volatile_memset
 llvm.func @no_volatile_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -89,6 +157,22 @@ llvm.func @no_volatile_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @no_volatile_memset_inline
+llvm.func @no_volatile_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 4 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 4}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @no_partial_memset
 llvm.func @no_partial_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -107,6 +191,22 @@ llvm.func @no_partial_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @no_partial_memset_inline
+llvm.func @no_partial_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @no_overflowing_memset
 llvm.func @no_overflowing_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -125,6 +225,22 @@ llvm.func @no_overflowing_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @no_overflowing_memset_inline
+llvm.func @no_overflowing_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset
 llvm.func @only_byte_aligned_integers_memset() -> i10 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -143,6 +259,22 @@ llvm.func @only_byte_aligned_integers_memset() -> i10 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset_inline
+llvm.func @only_byte_aligned_integers_memset_inline() -> i10 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i10
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i10 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i10
+  llvm.return %2 : i10
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @basic_memcpy
 // CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
 llvm.func @basic_memcpy(%source: !llvm.ptr) -> i32 {

>From 2b889ac9884f112ab861df7d47e33e0fe6058522 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:46:33 +0800
Subject: [PATCH 10/13] add tests in sroa-intrinsics.mlir

---
 mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir | 217 ++++++++++++++++++
 1 file changed, 217 insertions(+)

diff --git a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
index ba73025814cc05..f0e671af6bf87f 100644
--- a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
@@ -21,6 +21,25 @@ llvm.func @memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline
+llvm.func @memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // 16 bytes means it will span over the first 4 i32 entries
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 16}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_partial
 llvm.func @memset_partial() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -43,6 +62,26 @@ llvm.func @memset_partial() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_partial
+llvm.func @memset_inline_partial() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // After SROA, only the second i32 will be actually used. As the memset writes up
+  // to half of it, only 2 bytes will be set.
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // 6 bytes means it will span over the first i32 and half of the second i32.
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_full
 llvm.func @memset_full() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -64,6 +103,25 @@ llvm.func @memset_full() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_full
+llvm.func @memset_inline_full() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // 40 bytes means it will span over the entire array
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 40}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_too_much
 llvm.func @memset_too_much() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -84,6 +142,24 @@ llvm.func @memset_too_much() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_too_much
+llvm.func @memset_inline_too_much() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // 41 bytes means it will span over the entire array, and then some
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 41 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 41}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_no_volatile
 llvm.func @memset_no_volatile() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -103,6 +179,23 @@ llvm.func @memset_no_volatile() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_no_volatile
+llvm.func @memset_inline_no_volatile() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32>
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 16 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 16}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @indirect_memset
 llvm.func @indirect_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -123,6 +216,24 @@ llvm.func @indirect_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @indirect_memset_inline
+llvm.func @indirect_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // This memset will only cover the selected element.
+  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @invalid_indirect_memset
 llvm.func @invalid_indirect_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -144,6 +255,25 @@ llvm.func @invalid_indirect_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @invalid_indirect_memset_inline
+llvm.func @invalid_indirect_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)>
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // This memset will go slightly beyond one of the elements.
+  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0]
+  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
+  // CHECK: "llvm.intr.memset.inline"(%[[GEP]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
+  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_double_use
 llvm.func @memset_double_use() -> i32 {
   // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -176,6 +306,35 @@ llvm.func @memset_double_use() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_double_use
+llvm.func @memset_inline_double_use() -> i32 {
+  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // We expect two generated memsets, one for each field.
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // After SROA, each field gets its own 4-byte slot, so each memset sets 4 bytes.
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // 8 bytes means it will span over both the i32 and the f32 entries.
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
+  %5 = llvm.load %4 : !llvm.ptr -> f32
+  // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+  %6 = llvm.bitcast %5 : f32 to i32
+  %7 = llvm.add %3, %6 : i32
+  llvm.return %7 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_considers_alignment
 llvm.func @memset_considers_alignment() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -207,6 +366,35 @@ llvm.func @memset_considers_alignment() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_considers_alignment
+llvm.func @memset_inline_considers_alignment() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set.
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // 8 bytes means it will span over the i8 and the i32 entry.
+  // Because of padding, the f32 entry will not be touched.
+  // Even though both the i32 and the f32 are used, only one memset should be
+  // generated, as the f32 is not touched by the initial memset.
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
+  %5 = llvm.load %4 : !llvm.ptr -> f32
+  // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+  %6 = llvm.bitcast %5 : f32 to i32
+  %7 = llvm.add %3, %6 : i32
+  llvm.return %7 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_considers_packing
 llvm.func @memset_considers_packing() -> i32 {
   // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -239,6 +427,35 @@ llvm.func @memset_considers_packing() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_considers_packing
+llvm.func @memset_inline_considers_packing() -> i32 {
+  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // Now all fields are touched by the memset.
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // After SROA, only the 32-bit fields are used: 4 bytes are set in the i32, but only 3 in the f32.
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 3 : i64}>
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // 8 bytes means it will span over all the fields, because there is no padding as the struct is packed.
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)>
+  %5 = llvm.load %4 : !llvm.ptr -> f32
+  // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+  %6 = llvm.bitcast %5 : f32 to i32
+  %7 = llvm.add %3, %6 : i32
+  llvm.return %7 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memcpy_dest
 // CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr)
 llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 {

>From 0e2f43f2fdf38040cf98bb08f0e8aeedb80fa727 Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:48:37 +0800
Subject: [PATCH 11/13] rename createMemsetIntrToReplace -> createMemsetIntr

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 23 ++++++++-----------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 2c88c624c4a3a1..ed9100d6cc7235 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1037,11 +1037,10 @@ IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
   return op.getLenAttr();
 }
 template <class MemsetIntr>
-void createMemsetIntrToReplace(OpBuilder &builder, MemsetIntr toReplace,
-                               IntegerAttr memsetLenAttr,
-                               uint64_t newMemsetSize,
-                               DenseMap<Attribute, MemorySlot> &subslots,
-                               Attribute index) {
+void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index) {
   Value newMemsetSizeValue =
       builder
           .create<LLVM::ConstantOp>(
@@ -1054,12 +1053,10 @@ void createMemsetIntrToReplace(OpBuilder &builder, MemsetIntr toReplace,
                                  toReplace.getIsVolatile());
 }
 template <>
-void createMemsetIntrToReplace(OpBuilder &builder,
-                               LLVM::MemsetInlineOp toReplace,
-                               IntegerAttr memsetLenAttr,
-                               uint64_t newMemsetSize,
-                               DenseMap<Attribute, MemorySlot> &subslots,
-                               Attribute index) {
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index) {
 
   auto newMemsetSizeValue =
       IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
@@ -1105,8 +1102,8 @@ memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
     // Otherwise, only compute its offset within the original memset.
     if (subslots.contains(index)) {
       uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
-      createMemsetIntrToReplace(builder, op, memsetLenAttr, newMemsetSize,
-                                subslots, index);
+      createMemsetIntr(builder, op, memsetLenAttr, newMemsetSize, subslots,
+                       index);
     }
 
     covered += typeSize;

>From fb1518a164af93ee6c1f1ffa97f6d4afe31faf9c Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:50:35 +0800
Subject: [PATCH 12/13] specialize LLVM::MemsetOp

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index ed9100d6cc7235..e966e38d02e21d 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -1038,6 +1038,11 @@ IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
 }
 template <class MemsetIntr>
 void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index);
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace,
                       IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
                       DenseMap<Attribute, MemorySlot> &subslots,
                       Attribute index) {

>From 3cf1dde520fdc103dd6de8344adb9dd8fc262e4d Mon Sep 17 00:00:00 2001
From: PikachuHy <pikachuhy at linux.alibaba.com>
Date: Tue, 12 Nov 2024 18:39:26 +0800
Subject: [PATCH 13/13] move and add comments

---
 mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 111 ++++++++++--------
 1 file changed, 61 insertions(+), 50 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index e966e38d02e21d..b81f30f55ea4a9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -916,6 +916,67 @@ std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemsetInlineOp op) {
   return memIntrLen.getZExtValue();
 }
 
+/// Returns an integer attribute representing the length of a memset intrinsic.
+template <class MemsetIntr>
+IntegerAttr createMemsetLenAttr(MemsetIntr op) {
+  IntegerAttr memsetLenAttr;
+  bool successfulMatch =
+      matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
+  (void)successfulMatch;
+  assert(successfulMatch);
+  return memsetLenAttr;
+}
+
+/// Returns an integer attribute representing the length of a memset intrinsic.
+/// Because MemsetInlineOp has its length encoded as an attribute, this requires
+/// specialized handling.
+template <>
+IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
+  return op.getLenAttr();
+}
+
+/// Creates a memset intrinsic of `newMemsetSize` bytes on the subslot `index`.
+template <class MemsetIntr>
+void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index);
+
+/// Creates a memset intrinsic.
+/// Specialization for MemsetOp: the new length is emitted as a constant op.
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index) {
+  Value newMemsetSizeValue =
+      builder
+          .create<LLVM::ConstantOp>(
+              toReplace.getLen().getLoc(),
+              IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
+          .getResult();
+
+  builder.create<LLVM::MemsetOp>(toReplace.getLoc(), subslots.at(index).ptr,
+                                 toReplace.getVal(), newMemsetSizeValue,
+                                 toReplace.getIsVolatile());
+}
+
+/// Creates a memset intrinsic.
+/// Specialization for MemsetInlineOp: the new length is passed as an attribute.
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index) {
+
+  auto newMemsetSizeValue =
+      IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
+
+  builder.create<LLVM::MemsetInlineOp>(
+      toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(),
+      newMemsetSizeValue, toReplace.getIsVolatile());
+}
+
 } // namespace
 
 /// Returns whether one can be sure the memory intrinsic does not write outside
@@ -1022,56 +1083,6 @@ memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot,
   return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType);
 }
 
-namespace {
-template <class MemsetIntr>
-IntegerAttr createMemsetLenAttr(MemsetIntr op) {
-  IntegerAttr memsetLenAttr;
-  bool successfulMatch =
-      matchPattern(op.getLen(), m_Constant<IntegerAttr>(&memsetLenAttr));
-  (void)successfulMatch;
-  assert(successfulMatch);
-  return memsetLenAttr;
-}
-template <>
-IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
-  return op.getLenAttr();
-}
-template <class MemsetIntr>
-void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
-                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
-                      DenseMap<Attribute, MemorySlot> &subslots,
-                      Attribute index);
-template <>
-void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace,
-                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
-                      DenseMap<Attribute, MemorySlot> &subslots,
-                      Attribute index) {
-  Value newMemsetSizeValue =
-      builder
-          .create<LLVM::ConstantOp>(
-              toReplace.getLen().getLoc(),
-              IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
-          .getResult();
-
-  builder.create<LLVM::MemsetOp>(toReplace.getLoc(), subslots.at(index).ptr,
-                                 toReplace.getVal(), newMemsetSizeValue,
-                                 toReplace.getIsVolatile());
-}
-template <>
-void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace,
-                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
-                      DenseMap<Attribute, MemorySlot> &subslots,
-                      Attribute index) {
-
-  auto newMemsetSizeValue =
-      IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
-
-  builder.create<LLVM::MemsetInlineOp>(
-      toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(),
-      newMemsetSizeValue, toReplace.getIsVolatile());
-}
-} // namespace
-
 template <class MemsetIntr>
 static DeletionKind
 memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,



More information about the Mlir-commits mailing list