[llvm] [IR] Initial introduction of memset_pattern (PR #94992)

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 10 07:53:32 PDT 2024


https://github.com/asb created https://github.com/llvm/llvm-project/pull/94992

From 920fc95f1735d9fce0524a629a20c77978e4135e Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 May 2024 12:47:05 +0100
Subject: [PATCH] [IR] Initial introduction of memset_pattern

---
 llvm/docs/LangRef.rst                         |  53 ++
 llvm/include/llvm/CodeGen/SelectionDAG.h      |   6 +
 llvm/include/llvm/IR/InstVisitor.h            |   3 +
 llvm/include/llvm/IR/IntrinsicInst.h          |  22 +-
 llvm/include/llvm/IR/Intrinsics.td            |   9 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  41 ++
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  18 +
 llvm/lib/IR/Verifier.cpp                      |   3 +-
 .../CodeGen/RISCV/memset-pattern-inline.ll    | 591 ++++++++++++++++++
 llvm/test/Verifier/intrinsic-immarg.ll        |  17 +
 llvm/test/Verifier/memset-pattern-inline.ll   |   9 +
 11 files changed, 770 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
 create mode 100644 llvm/test/Verifier/memset-pattern-inline.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9fb2c048a5c86..6a6d22eaac2d1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15234,6 +15234,59 @@ The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
 '``llvm.memset.*``', but the generated code is guaranteed not to call any
 external functions.
 
+.. _int_memset_pattern_inline:
+
+'``llvm.memset_pattern.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset_pattern.inline`` on
+any integer bit width and for different address spaces. However, not all
+targets support all bit widths.
+
+::
+
+      declare void @llvm.memset_pattern.inline.p0.i64.i128(ptr <dest>, i128 <val>,
+                                                           i64 <len>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset_pattern.inline.*``' intrinsics fill a block of memory with
+a particular pattern and guarantee that no external functions are called.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination to fill, the second is the
+pattern with which to fill it, the third is a constant integer specifying the
+number of bytes to fill, and the fourth is a boolean indicating whether the
+access is volatile.
+
+The :ref:`align <attr_align>` parameter attribute can be provided
+for the first argument.
+
+If the ``isvolatile`` parameter is ``true``, the
+``llvm.memset_pattern.inline`` call is a :ref:`volatile operation <volatile>`.
+The detailed access behavior is not very cleanly specified and it is unwise to
+depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset_pattern.inline.*``' intrinsics fill ``len`` bytes of memory
+starting at the destination location with the given pattern. If the argument
+is known to be aligned to some boundary, this can be specified as an
+attribute on the argument.
+
+``len`` must be a constant expression.
+If ``<len>`` is 0, it is a no-op modulo the behavior of attributes attached to
+the arguments.
+If ``<len>`` is not a well-defined value, the behavior is undefined.
+If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
+behavior is undefined.
+
 .. _int_sqrt:
 
 '``llvm.sqrt.*``' Intrinsic
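
As a usage sketch (not part of the patch), a caller might emit the intrinsic
as follows, reusing the .p0.i32.i32 overload declared in the new Verifier
tests; the pattern value 0xDEADBEEF is arbitrary:

    declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1)

    define void @fill16(ptr %buf) {
      ; Fill 16 bytes at %buf with the 4-byte pattern 0xDEADBEEF, repeated
      ; four times. The length and volatile flag must be immediate arguments.
      call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %buf, i32 3735928559, i32 16, i1 false)
      ret void
    }
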
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 7b0e5e7d9504b..f5d5169500432 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1205,6 +1205,12 @@ class SelectionDAG {
                     MachinePointerInfo DstPtrInfo,
                     const AAMDNodes &AAInfo = AAMDNodes());
 
+  SDValue getMemsetPatternInline(SDValue Chain, const SDLoc &dl, SDValue Dst,
+                                 SDValue Src, SDValue Size, Align Alignment,
+                                 bool isVol, bool isTailCall,
+                                 MachinePointerInfo DstPtrInfo,
+                                 const AAMDNodes &AAInfo = AAMDNodes());
+
   SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
                           SDValue Src, SDValue Size, Type *SizeTy,
                           unsigned ElemSz, bool isTailCall,
diff --git a/llvm/include/llvm/IR/InstVisitor.h b/llvm/include/llvm/IR/InstVisitor.h
index 311e0ac47ddfa..30a9df8a32184 100644
--- a/llvm/include/llvm/IR/InstVisitor.h
+++ b/llvm/include/llvm/IR/InstVisitor.h
@@ -208,6 +208,7 @@ class InstVisitor {
   RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
   RetTy visitMemSetInst(MemSetInst &I)            { DELEGATE(MemIntrinsic); }
   RetTy visitMemSetInlineInst(MemSetInlineInst &I){ DELEGATE(MemSetInst); }
+  RetTy visitMemSetPatternInlineInst(MemSetPatternInlineInst &I){ DELEGATE(MemSetInst); }
   RetTy visitMemCpyInst(MemCpyInst &I)            { DELEGATE(MemTransferInst); }
   RetTy visitMemCpyInlineInst(MemCpyInlineInst &I){ DELEGATE(MemCpyInst); }
   RetTy visitMemMoveInst(MemMoveInst &I)          { DELEGATE(MemTransferInst); }
@@ -295,6 +296,8 @@ class InstVisitor {
       case Intrinsic::memset:      DELEGATE(MemSetInst);
       case Intrinsic::memset_inline:
         DELEGATE(MemSetInlineInst);
+      case Intrinsic::memset_pattern_inline:
+        DELEGATE(MemSetPatternInlineInst);
       case Intrinsic::vastart:     DELEGATE(VAStartInst);
       case Intrinsic::vaend:       DELEGATE(VAEndInst);
       case Intrinsic::vacopy:      DELEGATE(VACopyInst);
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 9010e1a1c896b..184cc47e48c1e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1170,6 +1170,7 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
     case Intrinsic::memmove:
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
     case Intrinsic::memcpy_inline:
       return true;
     default:
@@ -1181,7 +1182,8 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
   }
 };
 
-/// This class wraps the llvm.memset and llvm.memset.inline intrinsics.
+/// This class wraps the llvm.memset, llvm.memset.inline, and
+/// llvm.memset_pattern.inline intrinsics.
 class MemSetInst : public MemSetBase<MemIntrinsic> {
 public:
   // Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -1189,6 +1191,7 @@ class MemSetInst : public MemSetBase<MemIntrinsic> {
     switch (I->getIntrinsicID()) {
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
       return true;
     default:
       return false;
@@ -1214,6 +1217,21 @@ class MemSetInlineInst : public MemSetInst {
   }
 };
 
+/// This class wraps the llvm.memset_pattern.inline intrinsic.
+class MemSetPatternInlineInst : public MemSetInst {
+public:
+  ConstantInt *getLength() const {
+    return cast<ConstantInt>(MemSetInst::getLength());
+  }
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::memset_pattern_inline;
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
 /// This class wraps the llvm.memcpy/memmove intrinsics.
 class MemTransferInst : public MemTransferBase<MemIntrinsic> {
 public:
@@ -1293,6 +1311,7 @@ class AnyMemIntrinsic : public MemIntrinsicBase<AnyMemIntrinsic> {
     case Intrinsic::memmove:
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
     case Intrinsic::memcpy_element_unordered_atomic:
     case Intrinsic::memmove_element_unordered_atomic:
     case Intrinsic::memset_element_unordered_atomic:
@@ -1315,6 +1334,7 @@ class AnyMemSetInst : public MemSetBase<AnyMemIntrinsic> {
     switch (I->getIntrinsicID()) {
     case Intrinsic::memset:
     case Intrinsic::memset_inline:
+    case Intrinsic::memset_pattern_inline:
     case Intrinsic::memset_element_unordered_atomic:
       return true;
     default:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 107442623ab7b..a7f2818506886 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1008,6 +1008,15 @@ def int_memset_inline
        NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
        ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
 
+// Memset variant that writes a given pattern. Like memset.inline, this is
+// guaranteed not to call any external function.
+def int_memset_pattern_inline
+    : Intrinsic<[],
+      [llvm_anyptr_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_i1_ty],
+      [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
+       NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
+       ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>], "llvm.memset_pattern.inline">;
+
 // FIXME: Add version of these floating point intrinsics which allow non-default
 // rounding modes and FP exception handling.
 
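
The intrinsic is overloaded on the pointer type and on both integer types, so
the mangled name carries three type suffixes. The two shapes exercised by the
tests in this patch are:

    declare void @llvm.memset_pattern.inline.p0.i64.i128(ptr, i128, i64, i1)
    declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr, i32, i32, i1)
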
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 523d3aea66225..8a9faefbf2a8c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8484,6 +8484,47 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   return CallResult.second;
 }
 
+SDValue SelectionDAG::getMemsetPatternInline(SDValue Chain, const SDLoc &dl,
+                                             SDValue Dst, SDValue Src,
+                                             SDValue Size, Align Alignment,
+                                             bool isVol, bool isTailCall,
+                                             MachinePointerInfo DstPtrInfo,
+                                             const AAMDNodes &AAInfo) {
+  // The size operand is an immediate argument, so it is always a constant
+  // here (the caller asserts this before calling).
+  ConstantSDNode *ConstantSize = cast<ConstantSDNode>(Size);
+  if (ConstantSize->isZero())
+    return Chain;
+
+  uint64_t SrcWidth = Src.getScalarValueSizeInBits() / 8;
+  unsigned NumFullWidthStores = ConstantSize->getZExtValue() / SrcWidth;
+  unsigned RemainingBytes = ConstantSize->getZExtValue() % SrcWidth;
+  SmallVector<SDValue, 8> OutChains;
+  uint64_t DstOff = 0;
+
+  // Emit a full-width store of the pattern for each time it wholly fits in
+  // the destination.
+  for (unsigned i = 0; i < NumFullWidthStores; i++) {
+    SDValue Store = getStore(
+        Chain, dl, Src,
+        getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+        DstPtrInfo.getWithOffset(DstOff), Alignment,
+        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+        AAInfo);
+    OutChains.push_back(Store);
+    DstOff += SrcWidth;
+  }
+
+  // Cover any tail with a single truncating store of the pattern's
+  // low-order bytes.
+  if (RemainingBytes) {
+    EVT IntVT = EVT::getIntegerVT(*getContext(), RemainingBytes * 8);
+    SDValue Store = getTruncStore(
+        Chain, dl, Src,
+        getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+        DstPtrInfo.getWithOffset(DstOff), IntVT, Alignment,
+        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+        AAInfo);
+    OutChains.push_back(Store);
+  }
+
+  return getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
 SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
                                       SDValue Dst, SDValue Value, SDValue Size,
                                       Type *SizeTy, unsigned ElemSz,
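
To illustrate the decomposition: for an i128 pattern and a constant length of
17, the loop emits one full-width 16-byte store and the tail emits a 1-byte
truncating store of the pattern's low bits. A rough IR-level sketch of the
resulting accesses (not the patch's literal output):

    store i128 %value, ptr %a
    %tail = getelementptr i8, ptr %a, i64 16
    %low = trunc i128 %value to i8
    store i8 %low, ptr %tail

This corresponds to the memset_17 codegen test below, where the final sb
stores the pattern's low byte at offset 16.
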
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index be5e0f6ef058b..9d2b1fd6b7f9b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6485,6 +6485,24 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     updateDAGForMaybeTailCall(MC);
     return;
   }
+  case Intrinsic::memset_pattern_inline: {
+    const auto &MSPII = cast<MemSetPatternInlineInst>(I);
+    SDValue Dst = getValue(I.getArgOperand(0));
+    SDValue Value = getValue(I.getArgOperand(1));
+    SDValue Size = getValue(I.getArgOperand(2));
+    assert(isa<ConstantSDNode>(Size) &&
+           "memset_pattern_inline needs constant size");
+    // @llvm.memset defines 0 and 1 to both mean no alignment.
+    Align DstAlign = MSPII.getDestAlign().valueOrOne();
+    bool isVol = MSPII.isVolatile();
+    bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+    SDValue Root = isVol ? getRoot() : getMemoryRoot();
+    SDValue MC = DAG.getMemsetPatternInline(
+        Root, sdl, Dst, Value, Size, DstAlign, isVol, isTC,
+        MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+    updateDAGForMaybeTailCall(MC);
+    return;
+  }
   case Intrinsic::memmove: {
     const auto &MMI = cast<MemMoveInst>(I);
     SDValue Op1 = getValue(I.getArgOperand(0));
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index e5927203f33a2..8f783755a1dae 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5434,7 +5434,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
   case Intrinsic::memcpy_inline:
   case Intrinsic::memmove:
   case Intrinsic::memset:
-  case Intrinsic::memset_inline: {
+  case Intrinsic::memset_inline:
+  case Intrinsic::memset_pattern_inline: {
     break;
   }
   case Intrinsic::memcpy_element_unordered_atomic:
diff --git a/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll b/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
new file mode 100644
index 0000000000000..993dc330a36ff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
@@ -0,0 +1,591 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
+
+define void @memset_1(ptr %a, i128 %value) nounwind {
+; RV32-BOTH-LABEL: memset_1:
+; RV32-BOTH:       # %bb.0:
+; RV32-BOTH-NEXT:    lw a1, 0(a1)
+; RV32-BOTH-NEXT:    sb a1, 0(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: memset_1:
+; RV64-BOTH:       # %bb.0:
+; RV64-BOTH-NEXT:    sb a1, 0(a0)
+; RV64-BOTH-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 1, i1 0)
+  ret void
+}
+
+define void @memset_2(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_2:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sh a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_2:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sh a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 2, i1 0)
+  ret void
+}
+
+define void @memset_3(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_3:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a2, a1, 8
+; RV32-NEXT:    sb a2, 1(a0)
+; RV32-NEXT:    srli a1, a1, 16
+; RV32-NEXT:    sb a1, 2(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_3:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a2, a1, 8
+; RV64-NEXT:    sb a2, 1(a0)
+; RV64-NEXT:    srli a1, a1, 16
+; RV64-NEXT:    sb a1, 2(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_3:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sh a1, 0(a0)
+; RV32-FAST-NEXT:    srli a1, a1, 16
+; RV32-FAST-NEXT:    sb a1, 2(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_3:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sh a1, 0(a0)
+; RV64-FAST-NEXT:    srli a1, a1, 16
+; RV64-FAST-NEXT:    sb a1, 2(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 3, i1 0)
+  ret void
+}
+
+define void @memset_4(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_4:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a2, a1, 24
+; RV32-NEXT:    sb a2, 3(a0)
+; RV32-NEXT:    srli a2, a1, 16
+; RV32-NEXT:    sb a2, 2(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_4:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_4:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_4:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sw a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 4, i1 0)
+  ret void
+}
+
+define void @memset_5(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_5:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 0(a1)
+; RV32-NEXT:    lw a1, 4(a1)
+; RV32-NEXT:    sb a2, 0(a0)
+; RV32-NEXT:    sb a1, 4(a0)
+; RV32-NEXT:    srli a1, a2, 24
+; RV32-NEXT:    sb a1, 3(a0)
+; RV32-NEXT:    srli a1, a2, 16
+; RV32-NEXT:    sb a1, 2(a0)
+; RV32-NEXT:    srli a2, a2, 8
+; RV32-NEXT:    sb a2, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_5:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a2, a1, 8
+; RV64-NEXT:    sb a2, 1(a0)
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    sb a1, 4(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_5:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a2, 4(a1)
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sb a2, 4(a0)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_5:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sw a1, 0(a0)
+; RV64-FAST-NEXT:    srli a1, a1, 32
+; RV64-FAST-NEXT:    sb a1, 4(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 5, i1 0)
+  ret void
+}
+
+define void @memset_6(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_6:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 4(a1)
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a2, 4(a0)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a2, a2, 8
+; RV32-NEXT:    sb a2, 5(a0)
+; RV32-NEXT:    srli a2, a1, 24
+; RV32-NEXT:    sb a2, 3(a0)
+; RV32-NEXT:    srli a2, a1, 16
+; RV32-NEXT:    sb a2, 2(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_6:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a2, a1, 40
+; RV64-NEXT:    sb a2, 5(a0)
+; RV64-NEXT:    srli a2, a1, 32
+; RV64-NEXT:    sb a2, 4(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_6:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a2, 4(a1)
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sh a2, 4(a0)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_6:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sw a1, 0(a0)
+; RV64-FAST-NEXT:    srli a1, a1, 32
+; RV64-FAST-NEXT:    sh a1, 4(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 6, i1 0)
+  ret void
+}
+
+define void @memset_7(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_7:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 4(a1)
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a2, 4(a0)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a3, a2, 8
+; RV32-NEXT:    sb a3, 5(a0)
+; RV32-NEXT:    srli a2, a2, 16
+; RV32-NEXT:    sb a2, 6(a0)
+; RV32-NEXT:    srli a2, a1, 24
+; RV32-NEXT:    sb a2, 3(a0)
+; RV32-NEXT:    srli a2, a1, 16
+; RV32-NEXT:    sb a2, 2(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_7:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a2, a1, 40
+; RV64-NEXT:    sb a2, 5(a0)
+; RV64-NEXT:    srli a2, a1, 32
+; RV64-NEXT:    sb a2, 4(a0)
+; RV64-NEXT:    srli a2, a1, 48
+; RV64-NEXT:    sb a2, 6(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_7:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a2, 4(a1)
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sh a2, 4(a0)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    srli a2, a2, 16
+; RV32-FAST-NEXT:    sb a2, 6(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_7:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sw a1, 0(a0)
+; RV64-FAST-NEXT:    srli a2, a1, 48
+; RV64-FAST-NEXT:    sb a2, 6(a0)
+; RV64-FAST-NEXT:    srli a1, a1, 32
+; RV64-FAST-NEXT:    sh a1, 4(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 7, i1 0)
+  ret void
+}
+
+define void @memset_8(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 4(a1)
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a2, 4(a0)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a3, a2, 24
+; RV32-NEXT:    sb a3, 7(a0)
+; RV32-NEXT:    srli a3, a2, 16
+; RV32-NEXT:    sb a3, 6(a0)
+; RV32-NEXT:    srli a2, a2, 8
+; RV32-NEXT:    sb a2, 5(a0)
+; RV32-NEXT:    srli a2, a1, 24
+; RV32-NEXT:    sb a2, 3(a0)
+; RV32-NEXT:    srli a2, a1, 16
+; RV32-NEXT:    sb a2, 2(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a2, a1, 56
+; RV64-NEXT:    sb a2, 7(a0)
+; RV64-NEXT:    srli a2, a1, 48
+; RV64-NEXT:    sb a2, 6(a0)
+; RV64-NEXT:    srli a2, a1, 40
+; RV64-NEXT:    sb a2, 5(a0)
+; RV64-NEXT:    srli a2, a1, 32
+; RV64-NEXT:    sb a2, 4(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_8:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a2, 4(a1)
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sw a2, 4(a0)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_8:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 8, i1 0)
+  ret void
+}
+
+define void @memset_9(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_9:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 4(a1)
+; RV32-NEXT:    lw a3, 0(a1)
+; RV32-NEXT:    lw a1, 8(a1)
+; RV32-NEXT:    sb a2, 4(a0)
+; RV32-NEXT:    sb a3, 0(a0)
+; RV32-NEXT:    sb a1, 8(a0)
+; RV32-NEXT:    srli a1, a2, 24
+; RV32-NEXT:    sb a1, 7(a0)
+; RV32-NEXT:    srli a1, a2, 16
+; RV32-NEXT:    sb a1, 6(a0)
+; RV32-NEXT:    srli a2, a2, 8
+; RV32-NEXT:    sb a2, 5(a0)
+; RV32-NEXT:    srli a1, a3, 24
+; RV32-NEXT:    sb a1, 3(a0)
+; RV32-NEXT:    srli a1, a3, 16
+; RV32-NEXT:    sb a1, 2(a0)
+; RV32-NEXT:    srli a3, a3, 8
+; RV32-NEXT:    sb a3, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_9:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    sb a2, 8(a0)
+; RV64-NEXT:    srli a2, a1, 56
+; RV64-NEXT:    sb a2, 7(a0)
+; RV64-NEXT:    srli a2, a1, 48
+; RV64-NEXT:    sb a2, 6(a0)
+; RV64-NEXT:    srli a2, a1, 40
+; RV64-NEXT:    sb a2, 5(a0)
+; RV64-NEXT:    srli a2, a1, 32
+; RV64-NEXT:    sb a2, 4(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_9:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a2, 4(a1)
+; RV32-FAST-NEXT:    lw a3, 0(a1)
+; RV32-FAST-NEXT:    lw a1, 8(a1)
+; RV32-FAST-NEXT:    sw a2, 4(a0)
+; RV32-FAST-NEXT:    sw a3, 0(a0)
+; RV32-FAST-NEXT:    sb a1, 8(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_9:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sb a2, 8(a0)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 9, i1 0)
+  ret void
+}
+
+define void @memset_16(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 12(a1)
+; RV32-NEXT:    lw a3, 8(a1)
+; RV32-NEXT:    lw a4, 4(a1)
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a2, 12(a0)
+; RV32-NEXT:    sb a3, 8(a0)
+; RV32-NEXT:    sb a4, 4(a0)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    srli a5, a2, 24
+; RV32-NEXT:    sb a5, 15(a0)
+; RV32-NEXT:    srli a5, a2, 16
+; RV32-NEXT:    sb a5, 14(a0)
+; RV32-NEXT:    srli a2, a2, 8
+; RV32-NEXT:    sb a2, 13(a0)
+; RV32-NEXT:    srli a2, a3, 24
+; RV32-NEXT:    sb a2, 11(a0)
+; RV32-NEXT:    srli a2, a3, 16
+; RV32-NEXT:    sb a2, 10(a0)
+; RV32-NEXT:    srli a3, a3, 8
+; RV32-NEXT:    sb a3, 9(a0)
+; RV32-NEXT:    srli a2, a4, 24
+; RV32-NEXT:    sb a2, 7(a0)
+; RV32-NEXT:    srli a2, a4, 16
+; RV32-NEXT:    sb a2, 6(a0)
+; RV32-NEXT:    srli a4, a4, 8
+; RV32-NEXT:    sb a4, 5(a0)
+; RV32-NEXT:    srli a2, a1, 24
+; RV32-NEXT:    sb a2, 3(a0)
+; RV32-NEXT:    srli a2, a1, 16
+; RV32-NEXT:    sb a2, 2(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a2, 8(a0)
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    srli a3, a2, 56
+; RV64-NEXT:    sb a3, 15(a0)
+; RV64-NEXT:    srli a3, a2, 48
+; RV64-NEXT:    sb a3, 14(a0)
+; RV64-NEXT:    srli a3, a2, 40
+; RV64-NEXT:    sb a3, 13(a0)
+; RV64-NEXT:    srli a3, a2, 32
+; RV64-NEXT:    sb a3, 12(a0)
+; RV64-NEXT:    srli a3, a2, 24
+; RV64-NEXT:    sb a3, 11(a0)
+; RV64-NEXT:    srli a3, a2, 16
+; RV64-NEXT:    sb a3, 10(a0)
+; RV64-NEXT:    srli a2, a2, 8
+; RV64-NEXT:    sb a2, 9(a0)
+; RV64-NEXT:    srli a2, a1, 56
+; RV64-NEXT:    sb a2, 7(a0)
+; RV64-NEXT:    srli a2, a1, 48
+; RV64-NEXT:    sb a2, 6(a0)
+; RV64-NEXT:    srli a2, a1, 40
+; RV64-NEXT:    sb a2, 5(a0)
+; RV64-NEXT:    srli a2, a1, 32
+; RV64-NEXT:    sb a2, 4(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_16:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a2, 12(a1)
+; RV32-FAST-NEXT:    lw a3, 8(a1)
+; RV32-FAST-NEXT:    lw a4, 4(a1)
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sw a2, 12(a0)
+; RV32-FAST-NEXT:    sw a3, 8(a0)
+; RV32-FAST-NEXT:    sw a4, 4(a0)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_16:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sd a2, 8(a0)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 16, i1 0)
+  ret void
+}
+
+define void @memset_17(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_17:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a2, 12(a1)
+; RV32-NEXT:    lw a3, 8(a1)
+; RV32-NEXT:    lw a4, 4(a1)
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a2, 12(a0)
+; RV32-NEXT:    sb a3, 8(a0)
+; RV32-NEXT:    sb a4, 4(a0)
+; RV32-NEXT:    sb a1, 0(a0)
+; RV32-NEXT:    sb a1, 16(a0)
+; RV32-NEXT:    srli a5, a2, 24
+; RV32-NEXT:    sb a5, 15(a0)
+; RV32-NEXT:    srli a5, a2, 16
+; RV32-NEXT:    sb a5, 14(a0)
+; RV32-NEXT:    srli a2, a2, 8
+; RV32-NEXT:    sb a2, 13(a0)
+; RV32-NEXT:    srli a2, a3, 24
+; RV32-NEXT:    sb a2, 11(a0)
+; RV32-NEXT:    srli a2, a3, 16
+; RV32-NEXT:    sb a2, 10(a0)
+; RV32-NEXT:    srli a3, a3, 8
+; RV32-NEXT:    sb a3, 9(a0)
+; RV32-NEXT:    srli a2, a4, 24
+; RV32-NEXT:    sb a2, 7(a0)
+; RV32-NEXT:    srli a2, a4, 16
+; RV32-NEXT:    sb a2, 6(a0)
+; RV32-NEXT:    srli a4, a4, 8
+; RV32-NEXT:    sb a4, 5(a0)
+; RV32-NEXT:    srli a2, a1, 24
+; RV32-NEXT:    sb a2, 3(a0)
+; RV32-NEXT:    srli a2, a1, 16
+; RV32-NEXT:    sb a2, 2(a0)
+; RV32-NEXT:    srli a1, a1, 8
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: memset_17:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sb a2, 8(a0)
+; RV64-NEXT:    sb a1, 0(a0)
+; RV64-NEXT:    sb a1, 16(a0)
+; RV64-NEXT:    srli a3, a2, 56
+; RV64-NEXT:    sb a3, 15(a0)
+; RV64-NEXT:    srli a3, a2, 48
+; RV64-NEXT:    sb a3, 14(a0)
+; RV64-NEXT:    srli a3, a2, 40
+; RV64-NEXT:    sb a3, 13(a0)
+; RV64-NEXT:    srli a3, a2, 32
+; RV64-NEXT:    sb a3, 12(a0)
+; RV64-NEXT:    srli a3, a2, 24
+; RV64-NEXT:    sb a3, 11(a0)
+; RV64-NEXT:    srli a3, a2, 16
+; RV64-NEXT:    sb a3, 10(a0)
+; RV64-NEXT:    srli a2, a2, 8
+; RV64-NEXT:    sb a2, 9(a0)
+; RV64-NEXT:    srli a2, a1, 56
+; RV64-NEXT:    sb a2, 7(a0)
+; RV64-NEXT:    srli a2, a1, 48
+; RV64-NEXT:    sb a2, 6(a0)
+; RV64-NEXT:    srli a2, a1, 40
+; RV64-NEXT:    sb a2, 5(a0)
+; RV64-NEXT:    srli a2, a1, 32
+; RV64-NEXT:    sb a2, 4(a0)
+; RV64-NEXT:    srli a2, a1, 24
+; RV64-NEXT:    sb a2, 3(a0)
+; RV64-NEXT:    srli a2, a1, 16
+; RV64-NEXT:    sb a2, 2(a0)
+; RV64-NEXT:    srli a1, a1, 8
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: memset_17:
+; RV32-FAST:       # %bb.0:
+; RV32-FAST-NEXT:    lw a2, 12(a1)
+; RV32-FAST-NEXT:    lw a3, 8(a1)
+; RV32-FAST-NEXT:    lw a4, 4(a1)
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sw a2, 12(a0)
+; RV32-FAST-NEXT:    sw a3, 8(a0)
+; RV32-FAST-NEXT:    sw a4, 4(a0)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    sb a1, 16(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: memset_17:
+; RV64-FAST:       # %bb.0:
+; RV64-FAST-NEXT:    sd a2, 8(a0)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    sb a1, 16(a0)
+; RV64-FAST-NEXT:    ret
+  tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 17, i1 0)
+  ret void
+}
+
diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll
index 47189c0b7d052..29921633ce964 100644
--- a/llvm/test/Verifier/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/intrinsic-immarg.ll
@@ -79,6 +79,23 @@ define void @memset_inline_variable_size(ptr %dest, i8 %value, i32 %size) {
   ret void
 }
 
+declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1)
+define void @memset_pattern_inline_is_volatile(ptr %dest, i32 %value, i1 %is.volatile) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
+  call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
+  ret void
+}
+
+define void @memset_pattern_inline_variable_size(ptr %dest, i32 %value, i32 %size) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %size
+  ; CHECK-NEXT: call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 %size, i1 true)
+  call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 %size, i1 true)
+  ret void
+}
+
 
 declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
 define void @objectsize(ptr %ptr, i1 %a, i1 %b, i1 %c) {
diff --git a/llvm/test/Verifier/memset-pattern-inline.ll b/llvm/test/Verifier/memset-pattern-inline.ll
new file mode 100644
index 0000000000000..6876bc3ceedba
--- /dev/null
+++ b/llvm/test/Verifier/memset-pattern-inline.ll
@@ -0,0 +1,9 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+; CHECK: alignment is not a power of two
+
+define void @foo(ptr %P, i32 %value) {
+  call void @llvm.memset_pattern.inline.p0.i32.i32(ptr align 3 %P, i32 %value, i32 4, i1 false)
+  ret void
+}
+declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1) nounwind
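
For contrast, a sketch of a call this check accepts, since align 4 is a power
of two:

    call void @llvm.memset_pattern.inline.p0.i32.i32(ptr align 4 %P, i32 %value, i32 4, i1 false)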


