[llvm] [IR] Initial introduction of memset_pattern (PR #94992)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 10 07:53:32 PDT 2024
https://github.com/asb created https://github.com/llvm/llvm-project/pull/94992
From 920fc95f1735d9fce0524a629a20c77978e4135e Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 May 2024 12:47:05 +0100
Subject: [PATCH] [IR] Initial introduction of memset_pattern
---
llvm/docs/LangRef.rst | 53 ++
llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +
llvm/include/llvm/IR/InstVisitor.h | 3 +
llvm/include/llvm/IR/IntrinsicInst.h | 22 +-
llvm/include/llvm/IR/Intrinsics.td | 9 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 41 ++
.../SelectionDAG/SelectionDAGBuilder.cpp | 18 +
llvm/lib/IR/Verifier.cpp | 3 +-
.../CodeGen/RISCV/memset-pattern-inline.ll | 591 ++++++++++++++++++
llvm/test/Verifier/intrinsic-immarg.ll | 17 +
llvm/test/Verifier/memset-pattern-inline.ll | 9 +
11 files changed, 770 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
create mode 100644 llvm/test/Verifier/memset-pattern-inline.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9fb2c048a5c86..6a6d22eaac2d1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15234,6 +15234,59 @@ The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
'``llvm.memset.*``', but the generated code is guaranteed not to call any
external functions.
+.. _int_memset_pattern_inline:
+
+'``llvm.memset_pattern.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset_pattern.inline`` on
+any integer bit width and for different address spaces. However, not all
+targets support all bit widths.
+
+::
+
+  declare void @llvm.memset_pattern.inline.p0.i64.i128(ptr <dest>, i128 <val>,
+                                                       i64 <len>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset_pattern.inline.*``' intrinsics fill a block of memory with
+a particular pattern and are guaranteed not to call any external functions.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination to fill, the second
+is the pattern value with which to fill it, the third is a constant integer
+specifying the number of bytes to fill, and the fourth is a boolean
+indicating whether this is a volatile access.
+
+The :ref:`align <attr_align>` parameter attribute can be provided
+for the first argument.
+
+If the ``isvolatile`` parameter is ``true``, the
+``llvm.memset_pattern.inline`` call is a :ref:`volatile operation <volatile>`.
+The detailed access behavior is not very cleanly specified and it is unwise to
+depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset_pattern.inline.*``' intrinsics fill ``len`` bytes of memory
+starting at the destination location with repeated copies of the pattern; if
+``len`` is not a multiple of the pattern's width, the final copy is truncated.
+If the destination is known to be aligned to some boundary, this can be
+specified as an attribute on the argument.
+
+``len`` must be a constant expression.
+If ``<len>`` is 0, it is a no-op, modulo the behavior of attributes attached to
+the arguments.
+If ``<len>`` is not a well-defined value, the behavior is undefined.
+If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
+behavior is undefined.
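+
+For example, the following is an illustrative call (``%dst`` and ``%pattern``
+are placeholder values) that fills 32 bytes starting at ``%dst`` with two
+copies of a 16-byte pattern:
+
+::
+
+ call void @llvm.memset_pattern.inline.p0.i64.i128(ptr align 8 %dst, i128 %pattern, i64 32, i1 false)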
+
.. _int_sqrt:
'``llvm.sqrt.*``' Intrinsic
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 7b0e5e7d9504b..f5d5169500432 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1205,6 +1205,12 @@ class SelectionDAG {
MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo = AAMDNodes());
+ SDValue getMemsetPatternInline(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, Align Alignment,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ const AAMDNodes &AAInfo = AAMDNodes());
+
SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Type *SizeTy,
unsigned ElemSz, bool isTailCall,
diff --git a/llvm/include/llvm/IR/InstVisitor.h b/llvm/include/llvm/IR/InstVisitor.h
index 311e0ac47ddfa..30a9df8a32184 100644
--- a/llvm/include/llvm/IR/InstVisitor.h
+++ b/llvm/include/llvm/IR/InstVisitor.h
@@ -208,6 +208,7 @@ class InstVisitor {
RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
RetTy visitMemSetInlineInst(MemSetInlineInst &I){ DELEGATE(MemSetInst); }
+ RetTy visitMemSetPatternInlineInst(MemSetPatternInlineInst &I){ DELEGATE(MemSetInst); }
RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
RetTy visitMemCpyInlineInst(MemCpyInlineInst &I){ DELEGATE(MemCpyInst); }
RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
@@ -295,6 +296,8 @@ class InstVisitor {
case Intrinsic::memset: DELEGATE(MemSetInst);
case Intrinsic::memset_inline:
DELEGATE(MemSetInlineInst);
+ case Intrinsic::memset_pattern_inline:
+ DELEGATE(MemSetPatternInlineInst);
case Intrinsic::vastart: DELEGATE(VAStartInst);
case Intrinsic::vaend: DELEGATE(VAEndInst);
case Intrinsic::vacopy: DELEGATE(VACopyInst);
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 9010e1a1c896b..184cc47e48c1e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1170,6 +1170,7 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::memset_inline:
+ case Intrinsic::memset_pattern_inline:
case Intrinsic::memcpy_inline:
return true;
default:
@@ -1181,7 +1182,8 @@ class MemIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
}
};
-/// This class wraps the llvm.memset and llvm.memset.inline intrinsics.
+/// This class wraps the llvm.memset, llvm.memset.inline, and
+/// llvm.memset_pattern.inline intrinsics.
class MemSetInst : public MemSetBase<MemIntrinsic> {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -1189,6 +1191,7 @@ class MemSetInst : public MemSetBase<MemIntrinsic> {
switch (I->getIntrinsicID()) {
case Intrinsic::memset:
case Intrinsic::memset_inline:
+ case Intrinsic::memset_pattern_inline:
return true;
default:
return false;
@@ -1214,6 +1217,21 @@ class MemSetInlineInst : public MemSetInst {
}
};
+/// This class wraps the llvm.memset_pattern.inline intrinsic.
+class MemSetPatternInlineInst : public MemSetInst {
+public:
+ ConstantInt *getLength() const {
+ return cast<ConstantInt>(MemSetInst::getLength());
+ }
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memset_pattern_inline;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
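+
+// Illustrative (hypothetical) use of the wrapper; it is matched through the
+// usual casting machinery, like the other memset wrappers:
+//   if (auto *MSPI = dyn_cast<MemSetPatternInlineInst>(&I))
+//     ConstantInt *Len = MSPI->getLength(); // Length is always a constant.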
+
/// This class wraps the llvm.memcpy/memmove intrinsics.
class MemTransferInst : public MemTransferBase<MemIntrinsic> {
public:
@@ -1293,6 +1311,7 @@ class AnyMemIntrinsic : public MemIntrinsicBase<AnyMemIntrinsic> {
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::memset_inline:
+ case Intrinsic::memset_pattern_inline:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
@@ -1315,6 +1334,7 @@ class AnyMemSetInst : public MemSetBase<AnyMemIntrinsic> {
switch (I->getIntrinsicID()) {
case Intrinsic::memset:
case Intrinsic::memset_inline:
+ case Intrinsic::memset_pattern_inline:
case Intrinsic::memset_element_unordered_atomic:
return true;
default:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 107442623ab7b..a7f2818506886 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1008,6 +1008,15 @@ def int_memset_inline
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+// Memset variant that writes a given pattern. Like memset.inline, this is
+// guaranteed not to call any external function.
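+// An explicit name is needed because the default intrinsic name (the record
+// name with each '_' replaced by '.') would be "llvm.memset.pattern.inline"
+// rather than the intended "llvm.memset_pattern.inline".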
+def int_memset_pattern_inline
+ : Intrinsic<[],
+ [llvm_anyptr_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_i1_ty],
+ [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
+ NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>], "llvm.memset_pattern.inline">;
+
// FIXME: Add version of these floating point intrinsics which allow non-default
// rounding modes and FP exception handling.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 523d3aea66225..8a9faefbf2a8c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8484,6 +8484,47 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getMemsetPatternInline(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ const AAMDNodes &AAInfo) {
+ // The size operand is an immarg on the intrinsic, so it is always a
+ // constant; use cast rather than an unchecked dyn_cast.
+ ConstantSDNode *ConstantSize = cast<ConstantSDNode>(Size);
+ if (ConstantSize->isZero())
+ return Chain;
+
+ uint64_t SrcWidth = Src.getScalarValueSizeInBits() / 8;
+ unsigned NumFullWidthStores = ConstantSize->getZExtValue() / SrcWidth;
+ unsigned RemainingBytes = ConstantSize->getZExtValue() % SrcWidth;
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+
+ // Emit one full-width store for each complete copy of the pattern.
+ for (unsigned i = 0; i < NumFullWidthStores; i++) {
+ SDValue Store = getStore(
+ Chain, dl, Src,
+ getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment,
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+ AAInfo);
+ OutChains.push_back(Store);
+ DstOff += SrcWidth;
+ }
+
+ // Write any remaining tail bytes with a truncated copy of the pattern.
+ if (RemainingBytes) {
+ EVT IntVT = EVT::getIntegerVT(*getContext(), RemainingBytes * 8);
+ SDValue Store = getTruncStore(
+ Chain, dl, Src,
+ getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), IntVT, Alignment,
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
+ AAInfo);
+ OutChains.push_back(Store);
+ }
+
+ return getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
SDValue Dst, SDValue Value, SDValue Size,
Type *SizeTy, unsigned ElemSz,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index be5e0f6ef058b..9d2b1fd6b7f9b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6485,6 +6485,24 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
updateDAGForMaybeTailCall(MC);
return;
}
+ case Intrinsic::memset_pattern_inline: {
+ const auto &MSPII = cast<MemSetPatternInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Value = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) &&
+ "memset_pattern_inline needs constant size");
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MSPII.getDestAlign().valueOrOne();
+ bool isVol = MSPII.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemsetPatternInline(
+ Root, sdl, Dst, Value, Size, DstAlign, isVol, isTC,
+ MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
case Intrinsic::memmove: {
const auto &MMI = cast<MemMoveInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index e5927203f33a2..8f783755a1dae 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5434,7 +5434,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
- case Intrinsic::memset_inline: {
+ case Intrinsic::memset_inline:
+ case Intrinsic::memset_pattern_inline: {
break;
}
case Intrinsic::memcpy_element_unordered_atomic:
diff --git a/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll b/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
new file mode 100644
index 0000000000000..993dc330a36ff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memset-pattern-inline.ll
@@ -0,0 +1,591 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
+; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
+; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
+; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
+; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
+
+define void @memset_1(ptr %a, i128 %value) nounwind {
+; RV32-BOTH-LABEL: memset_1:
+; RV32-BOTH: # %bb.0:
+; RV32-BOTH-NEXT: lw a1, 0(a1)
+; RV32-BOTH-NEXT: sb a1, 0(a0)
+; RV32-BOTH-NEXT: ret
+;
+; RV64-BOTH-LABEL: memset_1:
+; RV64-BOTH: # %bb.0:
+; RV64-BOTH-NEXT: sb a1, 0(a0)
+; RV64-BOTH-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 1, i1 0)
+ ret void
+}
+
+define void @memset_2(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_2:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_2:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_2:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sh a1, 0(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_2:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sh a1, 0(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 2, i1 0)
+ ret void
+}
+
+define void @memset_3(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_3:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a2, a1, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: srli a1, a1, 16
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_3:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: srli a1, a1, 16
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_3:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sh a1, 0(a0)
+; RV32-FAST-NEXT: srli a1, a1, 16
+; RV32-FAST-NEXT: sb a1, 2(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_3:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sh a1, 0(a0)
+; RV64-FAST-NEXT: srli a1, a1, 16
+; RV64-FAST-NEXT: sb a1, 2(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 3, i1 0)
+ ret void
+}
+
+define void @memset_4(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_4:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 3(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_4:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_4:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_4:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sw a1, 0(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 4, i1 0)
+ ret void
+}
+
+define void @memset_5(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_5:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
+; RV32-NEXT: sb a2, 0(a0)
+; RV32-NEXT: sb a1, 4(a0)
+; RV32-NEXT: srli a1, a2, 24
+; RV32-NEXT: sb a1, 3(a0)
+; RV32-NEXT: srli a1, a2, 16
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_5:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a2, a1, 8
+; RV64-NEXT: sb a2, 1(a0)
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: sb a1, 4(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_5:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a2, 4(a1)
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sb a2, 4(a0)
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_5:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sw a1, 0(a0)
+; RV64-FAST-NEXT: srli a1, a1, 32
+; RV64-FAST-NEXT: sb a1, 4(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 5, i1 0)
+ ret void
+}
+
+define void @memset_6(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_6:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 3(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_6:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 40
+; RV64-NEXT: sb a2, 5(a0)
+; RV64-NEXT: srli a2, a1, 32
+; RV64-NEXT: sb a2, 4(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_6:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a2, 4(a1)
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sh a2, 4(a0)
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_6:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sw a1, 0(a0)
+; RV64-FAST-NEXT: srli a1, a1, 32
+; RV64-FAST-NEXT: sh a1, 4(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 6, i1 0)
+ ret void
+}
+
+define void @memset_7(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_7:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a2, 8
+; RV32-NEXT: sb a3, 5(a0)
+; RV32-NEXT: srli a2, a2, 16
+; RV32-NEXT: sb a2, 6(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 3(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_7:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 40
+; RV64-NEXT: sb a2, 5(a0)
+; RV64-NEXT: srli a2, a1, 32
+; RV64-NEXT: sb a2, 4(a0)
+; RV64-NEXT: srli a2, a1, 48
+; RV64-NEXT: sb a2, 6(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_7:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a2, 4(a1)
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sh a2, 4(a0)
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: srli a2, a2, 16
+; RV32-FAST-NEXT: sb a2, 6(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_7:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sw a1, 0(a0)
+; RV64-FAST-NEXT: srli a2, a1, 48
+; RV64-FAST-NEXT: sb a2, 6(a0)
+; RV64-FAST-NEXT: srli a1, a1, 32
+; RV64-FAST-NEXT: sh a1, 4(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 7, i1 0)
+ ret void
+}
+
+define void @memset_8(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_8:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a3, a2, 24
+; RV32-NEXT: sb a3, 7(a0)
+; RV32-NEXT: srli a3, a2, 16
+; RV32-NEXT: sb a3, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 3(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_8:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a2, a1, 56
+; RV64-NEXT: sb a2, 7(a0)
+; RV64-NEXT: srli a2, a1, 48
+; RV64-NEXT: sb a2, 6(a0)
+; RV64-NEXT: srli a2, a1, 40
+; RV64-NEXT: sb a2, 5(a0)
+; RV64-NEXT: srli a2, a1, 32
+; RV64-NEXT: sb a2, 4(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_8:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a2, 4(a1)
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sw a2, 4(a0)
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_8:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 8, i1 0)
+ ret void
+}
+
+define void @memset_9(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_9:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a3, 0(a1)
+; RV32-NEXT: lw a1, 8(a1)
+; RV32-NEXT: sb a2, 4(a0)
+; RV32-NEXT: sb a3, 0(a0)
+; RV32-NEXT: sb a1, 8(a0)
+; RV32-NEXT: srli a1, a2, 24
+; RV32-NEXT: sb a1, 7(a0)
+; RV32-NEXT: srli a1, a2, 16
+; RV32-NEXT: sb a1, 6(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 5(a0)
+; RV32-NEXT: srli a1, a3, 24
+; RV32-NEXT: sb a1, 3(a0)
+; RV32-NEXT: srli a1, a3, 16
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_9:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a2, 8(a0)
+; RV64-NEXT: srli a2, a1, 56
+; RV64-NEXT: sb a2, 7(a0)
+; RV64-NEXT: srli a2, a1, 48
+; RV64-NEXT: sb a2, 6(a0)
+; RV64-NEXT: srli a2, a1, 40
+; RV64-NEXT: sb a2, 5(a0)
+; RV64-NEXT: srli a2, a1, 32
+; RV64-NEXT: sb a2, 4(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_9:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a2, 4(a1)
+; RV32-FAST-NEXT: lw a3, 0(a1)
+; RV32-FAST-NEXT: lw a1, 8(a1)
+; RV32-FAST-NEXT: sw a2, 4(a0)
+; RV32-FAST-NEXT: sw a3, 0(a0)
+; RV32-FAST-NEXT: sb a1, 8(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_9:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sb a2, 8(a0)
+; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 9, i1 0)
+ ret void
+}
+
+define void @memset_16(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_16:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 12(a1)
+; RV32-NEXT: lw a3, 8(a1)
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a2, 12(a0)
+; RV32-NEXT: sb a3, 8(a0)
+; RV32-NEXT: sb a4, 4(a0)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: srli a5, a2, 24
+; RV32-NEXT: sb a5, 15(a0)
+; RV32-NEXT: srli a5, a2, 16
+; RV32-NEXT: sb a5, 14(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 13(a0)
+; RV32-NEXT: srli a2, a3, 24
+; RV32-NEXT: sb a2, 11(a0)
+; RV32-NEXT: srli a2, a3, 16
+; RV32-NEXT: sb a2, 10(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 9(a0)
+; RV32-NEXT: srli a2, a4, 24
+; RV32-NEXT: sb a2, 7(a0)
+; RV32-NEXT: srli a2, a4, 16
+; RV32-NEXT: sb a2, 6(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 5(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 3(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_16:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a2, 8(a0)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 15(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 14(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 13(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 12(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 11(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 10(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 9(a0)
+; RV64-NEXT: srli a2, a1, 56
+; RV64-NEXT: sb a2, 7(a0)
+; RV64-NEXT: srli a2, a1, 48
+; RV64-NEXT: sb a2, 6(a0)
+; RV64-NEXT: srli a2, a1, 40
+; RV64-NEXT: sb a2, 5(a0)
+; RV64-NEXT: srli a2, a1, 32
+; RV64-NEXT: sb a2, 4(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_16:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a2, 12(a1)
+; RV32-FAST-NEXT: lw a3, 8(a1)
+; RV32-FAST-NEXT: lw a4, 4(a1)
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sw a2, 12(a0)
+; RV32-FAST-NEXT: sw a3, 8(a0)
+; RV32-FAST-NEXT: sw a4, 4(a0)
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_16:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sd a2, 8(a0)
+; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 16, i1 0)
+ ret void
+}
+
+define void @memset_17(ptr %a, i128 %value) nounwind {
+; RV32-LABEL: memset_17:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 12(a1)
+; RV32-NEXT: lw a3, 8(a1)
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sb a2, 12(a0)
+; RV32-NEXT: sb a3, 8(a0)
+; RV32-NEXT: sb a4, 4(a0)
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 16(a0)
+; RV32-NEXT: srli a5, a2, 24
+; RV32-NEXT: sb a5, 15(a0)
+; RV32-NEXT: srli a5, a2, 16
+; RV32-NEXT: sb a5, 14(a0)
+; RV32-NEXT: srli a2, a2, 8
+; RV32-NEXT: sb a2, 13(a0)
+; RV32-NEXT: srli a2, a3, 24
+; RV32-NEXT: sb a2, 11(a0)
+; RV32-NEXT: srli a2, a3, 16
+; RV32-NEXT: sb a2, 10(a0)
+; RV32-NEXT: srli a3, a3, 8
+; RV32-NEXT: sb a3, 9(a0)
+; RV32-NEXT: srli a2, a4, 24
+; RV32-NEXT: sb a2, 7(a0)
+; RV32-NEXT: srli a2, a4, 16
+; RV32-NEXT: sb a2, 6(a0)
+; RV32-NEXT: srli a4, a4, 8
+; RV32-NEXT: sb a4, 5(a0)
+; RV32-NEXT: srli a2, a1, 24
+; RV32-NEXT: sb a2, 3(a0)
+; RV32-NEXT: srli a2, a1, 16
+; RV32-NEXT: sb a2, 2(a0)
+; RV32-NEXT: srli a1, a1, 8
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: memset_17:
+; RV64: # %bb.0:
+; RV64-NEXT: sb a2, 8(a0)
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 16(a0)
+; RV64-NEXT: srli a3, a2, 56
+; RV64-NEXT: sb a3, 15(a0)
+; RV64-NEXT: srli a3, a2, 48
+; RV64-NEXT: sb a3, 14(a0)
+; RV64-NEXT: srli a3, a2, 40
+; RV64-NEXT: sb a3, 13(a0)
+; RV64-NEXT: srli a3, a2, 32
+; RV64-NEXT: sb a3, 12(a0)
+; RV64-NEXT: srli a3, a2, 24
+; RV64-NEXT: sb a3, 11(a0)
+; RV64-NEXT: srli a3, a2, 16
+; RV64-NEXT: sb a3, 10(a0)
+; RV64-NEXT: srli a2, a2, 8
+; RV64-NEXT: sb a2, 9(a0)
+; RV64-NEXT: srli a2, a1, 56
+; RV64-NEXT: sb a2, 7(a0)
+; RV64-NEXT: srli a2, a1, 48
+; RV64-NEXT: sb a2, 6(a0)
+; RV64-NEXT: srli a2, a1, 40
+; RV64-NEXT: sb a2, 5(a0)
+; RV64-NEXT: srli a2, a1, 32
+; RV64-NEXT: sb a2, 4(a0)
+; RV64-NEXT: srli a2, a1, 24
+; RV64-NEXT: sb a2, 3(a0)
+; RV64-NEXT: srli a2, a1, 16
+; RV64-NEXT: sb a2, 2(a0)
+; RV64-NEXT: srli a1, a1, 8
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: ret
+;
+; RV32-FAST-LABEL: memset_17:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: lw a2, 12(a1)
+; RV32-FAST-NEXT: lw a3, 8(a1)
+; RV32-FAST-NEXT: lw a4, 4(a1)
+; RV32-FAST-NEXT: lw a1, 0(a1)
+; RV32-FAST-NEXT: sw a2, 12(a0)
+; RV32-FAST-NEXT: sw a3, 8(a0)
+; RV32-FAST-NEXT: sw a4, 4(a0)
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: sb a1, 16(a0)
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: memset_17:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: sd a2, 8(a0)
+; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: sb a1, 16(a0)
+; RV64-FAST-NEXT: ret
+ tail call void @llvm.memset_pattern.inline.p0.i64.i128(ptr %a, i128 %value, i64 17, i1 0)
+ ret void
+}
+
diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll
index 47189c0b7d052..29921633ce964 100644
--- a/llvm/test/Verifier/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/intrinsic-immarg.ll
@@ -79,6 +79,23 @@ define void @memset_inline_variable_size(ptr %dest, i8 %value, i32 %size) {
ret void
}
+declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1)
+define void @memset_pattern_inline_is_volatile(ptr %dest, i32 %value, i1 %is.volatile) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i1 %is.volatile
+ ; CHECK-NEXT: call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
+ call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
+ ret void
+}
+
+define void @memset_pattern_inline_variable_size(ptr %dest, i32 %value, i32 %size) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i32 %size
+ ; CHECK-NEXT: call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 %size, i1 true)
+ call void @llvm.memset_pattern.inline.p0.i32.i32(ptr %dest, i32 %value, i32 %size, i1 true)
+ ret void
+}
+
declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
define void @objectsize(ptr %ptr, i1 %a, i1 %b, i1 %c) {
diff --git a/llvm/test/Verifier/memset-pattern-inline.ll b/llvm/test/Verifier/memset-pattern-inline.ll
new file mode 100644
index 0000000000000..6876bc3ceedba
--- /dev/null
+++ b/llvm/test/Verifier/memset-pattern-inline.ll
@@ -0,0 +1,9 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+; CHECK: alignment is not a power of two
+
+define void @foo(ptr %P, i32 %value) {
+ call void @llvm.memset_pattern.inline.p0.i32.i32(ptr align 3 %P, i32 %value, i32 4, i1 false)
+ ret void
+}
+declare void @llvm.memset_pattern.inline.p0.i32.i32(ptr nocapture, i32, i32, i1) nounwind