[llvm] 879c825 - [intrinsics] Add @llvm.memcpy.inline intrinsics
Guillaume Chatelet via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 00:42:31 PST 2020
Author: Guillaume Chatelet
Date: 2020-01-28T09:42:01+01:00
New Revision: 879c825cb808ed144f7506182b9d6596043bcc68
URL: https://github.com/llvm/llvm-project/commit/879c825cb808ed144f7506182b9d6596043bcc68
DIFF: https://github.com/llvm/llvm-project/commit/879c825cb808ed144f7506182b9d6596043bcc68.diff
LOG: [intrinsics] Add @llvm.memcpy.inline intrinsics
Summary:
This is a follow up on D61634. It adds an LLVM IR intrinsic to allow better implementation of memcpy from C++.
A follow up CL will add the intrinsics in Clang.
Reviewers: courbet, theraven, t.p.northover, jdoerfert, tejohnson
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71710
Added:
llvm/test/CodeGen/X86/memcpy-inline.ll
llvm/test/Verifier/memcpy-inline.ll
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/IR/IntrinsicInst.h
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/Analysis/Lint.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/IR/Verifier.cpp
llvm/test/Other/lint.ll
llvm/test/Verifier/intrinsic-immarg.ll
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 89a9ca0805da..36e6f90461f5 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -11719,6 +11719,65 @@ the argument.
If "len" is 0, the pointers may be NULL or dangling. However, they must still
be appropriately aligned.
+.. _int_memcpy_inline:
+
+'``llvm.memcpy.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memcpy.inline`` on any
+integer bit width and for different address spaces. Not all targets
+support all bit widths however.
+
+::
+
+ declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+ i32 <len>, i1 <isvolatile>)
+ declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+ i64 <len>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
+source location to the destination location and guarantees that no external
+functions are called.
+
+Note that, unlike the standard libc function, the ``llvm.memcpy.inline.*``
+intrinsics do not return a value, take an extra isvolatile
+argument, and the pointers can be in specified address spaces.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination, the second is a
+pointer to the source. The third argument is a constant integer argument
+specifying the number of bytes to copy, and the fourth is a
+boolean indicating a volatile access.
+
+The :ref:`align <attr_align>` parameter attribute can be provided
+for the first and second arguments.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy.inline`` call is
+a :ref:`volatile operation <volatile>`. The detailed access behavior is not
+very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
+source location to the destination location, which are not allowed to
+overlap. It copies "len" bytes of memory over. If the argument is known
+to be aligned to some boundary, this can be specified as an attribute on
+the argument.
+
+If "len" is 0, the pointers may be NULL or dangling. However, they must still
+be appropriately aligned.
+
+The generated code is guaranteed not to call any external functions.
+
.. _int_memmove:
'``llvm.memmove``' Intrinsic
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 42a5564a4488..cebe07e42afc 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -582,6 +582,7 @@ namespace llvm {
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
+ case Intrinsic::memcpy_inline:
return true;
default: return false;
}
@@ -608,8 +609,14 @@ namespace llvm {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memcpy ||
- I->getIntrinsicID() == Intrinsic::memmove;
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy_inline:
+ return true;
+ default:
+ return false;
+ }
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -640,6 +647,21 @@ namespace llvm {
}
};
+ /// This class wraps the llvm.memcpy.inline intrinsic.
+ class MemCpyInlineInst : public MemTransferInst {
+ public:
+ ConstantInt *getLength() const {
+ return cast<ConstantInt>(MemTransferInst::getLength());
+ }
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memcpy_inline;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
// The common base class for any memset/memmove/memcpy intrinsics;
// whether they be atomic or non-atomic.
// i.e. llvm.element.unordered.atomic.memset/memcpy/memmove
@@ -656,6 +678,7 @@ namespace llvm {
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::memcpy_element_unordered_atomic:
@@ -698,6 +721,7 @@ namespace llvm {
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
@@ -719,6 +743,7 @@ namespace llvm {
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memcpy_element_unordered_atomic:
return true;
default:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d549eb27b4ba..22ce1f117793 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -511,6 +511,20 @@ def int_memcpy : Intrinsic<[],
llvm_i1_ty],
[IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>,
NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
+
+// Memcpy semantic that is guaranteed to be inlined.
+// In particular this means that the generated code is not allowed to call any
+// external function.
+// The third argument (specifying the size) must be a constant.
+def int_memcpy_inline
+ : Intrinsic<[],
+ [ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ],
+ [ IntrArgMemOnly, IntrWillReturn,
+ NoCapture<0>, NoCapture<1>,
+ NoAlias<0>, NoAlias<1>,
+ WriteOnly<0>, ReadOnly<1>,
+ ImmArg<2>, ImmArg<3> ]>;
+
def int_memmove : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index ba945eb4318f..735be452ba04 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -345,6 +345,22 @@ void Lint::visitCallSite(CallSite CS) {
"Undefined behavior: memcpy source and destination overlap", &I);
break;
}
+ case Intrinsic::memcpy_inline: {
+ MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
+ const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
+ visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlignment(),
+ nullptr, MemRef::Write);
+ visitMemoryReference(I, MCII->getSource(), Size,
+ MCII->getSourceAlignment(), nullptr, MemRef::Read);
+
+ // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+ // isn't expressive enough for what we really want to do. Known partial
+ // overlap is not distinguished from the case where nothing is known.
+ const LocationSize LS = LocationSize::precise(Size);
+ Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
+ break;
+ }
case Intrinsic::memmove: {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
// TODO: If the size is known, use it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index df6875a92e58..4625d0023a75 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5840,12 +5840,33 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
- false, isTC,
+ /* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
return;
}
+ case Intrinsic::memcpy_inline: {
+ const auto &MCI = cast<MemCpyInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
+ // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MCI.getDestAlign().valueOrOne();
+ Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
+ SDValue MC = DAG.getMemcpy(
+ getRoot(), sdl, Dst, Src, Size, Alignment.value(), isVol,
+ /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 86648d9d7a1d..4eef66ffe367 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4347,6 +4347,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(Call));
break;
case Intrinsic::memcpy:
+ case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset: {
const auto *MI = cast<MemIntrinsic>(&Call);
diff --git a/llvm/test/CodeGen/X86/memcpy-inline.ll b/llvm/test/CodeGen/X86/memcpy-inline.ll
new file mode 100644
index 000000000000..8e44db8fa60b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memcpy-inline.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck -check-prefix=X64 %s
+
+; NOTE: This is expected to fail on targets that do not support memcpy.
+; RUN: llc < %s -mtriple=r600-unknown-linux-gnu 2> %t.err || true
+; RUN: FileCheck --input-file %t.err -check-prefix=R600 %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @test1(i8* %a, i8* %b) nounwind {
+; X64-LABEL: test1:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rsi), %rax
+; X64-NEXT: movq %rax, (%rdi)
+; X64-NEXT: retq
+; R600: LLVM ERROR
+ tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 )
+ ret void
+}
+
+define void @regular_memcpy_calls_external_function(i8* %a, i8* %b) nounwind {
+; X64-LABEL: regular_memcpy_calls_external_function:
+; X64: # %bb.0:
+; X64-NEXT: movl $128, %edx
+; X64-NEXT: jmp memcpy # TAILCALL
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
+ ret void
+}
+
+define void @inlined_copy_doesnt_call_external_function(i8* %a, i8* %b) nounwind {
+; X64-LABEL: inlined_copy_doesnt_call_external_function:
+; X64: # %bb.0:
+; X64-NEXT: movl $128, %ecx
+; X64-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; X64-NEXT: retq
+ tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
+ ret void
+}
diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll
index 415fc29d8f05..db8d53a14f5a 100644
--- a/llvm/test/Other/lint.ll
+++ b/llvm/test/Other/lint.ll
@@ -4,6 +4,7 @@ target datalayout = "e-p:64:64:64"
declare fastcc void @bar()
declare void @llvm.stackrestore(i8*)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
declare void @has_sret(i8* sret %p)
declare void @has_noaliases(i32* noalias %p, i32* %q)
declare void @one_arg(i32)
@@ -80,6 +81,8 @@ define i32 @foo() noreturn {
; CHECK: Write to read-only memory
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
+; CHECK: Write to read-only memory
+call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
; CHECK: Unusual: noalias argument aliases another argument
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0)
@@ -189,3 +192,11 @@ entry:
; CHECK: Undefined behavior: indirectbr with no destinations
indirectbr i8* null, []
}
+
+define i32 @memcpy_inline_same_address() noreturn {
+ %buf = alloca i64, align 1
+ %ptr = bitcast i64* %buf to i8*
+ ; CHECK: Unusual: noalias argument aliases another argument
+ call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %ptr, i8* %ptr, i64 1, i1 false)
+ unreachable
+}
diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll
index 915432a93bb8..68297678c0b6 100644
--- a/llvm/test/Verifier/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/intrinsic-immarg.ll
@@ -27,6 +27,23 @@ define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) {
ret void
}
+declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
+define void @memcpy_inline_is_volatile(i8* %dest, i8* %src, i1 %is.volatile) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i1 %is.volatile
+ ; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+ call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
+ ret void
+}
+
+define void @memcpy_inline_variable_size(i8* %dest, i8* %src, i32 %size) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i32 %size
+ ; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
+ call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
+ ret void
+}
+
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) {
; CHECK: immarg operand has non-immediate parameter
diff --git a/llvm/test/Verifier/memcpy-inline.ll b/llvm/test/Verifier/memcpy-inline.ll
new file mode 100644
index 000000000000..6aa52674b634
--- /dev/null
+++ b/llvm/test/Verifier/memcpy-inline.ll
@@ -0,0 +1,9 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; CHECK: alignment is not a power of two
+
+define void @foo(i8* %P, i8* %Q) {
+ call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* align 3 %P, i8* %Q, i32 4, i1 false)
+ ret void
+}
+declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
More information about the llvm-commits
mailing list