[llvm] 6ec2c5e - GC-parseable element atomic memcpy/memmove

Artur Pilipenko via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 23 14:06:26 PDT 2020


Author: Artur Pilipenko
Date: 2020-10-23T14:06:09-07:00
New Revision: 6ec2c5e402a724ba99bce82a9cac7a3006d660f4

URL: https://github.com/llvm/llvm-project/commit/6ec2c5e402a724ba99bce82a9cac7a3006d660f4
DIFF: https://github.com/llvm/llvm-project/commit/6ec2c5e402a724ba99bce82a9cac7a3006d660f4.diff

LOG: GC-parseable element atomic memcpy/memmove

This change introduces a GC-parseable lowering for the element atomic
memcpy/memmove intrinsics. This way the runtime can provide an
implementation that can take a safepoint during the copy operation.

See "GC-parseable element atomic memcpy/memmove" thread on llvm-dev
for the background and details:
https://groups.google.com/g/llvm-dev/c/NnENHzmX-b8/m/3PyN8Y2pCAAJ
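
For illustration, a sketch of the lowering (adapted from the tests added
below; value names are illustrative, and the statepoint ID and flags shown
are the defaults emitted by RewriteStatepointsForGC):

  ; Before: an element atomic memcpy on derived pointers.
  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(
      i8 addrspace(1)* align 16 %dest_derived,
      i8 addrspace(1)* align 16 %src_derived,
      i32 %len, i32 1)

  ; After: a statepoint wrapping the safepoint variant of the symbol, with
  ; each derived pointer replaced by a (base, offset) pair.
  %statepoint_token = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...)
      @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(
          i64 2882400000, i32 0,
          void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*
              @__llvm_memcpy_element_unordered_atomic_safepoint_1,
          i32 5, i32 0,
          i8 addrspace(1)* %dest_base, i64 %dest_offset,
          i8 addrspace(1)* %src_base, i64 %src_offset,
          i32 %len, i32 0, i32 0) [ "gc-live"() ]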

Differential Revision: https://reviews.llvm.org/D88861

Added: 
    llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll
    llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll

Modified: 
    llvm/docs/LangRef.rst
    llvm/docs/Statepoints.rst
    llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
    llvm/lib/Transforms/Utils/Local.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 74492c4f27e4..a70952d7b8c9 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -20355,7 +20355,9 @@ Lowering:
 
 In the most general case call to the '``llvm.memcpy.element.unordered.atomic.*``' is
 lowered to a call to the symbol ``__llvm_memcpy_element_unordered_atomic_*``. Where '*'
-is replaced with an actual element size.
+is replaced with an actual element size. See :ref:`RewriteStatepointsForGC intrinsic
+lowering <RewriteStatepointsForGC_intrinsic_lowering>` for details on GC-specific
+lowering.
 
 Optimizer is allowed to inline memory copy when it's profitable to do so.
 
@@ -20432,7 +20434,9 @@ Lowering:
 In the most general case call to the
 '``llvm.memmove.element.unordered.atomic.*``' is lowered to a call to the symbol
 ``__llvm_memmove_element_unordered_atomic_*``. Where '*' is replaced with an
-actual element size.
+actual element size. See :ref:`RewriteStatepointsForGC intrinsic lowering
+<RewriteStatepointsForGC_intrinsic_lowering>` for details on GC-specific
+lowering.
 
 The optimizer is allowed to inline the memory copy when it's profitable to do so.
 

diff  --git a/llvm/docs/Statepoints.rst b/llvm/docs/Statepoints.rst
index e857d6f50ba3..7a4337201e1f 100644
--- a/llvm/docs/Statepoints.rst
+++ b/llvm/docs/Statepoints.rst
@@ -817,6 +817,50 @@ In practice, RewriteStatepointsForGC should be run much later in the pass
 pipeline, after most optimization is already done.  This helps to improve 
 the quality of the generated code when compiled with garbage collection support.
 
+.. _RewriteStatepointsForGC_intrinsic_lowering:
+
+RewriteStatepointsForGC intrinsic lowering
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+As part of lowering to the explicit model of relocations,
+RewriteStatepointsForGC performs GC-specific lowering for the
+'``llvm.memcpy.element.unordered.atomic.*``' and
+'``llvm.memmove.element.unordered.atomic.*``' intrinsics.
+
+There are two possible lowerings for these copy operations: GC leaf lowering
+and GC-parseable lowering. If a call is explicitly marked with the
+"gc-leaf-function" attribute, the call is lowered to a GC leaf call to the
+'``__llvm_memcpy_element_unordered_atomic_*``' or
+'``__llvm_memmove_element_unordered_atomic_*``' symbol. Such a call cannot
+take a safepoint. Otherwise, the call is made GC parseable by wrapping it in
+a statepoint. This makes it possible to take a safepoint during the copy
+operation. Note that a GC-parseable copy operation is not required to take a
+safepoint; for example, a short copy may complete without taking one.
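+
+For example, a call explicitly marked as a GC leaf keeps its derived pointer
+arguments (a sketch adapted from the tests added with this change):
+
+.. code-block:: llvm
+
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(
+    i8 addrspace(1)* align 16 %dest_derived,
+    i8 addrspace(1)* align 16 %src_derived,
+    i32 %len, i32 1) "gc-leaf-function"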
+
+GC-parseable calls to the '``llvm.memcpy.element.unordered.atomic.*``' and
+'``llvm.memmove.element.unordered.atomic.*``' intrinsics are lowered to calls
+to the '``__llvm_memcpy_element_unordered_atomic_safepoint_*``' and
+'``__llvm_memmove_element_unordered_atomic_safepoint_*``' symbols,
+respectively. This way the runtime can provide implementations of the copy
+operations both with and without safepoints.
+
+GC-parseable lowering also involves adjusting the arguments of the call.
+The memcpy and memmove intrinsics take derived pointers as their source and
+destination arguments. If a copy operation takes a safepoint, it might need
+to relocate the underlying source and destination objects, which requires the
+corresponding base pointers to be available to the copy operation. To make
+the base pointers available, RewriteStatepointsForGC replaces each derived
+pointer with a base pointer and offset pair. For example:
+
+.. code-block:: llvm
+
+  declare void @__llvm_memcpy_element_unordered_atomic_safepoint_1(
+    i8 addrspace(1)* %dest_base, i64 %dest_offset,
+    i8 addrspace(1)* %src_base, i64 %src_offset,
+    i64 %length)
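+
+Here each offset is the byte distance of the derived pointer from its base.
+A minimal sketch of the offset computation performed by the pass (value names
+are illustrative):
+
+.. code-block:: llvm
+
+  %dest_base_int = ptrtoint i8 addrspace(1)* %dest_base to i64
+  %dest_derived_int = ptrtoint i8 addrspace(1)* %dest_derived to i64
+  %dest_offset = sub i64 %dest_derived_int, %dest_base_int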
+
+
 .. _PlaceSafepoints:
 
 PlaceSafepoints

diff  --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 0769b23f903b..91cd4c8e7abf 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1543,6 +1543,101 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
                        .getCallee();
 
       IsDeoptimize = true;
+    } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
+               IID == Intrinsic::memmove_element_unordered_atomic) {
+      // Unordered atomic memcpy and memmove intrinsics which are not explicitly
+      // marked as "gc-leaf-function" should be lowered in a GC parseable way.
+      // Specifically, these calls should be lowered to the
+      // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
+      // Similarly to __llvm_deoptimize we want to resolve this now, since the
+      // verifier does not allow taking the address of an intrinsic function.
+      //
+      // Moreover, we need to shuffle the arguments of the call to
+      // accommodate the GC. The underlying source and destination objects
+      // might be relocated during the copy operation should a GC occur. To
+      // relocate the derived source and destination pointers, the intrinsic's
+      // implementation needs to know the corresponding base pointers.
+      //
+      // To make the base pointers available pass them explicitly as arguments:
+      //   memcpy(dest_derived, source_derived, ...) =>
+      //   memcpy(dest_base, dest_offset, source_base, source_offset, ...)
+      auto &Context = Call->getContext();
+      auto &DL = Call->getModule()->getDataLayout();
+      auto GetBaseAndOffset = [&](Value *Derived) {
+        assert(Result.PointerToBase.count(Derived));
+        unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
+        unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
+        Value *Base = Result.PointerToBase.find(Derived)->second;
+        Value *Base_int = Builder.CreatePtrToInt(
+            Base, Type::getIntNTy(Context, IntPtrSize));
+        Value *Derived_int = Builder.CreatePtrToInt(
+            Derived, Type::getIntNTy(Context, IntPtrSize));
+        return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
+      };
+
+      auto *Dest = CallArgs[0];
+      Value *DestBase, *DestOffset;
+      std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
+
+      auto *Source = CallArgs[1];
+      Value *SourceBase, *SourceOffset;
+      std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
+
+      auto *LengthInBytes = CallArgs[2];
+      auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
+
+      CallArgs.clear();
+      CallArgs.push_back(DestBase);
+      CallArgs.push_back(DestOffset);
+      CallArgs.push_back(SourceBase);
+      CallArgs.push_back(SourceOffset);
+      CallArgs.push_back(LengthInBytes);
+
+      SmallVector<Type *, 8> DomainTy;
+      for (Value *Arg : CallArgs)
+        DomainTy.push_back(Arg->getType());
+      auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
+                                    /* isVarArg = */ false);
+
+      auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
+        uint64_t ElementSize = ElementSizeCI->getZExtValue();
+        if (IID == Intrinsic::memcpy_element_unordered_atomic) {
+          switch (ElementSize) {
+          case 1:
+            return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
+          case 2:
+            return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
+          case 4:
+            return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
+          case 8:
+            return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
+          case 16:
+            return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
+          default:
+            llvm_unreachable("unexpected element size!");
+          }
+        }
+        assert(IID == Intrinsic::memmove_element_unordered_atomic);
+        switch (ElementSize) {
+        case 1:
+          return "__llvm_memmove_element_unordered_atomic_safepoint_1";
+        case 2:
+          return "__llvm_memmove_element_unordered_atomic_safepoint_2";
+        case 4:
+          return "__llvm_memmove_element_unordered_atomic_safepoint_4";
+        case 8:
+          return "__llvm_memmove_element_unordered_atomic_safepoint_8";
+        case 16:
+          return "__llvm_memmove_element_unordered_atomic_safepoint_16";
+        default:
+          llvm_unreachable("unexpected element size!");
+        }
+      };
+
+      CallTarget =
+          F->getParent()
+              ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
+              .getCallee();
     }
   }
 
@@ -2584,8 +2679,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
   assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
 
   auto NeedsRewrite = [&TLI](Instruction &I) {
-    if (const auto *Call = dyn_cast<CallBase>(&I))
-      return !callsGCLeafFunction(Call, TLI) && !isa<GCStatepointInst>(Call);
+    if (const auto *Call = dyn_cast<CallBase>(&I)) {
+      if (isa<GCStatepointInst>(Call))
+        return false;
+      if (callsGCLeafFunction(Call, TLI))
+        return false;
+
+      // Normally it's up to the frontend to make sure that non-leaf calls
+      // have proper deopt state if it is required. We make an exception for
+      // the element atomic memcpy/memmove intrinsics here. Unlike other
+      // intrinsics, these are non-leaf by default. They might be generated by
+      // the optimizer, which doesn't know how to produce a proper deopt
+      // state. So if we see a non-leaf memcpy/memmove without deopt state,
+      // just treat it as a leaf copy and don't produce a statepoint.
+      if (!AllowStatepointWithNoDeoptInfo &&
+          !Call->getOperandBundle(LLVMContext::OB_deopt)) {
+        assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
+               "Don't expect any other calls here!");
+        return false;
+      }
+      return true;
+    }
     return false;
   };
 

diff  --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index eea347aa8fe6..9303ea5cdf73 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2672,10 +2672,13 @@ bool llvm::callsGCLeafFunction(const CallBase *Call,
     if (F->hasFnAttribute("gc-leaf-function"))
       return true;
 
-    if (auto IID = F->getIntrinsicID())
+    if (auto IID = F->getIntrinsicID()) {
       // Most LLVM intrinsics do not take safepoints.
       return IID != Intrinsic::experimental_gc_statepoint &&
-             IID != Intrinsic::experimental_deoptimize;
+             IID != Intrinsic::experimental_deoptimize &&
+             IID != Intrinsic::memcpy_element_unordered_atomic &&
+             IID != Intrinsic::memmove_element_unordered_atomic;
+    }
   }
 
   // Lib calls can be materialized by some passes, and won't be

diff  --git a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll
new file mode 100644
index 000000000000..a6a4b0763ccb
--- /dev/null
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy-no-deopt.ll
@@ -0,0 +1,52 @@
+; RUN: opt -passes=rewrite-statepoints-for-gc -rs4gc-allow-statepoint-with-no-deopt-info=0 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-REQUIRE-DEOPT
+; RUN: opt -passes=rewrite-statepoints-for-gc -rs4gc-allow-statepoint-with-no-deopt-info=1 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO-REQUIRE-DEOPT
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg)
+declare void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg)
+
+define void @test_memcpy_no_deopt(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: @test_memcpy_no_deopt
+; CHECK-REQUIRE-DEOPT-NOT: @llvm.experimental.gc.statepoint
+; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1)
+  ret void
+}
+
+define void @test_memmove_no_deopt(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: @test_memmove_no_deopt
+; CHECK-REQUIRE-DEOPT-NOT: @llvm.experimental.gc.statepoint
+; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1)
+  ret void
+}
+
+define void @test_memcpy_with_deopt(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: @test_memcpy_with_deopt
+; CHECK-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint
+; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ]
+  ret void
+}
+
+define void @test_memmove_with_deopt(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: @test_memmove_with_deopt
+; CHECK-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint
+; CHECK-NO-REQUIRE-DEOPT: @llvm.experimental.gc.statepoint
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) [ "deopt"(i32 0) ]
+  ret void
+}

diff  --git a/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll
new file mode 100644
index 000000000000..6aba5e6714ca
--- /dev/null
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/unordered-atomic-memcpy.ll
@@ -0,0 +1,199 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; Use instcombine to cleanup offset computation.
+; RUN: opt -passes=rewrite-statepoints-for-gc,instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128-p1:64:64"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg)
+declare void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i32 immarg)
+
+define void @test_memcpy_gc_leaf_function(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memcpy_gc_leaf_function
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]]
+; CHECK-NEXT:    [[DEST_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) [[ATTR2:#.*]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) [[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) [[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) [[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) [[ATTR2]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function"
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function"
+  ret void
+}
+
+define void @test_memcpy_element_atomic_1(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_1
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_1, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1)
+  ret void
+}
+
+define void @test_memcpy_element_atomic_2(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_2
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_2, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2)
+  ret void
+}
+
+define void @test_memcpy_element_atomic_4(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_4
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_4, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4)
+  ret void
+}
+
+define void @test_memcpy_element_atomic_8(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_8
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_8, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8)
+  ret void
+}
+
+define void @test_memcpy_element_atomic_16(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memcpy_element_atomic_16
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memcpy_element_unordered_atomic_safepoint_16, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memcpy.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16)
+  ret void
+}
+
+define void @test_memmove_gc_leaf_function(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memmove_gc_leaf_function
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]]
+; CHECK-NEXT:    [[DEST_DERIVED:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 1) [[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 2) [[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 4) [[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 8) [[ATTR2]]
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 [[SRC_DERIVED]], i8 addrspace(1)* align 16 [[DEST_DERIVED]], i32 [[LEN]], i32 16) [[ATTR2]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8) "gc-leaf-function"
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16) "gc-leaf-function"
+  ret void
+}
+
+define void @test_memmove_element_atomic_1(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_1
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_1, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 1)
+  ret void
+}
+
+define void @test_memmove_element_atomic_2(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_2
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_2, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 2)
+  ret void
+}
+
+define void @test_memmove_element_atomic_4(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_4
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_4, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 4)
+  ret void
+}
+
+define void @test_memmove_element_atomic_8(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_8
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_8, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 8)
+  ret void
+}
+
+define void @test_memmove_element_atomic_16(i8 addrspace(1)* %src, i64 %src_offset, i8 addrspace(1)* %dest, i64 %dest_offset, i32 %len) gc "statepoint-example" {
+; CHECK-LABEL: define {{[^@]+}}@test_memmove_element_atomic_16
+; CHECK-SAME: (i8 addrspace(1)* [[SRC:%.*]], i64 [[SRC_OFFSET:%.*]], i8 addrspace(1)* [[DEST:%.*]], i64 [[DEST_OFFSET:%.*]], i32 [[LEN:%.*]]) gc "statepoint-example" {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i8i64p1i8i64i32f(i64 2882400000, i32 0, void (i8 addrspace(1)*, i64, i8 addrspace(1)*, i64, i32)* nonnull @__llvm_memmove_element_unordered_atomic_safepoint_16, i32 5, i32 0, i8 addrspace(1)* [[SRC]], i64 [[SRC_OFFSET]], i8 addrspace(1)* [[DEST]], i64 [[DEST_OFFSET]], i32 [[LEN]], i32 0, i32 0) [ "gc-live"() ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src_derived = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %src_offset
+  %dest_derived = getelementptr inbounds i8, i8 addrspace(1)* %dest, i64 %dest_offset
+  call void @llvm.memmove.element.unordered.atomic.p1i8.p1i8.i32(i8 addrspace(1)* align 16 %src_derived, i8 addrspace(1)* align 16 %dest_derived, i32 %len, i32 16)
+  ret void
+}


        


More information about the llvm-commits mailing list