[llvm] f5446b7 - [MemCpyOpt] Allow variable lengths in memcpy optimizer

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 21 14:23:50 PDT 2021


Author: Olle Fredriksson
Date: 2021-04-21T23:23:38+02:00
New Revision: f5446b769a7929806f72256fccd4826d66502e59

URL: https://github.com/llvm/llvm-project/commit/f5446b769a7929806f72256fccd4826d66502e59
DIFF: https://github.com/llvm/llvm-project/commit/f5446b769a7929806f72256fccd4826d66502e59.diff

LOG: [MemCpyOpt] Allow variable lengths in memcpy optimizer

This makes the memcpy-memcpy and memcpy-memset optimizations work for
variable sizes as long as the two lengths are the same value, relaxing
the old restriction that both lengths be constant integers. If the
lengths are not the same value, the old requirement still applies: both
must be constant integers that satisfy the existing size restrictions.

The implementation works by pushing the length tests further down in the
code, which reveals some places where it's enough that the lengths are
equal (but not necessarily constant).

Differential Revision: https://reviews.llvm.org/D100870

Added: 
    llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
    llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll
    llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll

Modified: 
    llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index f98a06490d98f..83d475d90b85c 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1050,10 +1050,12 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
 
   // Second, the length of the memcpy's must be the same, or the preceding one
   // must be larger than the following one.
-  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
-  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
-  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
-    return false;
+  if (MDep->getLength() != M->getLength()) {
+    ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+    ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+    if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+      return false;
+  }
 
   // Verify that the copied-from memory doesn't change in between the two
   // transfers.  For example, in:
@@ -1229,21 +1231,23 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
 
 /// Determine whether the instruction has undefined content for the given Size,
 /// either because it was freshly alloca'd or started its lifetime.
-static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+static bool hasUndefContents(Instruction *I, Value *Size) {
   if (isa<AllocaInst>(I))
     return true;
 
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
-      if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
-        if (LTSize->getZExtValue() >= Size->getZExtValue())
-          return true;
+  if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+        if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+          if (LTSize->getZExtValue() >= CSize->getZExtValue())
+            return true;
+  }
 
   return false;
 }
 
 static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
-                                 MemoryDef *Def, ConstantInt *Size) {
+                                 MemoryDef *Def, Value *Size) {
   if (MSSA->isLiveOnEntryDef(Def))
     return isa<AllocaInst>(getUnderlyingObject(V));
 
@@ -1251,14 +1255,17 @@ static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
           dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
     if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
       ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
-      if (AA->isMustAlias(V, II->getArgOperand(1)) &&
-          LTSize->getZExtValue() >= Size->getZExtValue())
-        return true;
 
-      // If the lifetime.start covers a whole alloca (as it almost always does)
-      // and we're querying a pointer based on that alloca, then we know the
-      // memory is definitely undef, regardless of how exactly we alias. The
-      // size also doesn't matter, as an out-of-bounds access would be UB.
+      if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
+        if (AA->isMustAlias(V, II->getArgOperand(1)) &&
+            LTSize->getZExtValue() >= CSize->getZExtValue())
+          return true;
+      }
+
+      // If the lifetime.start covers a whole alloca (as it almost always
+      // does) and we're querying a pointer based on that alloca, then we know
+      // the memory is definitely undef, regardless of how exactly we alias.
+      // The size also doesn't matter, as an out-of-bounds access would be UB.
       AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
       if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
         DataLayout DL = Alloca->getModule()->getDataLayout();
@@ -1284,8 +1291,6 @@ static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
 ///   memset(dst2, c, dst2_size);
 /// \endcode
 /// When dst2_size <= dst1_size.
-///
-/// The \p MemCpy must have a Constant length.
 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                                MemSetInst *MemSet) {
   // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
@@ -1293,38 +1298,47 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
     return false;
 
-  // A known memset size is required.
-  ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
-  if (!MemSetSize)
-    return false;
+  Value *MemSetSize = MemSet->getLength();
+  Value *CopySize = MemCpy->getLength();
 
-  // Make sure the memcpy doesn't read any more than what the memset wrote.
-  // Don't worry about sizes larger than i64.
-  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
-  if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
-    // If the memcpy is larger than the memset, but the memory was undef prior
-    // to the memset, we can just ignore the tail. Technically we're only
-    // interested in the bytes from MemSetSize..CopySize here, but as we can't
-    // easily represent this location, we use the full 0..CopySize range.
-    MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
-    bool CanReduceSize = false;
-    if (EnableMemorySSA) {
-      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
-      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-          MemSetAccess->getDefiningAccess(), MemCpyLoc);
-      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
-        if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
-          CanReduceSize = true;
-    } else {
-      MemDepResult DepInfo = MD->getPointerDependencyFrom(
-          MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
-      if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
-        CanReduceSize = true;
-    }
+  if (MemSetSize != CopySize) {
+    // Make sure the memcpy doesn't read any more than what the memset wrote.
+    // Don't worry about sizes larger than i64.
+
+    // A known memset size is required.
+    ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
+    if (!CMemSetSize)
+      return false;
 
-    if (!CanReduceSize)
+    // A known memcpy size is also required.
+    ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
+    if (!CCopySize)
       return false;
-    CopySize = MemSetSize;
+    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
+      // If the memcpy is larger than the memset, but the memory was undef prior
+      // to the memset, we can just ignore the tail. Technically we're only
+      // interested in the bytes from MemSetSize..CopySize here, but as we can't
+      // easily represent this location, we use the full 0..CopySize range.
+      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
+      bool CanReduceSize = false;
+      if (EnableMemorySSA) {
+        MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+        MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+            MemSetAccess->getDefiningAccess(), MemCpyLoc);
+        if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+          if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
+            CanReduceSize = true;
+      } else {
+        MemDepResult DepInfo = MD->getPointerDependencyFrom(
+            MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+        if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+          CanReduceSize = true;
+      }
+
+      if (!CanReduceSize)
+        return false;
+      CopySize = MemSetSize;
+    }
   }
 
   IRBuilder<> Builder(MemCpy);
@@ -1396,10 +1410,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
           if (processMemSetMemCpyDependence(M, MDep))
             return true;
 
-    // The optimizations after this point require the memcpy size.
-    ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
-    if (!CopySize) return false;
-
     MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
         AnyClobber, MemoryLocation::getForSource(M));
 
@@ -1412,26 +1422,29 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
     //   d) memcpy from a just-memset'd source can be turned into memset.
     if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
       if (Instruction *MI = MD->getMemoryInst()) {
-        if (auto *C = dyn_cast<CallInst>(MI)) {
-          // The memcpy must post-dom the call. Limit to the same block for now.
-          // Additionally, we need to ensure that there are no accesses to dest
-          // between the call and the memcpy. Accesses to src will be checked
-          // by performCallSlotOptzn().
-          // TODO: Support non-local call-slot optimization?
-          if (C->getParent() == M->getParent() &&
-              !accessedBetween(*AA, DestLoc, MD, MA)) {
-            // FIXME: Can we pass in either of dest/src alignment here instead
-            // of conservatively taking the minimum?
-            Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                       M->getSourceAlign().valueOrOne());
-            if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
-                                     CopySize->getZExtValue(), Alignment, C)) {
-              LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
-                                << "    call: " << *C << "\n"
-                                << "    memcpy: " << *M << "\n");
-              eraseInstruction(M);
-              ++NumMemCpyInstr;
-              return true;
+        if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+          if (auto *C = dyn_cast<CallInst>(MI)) {
+            // The memcpy must post-dom the call. Limit to the same block for
+            // now. Additionally, we need to ensure that there are no accesses
+            // to dest between the call and the memcpy. Accesses to src will be
+            // checked by performCallSlotOptzn().
+            // TODO: Support non-local call-slot optimization?
+            if (C->getParent() == M->getParent() &&
+                !accessedBetween(*AA, DestLoc, MD, MA)) {
+              // FIXME: Can we pass in either of dest/src alignment here instead
+              // of conservatively taking the minimum?
+              Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                         M->getSourceAlign().valueOrOne());
+              if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+                                       CopySize->getZExtValue(), Alignment,
+                                       C)) {
+                LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+                                  << "    call: " << *C << "\n"
+                                  << "    memcpy: " << *M << "\n");
+                eraseInstruction(M);
+                ++NumMemCpyInstr;
+                return true;
+              }
             }
           }
         }
@@ -1447,7 +1460,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
         }
       }
 
-      if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
+      if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) {
         LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
         eraseInstruction(M);
         ++NumMemCpyInstr;
@@ -1464,10 +1477,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
         if (processMemSetMemCpyDependence(M, MDep))
           return true;
 
-    // The optimizations after this point require the memcpy size.
-    ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
-    if (!CopySize) return false;
-
     // There are four possible optimizations we can do for memcpy:
     //   a) memcpy-memcpy xform which exposes redundance for DSE.
     //   b) call-memcpy xform for return slot optimization.
@@ -1475,17 +1484,19 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
     //      its lifetime copies undefined data, and we can therefore eliminate
     //      the memcpy in favor of the data that was already at the destination.
     //   d) memcpy from a just-memset'd source can be turned into memset.
-    if (DepInfo.isClobber()) {
-      if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
-        // FIXME: Can we pass in either of dest/src alignment here instead
-        // of conservatively taking the minimum?
-        Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                   M->getSourceAlign().valueOrOne());
-        if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
-                                 CopySize->getZExtValue(), Alignment, C)) {
-          eraseInstruction(M);
-          ++NumMemCpyInstr;
-          return true;
+    if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+      if (DepInfo.isClobber()) {
+        if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+          // FIXME: Can we pass in either of dest/src alignment here instead
+          // of conservatively taking the minimum?
+          Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                     M->getSourceAlign().valueOrOne());
+          if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+                                   CopySize->getZExtValue(), Alignment, C)) {
+            eraseInstruction(M);
+            ++NumMemCpyInstr;
+            return true;
+          }
         }
       }
     }
@@ -1498,7 +1509,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
       if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
         return processMemCpyMemCpyDependence(M, MDep);
     } else if (SrcDepInfo.isDef()) {
-      if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
+      if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) {
         eraseInstruction(M);
         ++NumMemCpyInstr;
         return true;

diff  --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
new file mode 100644
index 0000000000000..0006c4c1d68f9
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i8* %src, i64 %size) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[TMP:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
+; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE]], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca i8, i64 %size
+  %dst = alloca i8, i64 %size
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size, i1 false)
+
+  ret void
+}
+
+; Differing sizes, so left as it is.
+define void @negative_test(i8* %src, i64 %size1, i64 %size2) {
+; CHECK-LABEL: @negative_test(
+; CHECK-NEXT:    [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE1]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[TMP]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca i8, i64 %size1
+  %dst = alloca i8, i64 %size2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size2, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

diff  --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll
new file mode 100644
index 0000000000000..ed1028f7b9d78
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i64 %size) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
+; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE]], align 1
+; CHECK-NEXT:    ret void
+;
+  %src = alloca i8, i64 %size
+  %dst = alloca i8, i64 %size
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %size, i1 false)
+
+  ret void
+}
+
+define void @test2(i64 %size1, i64 %size2, i64 %cpy_size) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT:    [[DST:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+  %src = alloca i8, i64 %size1
+  %dst = alloca i8, i64 %size2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %cpy_size, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

diff  --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
new file mode 100644
index 0000000000000..e80bab819fe2d
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i8* %src, i8 %c, i64 %size) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[DST1:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
+; CHECK-NEXT:    [[DST2:%.*]] = alloca i8, i64 [[SIZE]], align 1
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[DST2]], i8 [[C]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  %dst1 = alloca i8, i64 %size
+  %dst2 = alloca i8, i64 %size
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size, i1 false)
+
+  ret void
+}
+
+; Differing sizes, so left as it is.
+define void @negative_test(i8* %src, i8 %c, i64 %size1, i64 %size2) {
+; CHECK-LABEL: @negative_test(
+; CHECK-NEXT:    [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT:    [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST2]], i8* align 8 [[DST1]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  %dst1 = alloca i8, i64 %size1
+  %dst2 = alloca i8, i64 %size2
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size2, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)


        


More information about the llvm-commits mailing list