[llvm] dd5991c - [LoopIdiom] Transform loop containing memcpy to memmove

Dawid Jurczak via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 8 00:56:48 PDT 2021


Author: Dawid Jurczak
Date: 2021-10-08T09:56:01+02:00
New Revision: dd5991cc6f2d0b39716c4e6c9272596481f1c7ad

URL: https://github.com/llvm/llvm-project/commit/dd5991cc6f2d0b39716c4e6c9272596481f1c7ad
DIFF: https://github.com/llvm/llvm-project/commit/dd5991cc6f2d0b39716c4e6c9272596481f1c7ad.diff

LOG: [LoopIdiom] Transform loop containing memcpy to memmove

The purpose of this patch is to teach the LoopIdiomRecognize pass to recognize simple memmove patterns,
in a similar way to what GCC does: https://godbolt.org/z/dKjGvTGff
It's a follow-up to https://reviews.llvm.org/D104464
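
For illustration, here's a minimal C++ sketch (the function name is
hypothetical) of the kind of loop this patch now handles; it mirrors the
loop_with_memcpy_PR46179_positive_stride test added below:

  #include <cstring>

  // Each iteration copies one byte from Src[I + 1] to Src[I]. Taken as a
  // whole, the loop shifts the buffer left through overlapping memory, so
  // it is equivalent to the single call memmove(Src, Src + 1, Size) that
  // LoopIdiomRecognize can now form.
  void shift_left(char *Src, unsigned long Size) {
    for (unsigned long I = 0; I != Size; ++I)
      std::memcpy(Src + I, Src + I + 1, 1);
  }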

Differential Revision: https://reviews.llvm.org/D107075

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/test/Transforms/LoopIdiom/basic.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b5cc845e2cee6..021c37748936e 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1257,6 +1257,51 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                     StoreEv, LoadEv, BECount);
 }
 
+class MemmoveVerifier {
+public:
+  explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
+                           const DataLayout &DL)
+      : DL(DL), LoadOff(0), StoreOff(0),
+        BP1(llvm::GetPointerBaseWithConstantOffset(
+            LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+        BP2(llvm::GetPointerBaseWithConstantOffset(
+            StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
+        IsSameObject(BP1 == BP2) {}
+
+  bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
+                                  const Instruction &TheLoad,
+                                  bool IsMemCpy) const {
+    if (IsMemCpy) {
+      // Ensure that LoadBasePtr is after StoreBasePtr for positive stride
+      // and before StoreBasePtr for negative stride.
+      if ((!IsNegStride && LoadOff <= StoreOff) ||
+          (IsNegStride && LoadOff >= StoreOff))
+        return false;
+    } else {
+      // Ensure that the load is after the store for positive stride and
+      // before the store for negative stride. The accesses must not overlap.
+      int64_t LoadSize =
+          DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8;
+      if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+        return false;
+      if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
+          (IsNegStride && LoadOff + LoadSize > StoreOff))
+        return false;
+    }
+    return true;
+  }
+
+private:
+  const DataLayout &DL;
+  int64_t LoadOff;
+  int64_t StoreOff;
+  const Value *BP1;
+  const Value *BP2;
+
+public:
+  const bool IsSameObject;
+};
+
 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
     Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
     MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
@@ -1321,10 +1366,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
   bool IsMemCpy = isa<MemCpyInst>(TheStore);
   const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
 
-  bool UseMemMove =
+  bool LoopAccessStore =
       mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSizeSCEV, *AA, IgnoredInsts);
-  if (UseMemMove) {
+  if (LoopAccessStore) {
     // For memmove case it's not enough to guarantee that loop doesn't access
     // TheStore and TheLoad. Additionally we need to make sure that TheStore is
     // the only user of TheLoad.
@@ -1363,34 +1408,32 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
   // the load memory locations. So remove it from the ignored stores.
   if (IsMemCpy)
     IgnoredInsts.erase(TheStore);
+  MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                             StoreSizeSCEV, *AA, IgnoredInsts)) {
-    ORE.emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
-             << ore::NV("Inst", InstRemark) << " in "
-             << ore::NV("Function", TheStore->getFunction())
-             << " function will not be hoisted: "
-             << ore::NV("Reason", "The loop may access load location");
-    });
-    return Changed;
-  }
-  if (UseMemMove) {
-    // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
-    // negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
-    int64_t LoadOff = 0, StoreOff = 0;
-    const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
-        LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
-    const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
-        StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
-    int64_t LoadSize =
-        DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
-    if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+    if (!IsMemCpy) {
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
+                                        TheLoad)
+               << ore::NV("Inst", InstRemark) << " in "
+               << ore::NV("Function", TheStore->getFunction())
+               << " function will not be hoisted: "
+               << ore::NV("Reason", "The loop may access load location");
+      });
       return Changed;
-    if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
-        (IsNegStride && LoadOff + LoadSize > StoreOff))
+    }
+    // At this point the loop may access the load location only for a memcpy
+    // within the same underlying object. If that's not the case, bail out.
+    if (!Verifier.IsSameObject)
       return Changed;
   }
 
+  bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
+  if (UseMemMove)
+    if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
+                                             IsMemCpy))
+      return Changed;
+
   if (avoidLIRForMultiBlockLoop())
     return Changed;
 

diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 754b9df105e29..0bf4ad4f64958 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -1104,6 +1104,43 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
+
+;; Memmove formation. We expect exactly the same memmove result as in the PR46179_positive_stride output.
+define void @loop_with_memcpy_PR46179_positive_stride(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @loop_with_memcpy_PR46179_positive_stride(
+; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
+; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+bb.nph:
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %Step = add nuw nsw i64 %indvar, 1
+  %SrcI = getelementptr i8, i8* %Src, i64 %Step
+  %DestI = getelementptr i8, i8* %Src, i64 %indvar
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
 ;; Memmove formation.
 define void @PR46179_negative_stride(i8* %Src, i64 %Size) {
 ; CHECK-LABEL: @PR46179_negative_stride(
@@ -1145,7 +1182,82 @@ for.end:                                      ; preds = %.for.body, %bb.nph
   ret void
 }
 
-;; Do not form memmove from previous store when stride is positive.
+;; Memmove formation. We expect exactly the same memmove result as in the PR46179_negative_stride output.
+define void @loop_with_memcpy_PR46179_negative_stride(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @loop_with_memcpy_PR46179_negative_stride(
+; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SCEVGEP]], i8* align 1 [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[STEP]] = add nsw i64 [[INDVAR]], -1
+; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[STEP]]
+; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+bb.nph:
+  %cmp1 = icmp sgt i64 %Size, 0
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                           ; preds = %bb.nph, %.for.body
+  %indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
+  %Step = add nsw i64 %indvar, -1
+  %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
+  %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+  %exitcond = icmp sgt i64 %indvar, 1
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                      ; preds = %.for.body, %bb.nph
+  ret void
+}
+
+;; Memmove formation.
+define void @loop_with_memcpy_stride16(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @loop_with_memcpy_stride16(
+; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 16
+; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SIZE:%.*]], i64 16)
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw i64 [[TMP2]], 16
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ 0, [[BB_NPH:%.*]] ]
+; CHECK-NEXT:    [[STEP]] = add nuw nsw i64 [[INDVAR]], 16
+; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[STEP]]
+; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i64 [[STEP]], [[SIZE:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+bb.nph:
+  br label %for.body
+
+for.body:                                                ; preds = %for.body, %bb.nph
+  %indvar = phi i64 [ %Step, %for.body ], [ 0, %bb.nph ]
+  %Step = add nuw nsw i64 %indvar, 16
+  %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
+  %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 16, i1 false)
+  %exitcond = icmp slt i64 %Step, %Size
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                                ; preds = %for.body
+  ret void
+}
+
+;; Do not form memmove from previous load when stride is positive.
 define void @do_not_form_memmove1(i8* %Src, i64 %Size) {
 ; CHECK-LABEL: @do_not_form_memmove1(
 ; CHECK-NEXT:  bb.nph:
@@ -1181,26 +1293,58 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-;; Do not form memmove from next store when stride is negative.
+;; Do not form memmove from previous load in memcpy when stride is positive.
 define void @do_not_form_memmove2(i8* %Src, i64 %Size) {
 ; CHECK-LABEL: @do_not_form_memmove2(
 ; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
+; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
+; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+bb.nph:
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
+  %Step = add nuw nsw i64 %indvar, -1
+  %SrcI = getelementptr i8, i8* %Src, i64 %Step
+  %DestI = getelementptr i8, i8* %Src, i64 %indvar
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+;; Do not form memmove from next load when stride is negative.
+define void @do_not_form_memmove3(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @do_not_form_memmove3(
+; CHECK-NEXT:  bb.nph:
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
 ; CHECK-NEXT:    [[V:%.*]] = load i8, i8* [[SRCI]], align 1
 ; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
 ; CHECK-NEXT:    store i8 [[V]], i8* [[DESTI]], align 1
-; CHECK-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
@@ -1223,9 +1367,47 @@ for.end:                                      ; preds = %.for.body, %bb.nph
   ret void
 }
 
+;; Do not form memmove from next load in memcpy when stride is negative.
+define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @do_not_form_memmove4(
+; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
+; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+bb.nph:
+  %cmp1 = icmp sgt i64 %Size, 0
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                           ; preds = %bb.nph, %.for.body
+  %indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
+  %Step = add nuw nsw i64 %indvar, 1
+  %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
+  %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+  %indvar.next = add nsw i64 %indvar, -1
+  %exitcond = icmp sgt i64 %indvar, 1
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                      ; preds = %.for.body, %bb.nph
+  ret void
+}
+
 ;; Do not form memmove when underaligned load is overlapped with store.
-define void @do_not_form_memmove3(i32* %s, i64 %size) {
-; CHECK-LABEL: @do_not_form_memmove3(
+define void @do_not_form_memmove5(i32* %s, i64 %size) {
+; CHECK-LABEL: @do_not_form_memmove5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
 ; CHECK-NEXT:    [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 [[END_IDX]]
@@ -1266,12 +1448,11 @@ exit:
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
-
-;; FIXME: Do not form memmove from loop body containing memcpy.
-define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
-; CHECK-LABEL: @do_not_form_memmove4(
+;; Do not form memmove for memcpy with aliasing store.
+define void @do_not_form_memmove6(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @do_not_form_memmove6(
 ; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    [[BASEALIAS:%.*]] = call i8* @external(i8* [[SRC:%.*]])
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
@@ -1279,6 +1460,7 @@ define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
 ; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
 ; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
 ; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
+; CHECK-NEXT:    store i8 4, i8* [[BASEALIAS]], align 1
 ; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -1286,6 +1468,7 @@ define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
 ; CHECK-NEXT:    ret void
 ;
 bb.nph:
+  %BaseAlias = call i8* @external(i8* %Src)
   br label %for.body
 
 for.body:                                         ; preds = %bb.nph, %for.body
@@ -1294,6 +1477,7 @@ for.body:                                         ; preds = %bb.nph, %for.body
   %SrcI = getelementptr i8, i8* %Src, i64 %Step
   %DestI = getelementptr i8, i8* %Src, i64 %indvar
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+  store i8 4, i8* %BaseAlias
   %indvar.next = add i64 %indvar, 1
   %exitcond = icmp eq i64 %indvar.next, %Size
   br i1 %exitcond, label %for.end, label %for.body
@@ -1303,8 +1487,8 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ;; Do not form memmove when load has more than one use.
-define i32 @do_not_form_memmove5(i32* %p) {
-; CHECK-LABEL: @do_not_form_memmove5(
+define i32 @do_not_form_memmove7(i32* %p) {
+; CHECK-LABEL: @do_not_form_memmove7(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:

