[llvm] dd5991c - [LoopIdiom] Transform loop containing memcpy to memmove
Dawid Jurczak via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 8 00:56:48 PDT 2021
Author: Dawid Jurczak
Date: 2021-10-08T09:56:01+02:00
New Revision: dd5991cc6f2d0b39716c4e6c9272596481f1c7ad
URL: https://github.com/llvm/llvm-project/commit/dd5991cc6f2d0b39716c4e6c9272596481f1c7ad
DIFF: https://github.com/llvm/llvm-project/commit/dd5991cc6f2d0b39716c4e6c9272596481f1c7ad.diff
LOG: [LoopIdiom] Transform loop containing memcpy to memmove
The purpose of this patch is to teach the Loop Idiom Recognize pass how to recognize simple memmove patterns,
in a similar way to what GCC does: https://godbolt.org/z/dKjGvTGff
This is a follow-up to https://reviews.llvm.org/D104464
Differential Revision: https://reviews.llvm.org/D107075
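
To illustrate the idiom, here is a hypothetical C++ reduction of the
positive-stride test case added below (an illustrative sketch, not code
from the patch; the function name is mine): each iteration memcpy's a
single byte one slot to the left within the same buffer, so the loop as a
whole is equivalent to a single overlapping copy and can be collapsed
into one memmove.

    #include <cstring>

    // Each iteration copies Src[i+1] into Src[i]. Source and destination
    // live in the same object, so the loop is equivalent to
    // memmove(Src, Src + 1, Size), which is what the pass now emits.
    void shift_left_by_one(char *Src, long Size) {
      for (long i = 0; i < Size; ++i)
        std::memcpy(&Src[i], &Src[i + 1], 1);
    }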
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
llvm/test/Transforms/LoopIdiom/basic.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b5cc845e2cee6..021c37748936e 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1257,6 +1257,51 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
StoreEv, LoadEv, BECount);
}
+class MemmoveVerifier {
+public:
+ explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
+ const DataLayout &DL)
+ : DL(DL), LoadOff(0), StoreOff(0),
+ BP1(llvm::GetPointerBaseWithConstantOffset(
+ LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+ BP2(llvm::GetPointerBaseWithConstantOffset(
+ StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
+ IsSameObject(BP1 == BP2) {}
+
+ bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
+ const Instruction &TheLoad,
+ bool IsMemCpy) const {
+ if (IsMemCpy) {
+ // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+ // for negative stride.
+ if ((!IsNegStride && LoadOff <= StoreOff) ||
+ (IsNegStride && LoadOff >= StoreOff))
+ return false;
+ } else {
+ // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+ // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
+ int64_t LoadSize =
+ DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8;
+ if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ return false;
+ if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
+ (IsNegStride && LoadOff + LoadSize > StoreOff))
+ return false;
+ }
+ return true;
+ }
+
+private:
+ const DataLayout &DL;
+ int64_t LoadOff;
+ int64_t StoreOff;
+ const Value *BP1;
+ const Value *BP2;
+
+public:
+ const bool IsSameObject;
+};
+
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
@@ -1321,10 +1366,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
bool IsMemCpy = isa<MemCpyInst>(TheStore);
const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
- bool UseMemMove =
+ bool LoopAccessStore =
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSizeSCEV, *AA, IgnoredInsts);
- if (UseMemMove) {
+ if (LoopAccessStore) {
// For memmove case it's not enough to guarantee that loop doesn't access
// TheStore and TheLoad. Additionally we need to make sure that TheStore is
// the only user of TheLoad.
@@ -1363,34 +1408,32 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// the load memory locations. So remove it from the ignored stores.
if (IsMemCpy)
IgnoredInsts.erase(TheStore);
+ MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSizeSCEV, *AA, IgnoredInsts)) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
- << ore::NV("Inst", InstRemark) << " in "
- << ore::NV("Function", TheStore->getFunction())
- << " function will not be hoisted: "
- << ore::NV("Reason", "The loop may access load location");
- });
- return Changed;
- }
- if (UseMemMove) {
- // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
- // negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
- int64_t LoadOff = 0, StoreOff = 0;
- const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
- LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
- const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
- StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
- int64_t LoadSize =
- DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
- if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ if (!IsMemCpy) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
+ TheLoad)
+ << ore::NV("Inst", InstRemark) << " in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function will not be hoisted: "
+ << ore::NV("Reason", "The loop may access load location");
+ });
return Changed;
- if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
- (IsNegStride && LoadOff + LoadSize > StoreOff))
+ }
+  // At this point the loop may access the load location only in the memcpy
+  // case within the same underlying object. If that's not the case, bail out.
+ if (!Verifier.IsSameObject)
return Changed;
}
+ bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
+ if (UseMemMove)
+ if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
+ IsMemCpy))
+ return Changed;
+
if (avoidLIRForMultiBlockLoop())
return Changed;
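
For readers following the new MemmoveVerifier above: for the memcpy case,
UseMemMove now means "memcpy within the same underlying object", and the
old inline offset checks move into loadAndStoreMayFormMemmove. Below is a
standalone restatement of those offset checks, with two of the tests from
this patch worked through as assertions. This is an illustrative sketch,
not the LLVM code itself, and it assumes both offsets are constant byte
displacements from the same base object (the BP1 == BP2 check).

    #include <cassert>
    #include <cstdint>

    // Mirrors the logic of MemmoveVerifier::loadAndStoreMayFormMemmove:
    // LoadOff/StoreOff are constant byte displacements of the load/store
    // base pointers from the same underlying object.
    static bool mayFormMemmove(int64_t LoadOff, int64_t StoreOff,
                               int64_t LoadSize, int64_t StoreSize,
                               bool IsNegStride, bool IsMemCpy) {
      if (IsMemCpy)
        // For memcpy it suffices that the load starts strictly after the
        // store (or strictly before it when the stride is negative).
        return IsNegStride ? LoadOff < StoreOff : LoadOff > StoreOff;
      // For a plain load/store pair the access sizes must match and the
      // per-iteration regions must not overlap at all.
      if (LoadSize != StoreSize)
        return false;
      return IsNegStride ? LoadOff + LoadSize <= StoreOff
                         : LoadOff >= StoreOff + StoreSize;
    }

    int main() {
      // loop_with_memcpy_PR46179_positive_stride: load at Src+1, store at
      // Src+0 -> a memmove may be formed.
      assert(mayFormMemmove(1, 0, 1, 1, /*IsNegStride=*/false,
                            /*IsMemCpy=*/true));
      // do_not_form_memmove2: load at Src+0, store at Src+1 with positive
      // stride -> rejected.
      assert(!mayFormMemmove(0, 1, 1, 1, false, true));
    }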
diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 754b9df105e29..0bf4ad4f64958 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -1104,6 +1104,43 @@ for.end: ; preds = %for.body, %entry
ret void
}
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
+
+;; Memmove formation. We expect exactly the same memmove result as in the PR46179_positive_stride output.
+define void @loop_with_memcpy_PR46179_positive_stride(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @loop_with_memcpy_PR46179_positive_stride(
+; CHECK-NEXT: bb.nph:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false)
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]]
+; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+bb.nph:
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %Step = add nuw nsw i64 %indvar, 1
+ %SrcI = getelementptr i8, i8* %Src, i64 %Step
+ %DestI = getelementptr i8, i8* %Src, i64 %indvar
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
;; Memmove formation.
define void @PR46179_negative_stride(i8* %Src, i64 %Size) {
; CHECK-LABEL: @PR46179_negative_stride(
@@ -1145,7 +1182,82 @@ for.end: ; preds = %.for.body, %bb.nph
ret void
}
-;; Do not form memmove from previous store when stride is positive.
+;; Memmove formation. We expect exactly the same memmove result as in the PR46179_negative_stride output.
+define void @loop_with_memcpy_PR46179_negative_stride(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @loop_with_memcpy_PR46179_negative_stride(
+; CHECK-NEXT: bb.nph:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SCEVGEP]], i8* align 1 [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[STEP:%.*]] = add nsw i64 [[INDVAR]], -1
+; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
+; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+bb.nph:
+ %cmp1 = icmp sgt i64 %Size, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %bb.nph, %.for.body
+ %indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
+ %Step = add nsw i64 %indvar, -1
+ %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
+ %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+ %exitcond = icmp sgt i64 %indvar, 1
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end: ; preds = %.for.body, %bb.nph
+ ret void
+}
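
The negative-stride variant above corresponds roughly to this C++ loop (an
illustrative reconstruction, function name mine):

    #include <cstring>

    // Walks the buffer from the end, copying Src[i-1] into Src[i]; the
    // pass folds this into memmove(Src + 1, Src, Size).
    void shift_right_by_one(char *Src, long Size) {
      for (long i = Size; i > 0; --i)
        std::memcpy(&Src[i], &Src[i - 1], 1);
    }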
+
+;; Memmove formation.
+define void @loop_with_memcpy_stride16(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @loop_with_memcpy_stride16(
+; CHECK-NEXT: bb.nph:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 16
+; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SIZE:%.*]], i64 16)
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 16
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[TMP3]], i1 false)
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[STEP]], [[FOR_BODY]] ], [ 0, [[BB_NPH:%.*]] ]
+; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 16
+; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[STEP]]
+; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i64 [[STEP]], [[SIZE:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+bb.nph:
+ br label %for.body
+
+for.body: ; preds = %for.body, %bb.nph
+ %indvar = phi i64 [ %Step, %for.body ], [ 0, %bb.nph ]
+ %Step = add nuw nsw i64 %indvar, 16
+ %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
+ %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 16, i1 false)
+ %exitcond = icmp slt i64 %Step, %Size
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
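
The stride-16 variant corresponds roughly to the following bottom-tested
C++ loop (again an illustrative reconstruction); the smax/lshr/shl chain in
the CHECK lines simply rounds the copied byte count up to a whole number of
16-byte chunks:

    #include <cstring>

    // Copies 16-byte chunks from Src[i+16] into Src[i] in steps of 16;
    // the body runs at least once, matching the IR's bottom-tested loop.
    void shift_left_by_16(char *Src, long Size) {
      long i = 0;
      do {
        std::memcpy(&Src[i], &Src[i + 16], 16);
        i += 16;
      } while (i < Size);
    }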
+
+;; Do not form memmove from previous load when stride is positive.
define void @do_not_form_memmove1(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove1(
; CHECK-NEXT: bb.nph:
@@ -1181,26 +1293,58 @@ for.end: ; preds = %for.body, %entry
ret void
}
-;; Do not form memmove from next store when stride is negative.
+;; Do not form memmove from previous load in memcpy when stride is positive.
define void @do_not_form_memmove2(i8* %Src, i64 %Size) {
; CHECK-LABEL: @do_not_form_memmove2(
; CHECK-NEXT: bb.nph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 1, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], -1
+; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
+; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+bb.nph:
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 1, %bb.nph ], [ %indvar.next, %for.body ]
+ %Step = add nuw nsw i64 %indvar, -1
+ %SrcI = getelementptr i8, i8* %Src, i64 %Step
+ %DestI = getelementptr i8, i8* %Src, i64 %indvar
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; Do not form memmove from next load when stride is negative.
+define void @do_not_form_memmove3(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @do_not_form_memmove3(
+; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[SRCI]], align 1
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: store i8 [[V]], i8* [[DESTI]], align 1
-; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], -1
+; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add nsw i64 [[INDVAR]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
-; CHECK: for.end.loopexit:
-; CHECK-NEXT: br label [[FOR_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -1223,9 +1367,47 @@ for.end: ; preds = %.for.body, %bb.nph
ret void
}
+;; Do not form memmove from next load in memcpy when stride is negative.
+define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @do_not_form_memmove4(
+; CHECK-NEXT: bb.nph:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SRCI:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[STEP]]
+; CHECK-NEXT: [[DESTI:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 [[INDVAR]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
+; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add nsw i64 [[INDVAR]], -1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+bb.nph:
+ %cmp1 = icmp sgt i64 %Size, 0
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.body: ; preds = %bb.nph, %.for.body
+ %indvar = phi i64 [ %indvar.next, %for.body ], [ %Size, %bb.nph ]
+ %Step = add nuw nsw i64 %indvar, 1
+ %SrcI = getelementptr inbounds i8, i8* %Src, i64 %Step
+ %DestI = getelementptr inbounds i8, i8* %Src, i64 %indvar
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+ %indvar.next = add nsw i64 %indvar, -1
+ %exitcond = icmp sgt i64 %indvar, 1
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end: ; preds = %.for.body, %bb.nph
+ ret void
+}
+
;; Do not form memmove when underaligned load is overlapped with store.
-define void @do_not_form_memmove3(i32* %s, i64 %size) {
-; CHECK-LABEL: @do_not_form_memmove3(
+define void @do_not_form_memmove5(i32* %s, i64 %size) {
+; CHECK-LABEL: @do_not_form_memmove5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END_IDX:%.*]] = add i64 [[SIZE:%.*]], -1
; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 [[END_IDX]]
@@ -1266,12 +1448,11 @@ exit:
ret void
}
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
-
-;; FIXME: Do not form memmove from loop body containing memcpy.
-define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
-; CHECK-LABEL: @do_not_form_memmove4(
+;; Do not form memmove for memcpy with aliasing store.
+define void @do_not_form_memmove6(i8* %Src, i64 %Size) {
+; CHECK-LABEL: @do_not_form_memmove6(
; CHECK-NEXT: bb.nph:
+; CHECK-NEXT: [[BASEALIAS:%.*]] = call i8* @external(i8* [[SRC:%.*]])
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
@@ -1279,6 +1460,7 @@ define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]]
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]]
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false)
+; CHECK-NEXT: store i8 4, i8* [[BASEALIAS]], align 1
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -1286,6 +1468,7 @@ define void @do_not_form_memmove4(i8* %Src, i64 %Size) {
; CHECK-NEXT: ret void
;
bb.nph:
+ %BaseAlias = call i8* @external(i8* %Src)
br label %for.body
for.body: ; preds = %bb.nph, %for.body
@@ -1294,6 +1477,7 @@ for.body: ; preds = %bb.nph, %for.body
%SrcI = getelementptr i8, i8* %Src, i64 %Step
%DestI = getelementptr i8, i8* %Src, i64 %indvar
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false)
+ store i8 4, i8* %BaseAlias
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
@@ -1303,8 +1487,8 @@ for.end: ; preds = %for.body, %entry
}
;; Do not form memmove when load has more than one use.
-define i32 @do_not_form_memmove5(i32* %p) {
-; CHECK-LABEL: @do_not_form_memmove5(
+define i32 @do_not_form_memmove7(i32* %p) {
+; CHECK-LABEL: @do_not_form_memmove7(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup: