[llvm] [MemCpyOpt] Forward `memcpy` based on the actual copy memory location. (PR #87190)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 17:03:07 PDT 2024
https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/87190
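
The change below teaches MemCpyOpt to forward a `memcpy` whose source is a constant non-negative offset into the destination of an earlier `memcpy`, rewriting it to read from the earlier copy's source at the same offset. A condensed before/after view of the `@forward_offset` test added below (this snippet is illustrative, not part of the patch):

define void @forward_offset(ptr %dep_src) {
  %dep_dest = alloca [9 x i8], align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
  %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
  %dest = getelementptr inbounds i8, ptr %dep_src, i64 1
  ; before: the second copy reads from %src, i.e. offset 1 into %dep_dest
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 6, i1 false)
  ; after memcpyopt it reads from the same offset into %dep_src (which is %dest
  ; itself) and becomes a memmove, since that source may alias the destination:
  ;   call void @llvm.memmove.p0.p0.i64(ptr align 1 %dest, ptr align 1 %dest, i64 6, i1 false)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)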
>From abd81ec27f855c3e01a876ff731b0a9c834e6354 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Tue, 2 Apr 2024 08:46:38 +0800
Subject: [PATCH 1/2] Pre-commit test cases
---
.../MemCpyOpt/memcpy-memcpy-offset.ll | 195 ++++++++++++++++++
.../Transforms/PhaseOrdering/memcpy-offset.ll | 42 ++++
2 files changed, 237 insertions(+)
create mode 100644 llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
create mode 100644 llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
new file mode 100644
index 0000000000000..fe5056d85dcd3
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
@@ -0,0 +1,195 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s
+
+%buf = type [9 x i8]
+
+; We can forward `memcpy` because the copy locations are the same.
+define void @forward_offset(ptr %dep_src) {
+; CHECK-LABEL: define void @forward_offset(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 6, i1 false)
+ ret void
+}
+
+; We need to update the align value of the source of `memcpy` when forwarding.
+define void @forward_offset_align(ptr %dep_src) {
+; CHECK-LABEL: define void @forward_offset_align(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[DEP_SRC]], i64 9, i1 false)
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 3
+; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 5, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 4 %dep_src, i64 9, i1 false)
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 3
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 3
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 5, i1 false)
+ ret void
+}
+
+; We can change the align value to 2 when forwarding.
+define void @forward_offset_align_2(ptr %dep_src) {
+; CHECK-LABEL: define void @forward_offset_align_2(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[DEP_SRC]], i64 9, i1 false)
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 2
+; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 4 %dep_src, i64 9, i1 false)
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 2
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 2
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 6, i1 false)
+ ret void
+}
+
+; We need to create a GEP instruction when forwarding.
+define void @forward_offset_with_gep(ptr %dep_src) {
+; CHECK-LABEL: define void @forward_offset_with_gep(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT: [[DEP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP1]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 2
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 6, i1 false)
+ ret void
+}
+
+; Make sure we pass the right parameters when calling `memcpy`.
+define void @forward_offset_memcpy(ptr %dep_src) {
+; CHECK-LABEL: define void @forward_offset_memcpy(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: [[DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: call void @use(ptr [[DEST]])
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ %dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 6, i1 false)
+ call void @use(ptr %dest)
+ ret void
+}
+
+; Make sure we pass the right parameters when calling `memcpy.inline`.
+define void @forward_offset_memcpy_inline(ptr %dep_src) {
+; CHECK-LABEL: define void @forward_offset_memcpy_inline(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: [[DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: call void @use(ptr [[DEST]])
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ %dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 6, i1 false)
+ call void @use(ptr %dest)
+ ret void
+}
+
+; We cannot forward `memcpy` because the offset plus length exceeds the size of the `memcpy` it depends on.
+define void @do_not_forward_oversize_offset(ptr %dep_src) {
+; CHECK-LABEL: define void @do_not_forward_oversize_offset(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 6, i1 false)
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 6, i1 false)
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 6, i1 false)
+ ret void
+}
+
+; We can forward `memcpy` because the write operation does not corrupt the location to be copied.
+define void @forward_offset_and_store(ptr %dep_src) {
+; CHECK-LABEL: define void @forward_offset_and_store(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
+; CHECK-NEXT: store i8 1, ptr [[DEP_SRC]], align 1
+; CHECK-NEXT: [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 6
+; CHECK-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 5, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
+ store i8 1, ptr %dep_src, align 1
+ %dep_src_end = getelementptr inbounds i8, ptr %dep_src, i64 6
+ store i8 1, ptr %dep_src_end, align 1
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 5, i1 false)
+ ret void
+}
+
+; We cannot forward `memcpy` because the write operation alters the location to be copied.
+; Also, make sure we have removed the GEP instruction that was created temporarily.
+define void @do_not_forward_offset_and_store(ptr %dep_src) {
+; CHECK-LABEL: define void @do_not_forward_offset_and_store(
+; CHECK-SAME: ptr [[DEP_SRC:%.*]]) {
+; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
+; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CHECK-NEXT: store i8 1, ptr [[DEP]], align 1
+; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 2
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 5, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
+ %dep_src_offset = getelementptr inbounds i8, ptr %dep_src, i64 1
+ store i8 1, ptr %dep_src_offset, align 1
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 2
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 5, i1 false)
+ ret void
+}
+
+declare void @use(ptr)
+
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
+declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
diff --git a/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll
new file mode 100644
index 0000000000000..0d34932937eee
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=memcpyopt,dse,instcombine -S -verify-memoryssa | FileCheck --check-prefix=CUSTOM %s
+; RUN: opt < %s -O2 -S | FileCheck --check-prefix=O2 %s
+
+%buf = type [7 x i8]
+
+; Check that we eliminate all `memcpy` calls in this function.
+define void @forward_offset_and_store(ptr %dep_src) {
+; CUSTOM-LABEL: define void @forward_offset_and_store(
+; CUSTOM-SAME: ptr [[DEP_SRC:%.*]]) {
+; CUSTOM-NEXT: [[DEP_DEST:%.*]] = alloca [7 x i8], align 1
+; CUSTOM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DEP_DEST]], ptr noundef nonnull align 1 dereferenceable(7) [[DEP_SRC]], i64 7, i1 false)
+; CUSTOM-NEXT: store i8 1, ptr [[DEP_SRC]], align 1
+; CUSTOM-NEXT: [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 6
+; CUSTOM-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
+; CUSTOM-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CUSTOM-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CUSTOM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DEST]], ptr noundef nonnull align 1 dereferenceable(5) [[SRC]], i64 5, i1 false)
+; CUSTOM-NEXT: ret void
+;
+; O2-LABEL: define void @forward_offset_and_store(
+; O2-SAME: ptr nocapture [[DEP_SRC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O2-NEXT: [[DEP_DEST:%.*]] = alloca [7 x i8], align 1
+; O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DEP_DEST]], ptr noundef nonnull align 1 dereferenceable(7) [[DEP_SRC]], i64 7, i1 false)
+; O2-NEXT: store i8 1, ptr [[DEP_SRC]], align 1
+; O2-NEXT: [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 6
+; O2-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
+; O2-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; O2-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DEST]], ptr noundef nonnull align 1 dereferenceable(5) [[SRC]], i64 5, i1 false)
+; O2-NEXT: ret void
+;
+ %dep_dest = alloca %buf, align 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dep_dest, ptr align 1 %dep_src, i64 7, i1 false)
+ store i8 1, ptr %dep_src, align 1
+ %dep_src_end = getelementptr inbounds i8, ptr %dep_src, i64 6
+ store i8 1, ptr %dep_src_end, align 1
+ %src = getelementptr inbounds i8, ptr %dep_dest, i64 1
+ %dest = getelementptr inbounds i8, ptr %dep_src, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src, i64 5, i1 false)
+ ret void
+}
>From a787b4fc717627374b61180d2ea816637d77d22e Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Tue, 2 Apr 2024 08:47:44 +0800
Subject: [PATCH 2/2] [MemCpyOpt] Calculate the offset value to forward
`memcpy`
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 85 +++++++++++++------
.../MemCpyOpt/memcpy-memcpy-offset.ll | 17 ++--
.../Transforms/PhaseOrdering/memcpy-offset.ll | 12 +--
3 files changed, 72 insertions(+), 42 deletions(-)
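
For context on the checks added below: with a forwarding offset, the transform is only legal when the dependence copy covers the whole forwarded range (MDepLen >= MLen + MForwardOffset), and the forwarded source alignment is reduced to commonAlignment(original source alignment, offset). A minimal sketch of that arithmetic in IR, using hypothetical names (%a, %b, %tmp) rather than code from the patch:

define void @offset_forwarding_sketch(ptr %a, ptr %b) {
  %tmp = alloca [9 x i8], align 1
  ; MDep: copy 7 bytes from %a (source align 4) into %tmp
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %tmp, ptr align 4 %a, i64 7, i1 false)
  ; M reads at offset 2 into %tmp, so MForwardOffset = 2
  %p = getelementptr inbounds i8, ptr %tmp, i64 2
  ; M copies 5 bytes: 2 + 5 <= 7, so it may instead read from %a at offset 2,
  ; with source alignment commonAlignment(4, 2) = 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %b, ptr align 1 %p, i64 5, i1 false)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)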
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index b9efd9aaa28c5..6ddc9e3af79b8 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -1124,28 +1125,67 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MemCpyInst *MDep,
BatchAAResults &BAA) {
- // We can only transforms memcpy's where the dest of one is the source of the
- // other.
- if (M->getSource() != MDep->getDest() || MDep->isVolatile())
- return false;
-
// If dep instruction is reading from our current input, then it is a noop
- // transfer and substituting the input won't change this instruction. Just
- // ignore the input and let someone else zap MDep. This handles cases like:
+ // transfer and substituting the input won't change this instruction. Just
+ // ignore the input and let someone else zap MDep. This handles cases like:
// memcpy(a <- a)
// memcpy(b <- a)
if (M->getSource() == MDep->getSource())
return false;
- // Second, the length of the memcpy's must be the same, or the preceding one
+ // We can only optimize non-volatile memcpy's.
+ if (MDep->isVolatile())
+ return false;
+
+ int64_t MForwardOffset = 0;
+ const DataLayout &DL = M->getModule()->getDataLayout();
+ // We can only transform memcpy's where the dest of one is the source of the
+ // other, or where the second's source lies at a known non-negative offset into it.
+ if (M->getSource() != MDep->getDest()) {
+ std::optional<int64_t> Offset =
+ M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
+ if (!Offset || *Offset < 0)
+ return false;
+ MForwardOffset = *Offset;
+ }
+
+ // The length of the memcpy's must be the same, or the preceding one must be
+ // large enough to cover the following one plus the forwarding offset.
- if (MDep->getLength() != M->getLength()) {
+ if (MForwardOffset != 0 || (MDep->getLength() != M->getLength())) {
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
auto *MLen = dyn_cast<ConstantInt>(M->getLength());
- if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+ if (!MDepLen || !MLen ||
+ MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
return false;
}
+ IRBuilder<> Builder(M);
+ auto *CopySource = MDep->getRawSource();
+ auto CleanupOnFailure = llvm::make_scope_exit([&CopySource] {
+ if (CopySource->use_empty())
+ cast<Instruction>(CopySource)->eraseFromParent();
+ });
+ MaybeAlign CopySourceAlign = MDep->getSourceAlign();
+ // We only need to check the portion of MDep's source that M actually copies.
+ auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
+ MemoryLocation::getForSource(M).Size);
+
+ // We need to update `MCopyLoc` if an offset exists.
+ if (MForwardOffset > 0) {
+ // The copy destination of `M` may itself be usable as the forwarded copy source.
+ std::optional<int64_t> MDestOffset =
+ M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
+ if (MDestOffset && *MDestOffset == MForwardOffset)
+ CopySource = M->getRawDest();
+ else
+ CopySource = Builder.CreateInBoundsPtrAdd(
+ CopySource, ConstantInt::get(Type::getInt64Ty(Builder.getContext()),
+ MForwardOffset));
+ MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
+ if (CopySourceAlign)
+ CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
+ }
+
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
// memcpy(a <- b)
@@ -1155,10 +1195,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- // TODO: It would be sufficient to check the MDep source up to the memcpy
- // size of M, rather than MDep.
- if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
+ MSSA->getMemoryAccess(M)))
return false;
// If the dest of the second might alias the source of the first, then the
@@ -1183,23 +1221,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
- IRBuilder<> Builder(M);
Instruction *NewM;
if (UseMemMove)
- NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM =
+ Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(), CopySource,
+ CopySourceAlign, M->getLength(), M->isVolatile());
else if (isa<MemCpyInlineInst>(M)) {
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
// never allowed since that would allow the latter to be lowered as a call
// to an external function.
- NewM = Builder.CreateMemCpyInline(
- M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
- MDep->getSourceAlign(), M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemCpyInline(M->getRawDest(), M->getDestAlign(),
+ CopySource, CopySourceAlign,
+ M->getLength(), M->isVolatile());
} else
- NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM =
+ Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(), CopySource,
+ CopySourceAlign, M->getLength(), M->isVolatile());
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
index fe5056d85dcd3..4d00ea70a564d 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
@@ -11,7 +11,7 @@ define void @forward_offset(ptr %dep_src) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[DEP]], i64 6, i1 false)
; CHECK-NEXT: ret void
;
%dep_dest = alloca %buf, align 1
@@ -30,7 +30,7 @@ define void @forward_offset_align(ptr %dep_src) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[DEP_SRC]], i64 9, i1 false)
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 3
; CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 5, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[DEST]], i64 5, i1 false)
; CHECK-NEXT: ret void
;
%dep_dest = alloca %buf, align 1
@@ -49,7 +49,7 @@ define void @forward_offset_align_2(ptr %dep_src) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[DEP_SRC]], i64 9, i1 false)
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 2
; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 2
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP]], ptr align 2 [[DEP]], i64 6, i1 false)
; CHECK-NEXT: ret void
;
%dep_dest = alloca %buf, align 1
@@ -68,7 +68,8 @@ define void @forward_offset_with_gep(ptr %dep_src) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT: [[DEP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 2
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP1]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP1]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT: ret void
;
%dep_dest = alloca %buf, align 1
@@ -87,7 +88,8 @@ define void @forward_offset_memcpy(ptr %dep_src) {
; CHECK-NEXT: [[DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT: call void @use(ptr [[DEST]])
; CHECK-NEXT: ret void
;
@@ -108,7 +110,8 @@ define void @forward_offset_memcpy_inline(ptr %dep_src) {
; CHECK-NEXT: [[DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[DEP_SRC]], i64 7, i1 false)
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
-; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT: call void @use(ptr [[DEST]])
; CHECK-NEXT: ret void
;
@@ -151,7 +154,7 @@ define void @forward_offset_and_store(ptr %dep_src) {
; CHECK-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT: [[DEP:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[SRC]], i64 5, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEP]], ptr align 1 [[DEP]], i64 5, i1 false)
; CHECK-NEXT: ret void
;
%dep_dest = alloca %buf, align 1
diff --git a/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll
index 0d34932937eee..c7c05901455bc 100644
--- a/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll
+++ b/llvm/test/Transforms/PhaseOrdering/memcpy-offset.ll
@@ -8,26 +8,16 @@
define void @forward_offset_and_store(ptr %dep_src) {
; CUSTOM-LABEL: define void @forward_offset_and_store(
; CUSTOM-SAME: ptr [[DEP_SRC:%.*]]) {
-; CUSTOM-NEXT: [[DEP_DEST:%.*]] = alloca [7 x i8], align 1
-; CUSTOM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DEP_DEST]], ptr noundef nonnull align 1 dereferenceable(7) [[DEP_SRC]], i64 7, i1 false)
; CUSTOM-NEXT: store i8 1, ptr [[DEP_SRC]], align 1
; CUSTOM-NEXT: [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 6
; CUSTOM-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
-; CUSTOM-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
-; CUSTOM-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
-; CUSTOM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DEST]], ptr noundef nonnull align 1 dereferenceable(5) [[SRC]], i64 5, i1 false)
; CUSTOM-NEXT: ret void
;
; O2-LABEL: define void @forward_offset_and_store(
-; O2-SAME: ptr nocapture [[DEP_SRC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; O2-NEXT: [[DEP_DEST:%.*]] = alloca [7 x i8], align 1
-; O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DEP_DEST]], ptr noundef nonnull align 1 dereferenceable(7) [[DEP_SRC]], i64 7, i1 false)
+; O2-SAME: ptr nocapture writeonly [[DEP_SRC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; O2-NEXT: store i8 1, ptr [[DEP_SRC]], align 1
; O2-NEXT: [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 6
; O2-NEXT: store i8 1, ptr [[DEP_SRC_END]], align 1
-; O2-NEXT: [[SRC:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
-; O2-NEXT: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[DEP_SRC]], i64 1
-; O2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DEST]], ptr noundef nonnull align 1 dereferenceable(5) [[SRC]], i64 5, i1 false)
; O2-NEXT: ret void
;
%dep_dest = alloca %buf, align 1