[llvm] [MemCpyOpt] No need to create `memcpy(a <- a)` (PR #98321)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 10 16:51:13 PDT 2024


https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/98321

>From 4e61f5d6b69d8a64c50f1bfd3864b88635dc1ff7 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Wed, 10 Jul 2024 21:19:51 +0800
Subject: [PATCH 1/4] Pre-commit test cases

---
 llvm/test/Transforms/MemCpyOpt/memcpy.ll | 54 ++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 48698f25174e7..22faff1fdd1ae 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -139,6 +139,60 @@ define void @test6_memcpy(ptr %src, ptr %dest) nounwind {
   ret void
 }
 
+; FIXME: When forwarding to memcpy(arg+1, arg+1), we don't need to create this memcpy.
+define void @test6_memcpy_forward_back(ptr %arg) nounwind {
+; CHECK-LABEL: @test6_memcpy_forward_back(
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [16 x i8], align 1
+; CHECK-NEXT:    [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
+; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca [16 x i8], align 1
+  %src = getelementptr inbounds i8, ptr %arg, i64 1
+  %dest = getelementptr inbounds i8, ptr %arg, i64 1
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false)
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
+  ret void
+}
+
+; We have to retain this `memcpy(arg+2, arg+1)` forwarding.
+define void @test6_memcpy_forward_not_back(ptr %arg) nounwind {
+; CHECK-LABEL: @test6_memcpy_forward_not_back(
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [16 x i8], align 1
+; CHECK-NEXT:    [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
+; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca [16 x i8], align 1
+  %src = getelementptr inbounds i8, ptr %arg, i64 1
+  %dest = getelementptr inbounds i8, ptr %arg, i64 2
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false)
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
+  ret void
+}
+
+; There is data writing between the two memcpy operations.
+define void @test6_memcpy_forward_back_failed(ptr %arg) nounwind {
+; CHECK-LABEL: @test6_memcpy_forward_back_failed(
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [16 x i8], align 1
+; CHECK-NEXT:    [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
+; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca [16 x i8], align 1
+  %src = getelementptr inbounds i8, ptr %arg, i64 1
+  %dest = getelementptr inbounds i8, ptr %arg, i64 1
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false)
+  store i8 1, ptr %dest, align 1
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
+  ret void
+}
 
 @x = external global %0
 

>From ec1ab1cd3cfb7e9775bdbc8294d9522832a68de8 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Wed, 10 Jul 2024 13:47:07 +0800
Subject: [PATCH 2/4] [MemCpyOpt] No need to create `memcpy(a <- a)`

---
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 8 ++++++++
 llvm/test/Transforms/MemCpyOpt/memcpy.ll       | 6 +-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index b9efd9aaa28c5..1a4b3657cedae 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1161,6 +1161,14 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                      MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
     return false;
 
+  // No need to create `memcpy(a <- a)`.
+  if (BAA.isMustAlias(M->getDest(), MDep->getRawSource())) {
+    // Remove the instruction we're replacing.
+    eraseInstruction(M);
+    ++NumMemCpyInstr;
+    return true;
+  }
+
   // If the dest of the second might alias the source of the first, then the
   // source and dest might overlap. In addition, if the source of the first
   // points to constant memory, they won't overlap by definition. Otherwise, we
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 22faff1fdd1ae..a19cfe6e75b70 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -142,11 +142,7 @@ define void @test6_memcpy(ptr %src, ptr %dest) nounwind {
 ; FIXME: When forwarding to memcpy(arg+1, arg+1), we don't need to create this memcpy.
 define void @test6_memcpy_forward_back(ptr %arg) nounwind {
 ; CHECK-LABEL: @test6_memcpy_forward_back(
-; CHECK-NEXT:    [[TMP:%.*]] = alloca [16 x i8], align 1
-; CHECK-NEXT:    [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
-; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
-; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
+; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
 ; CHECK-NEXT:    ret void
 ;
   %tmp = alloca [16 x i8], align 1

>From 385ace339a9fc6fcff429d30da5667a25f0a9888 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Wed, 10 Jul 2024 22:27:10 +0800
Subject: [PATCH 3/4] Address comments

---
 llvm/test/Transforms/MemCpyOpt/memcpy.ll | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index a19cfe6e75b70..1d55c733da831 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -139,7 +139,7 @@ define void @test6_memcpy(ptr %src, ptr %dest) nounwind {
   ret void
 }
 
-; FIXME: When forwarding to memcpy(arg+1, arg+1), we don't need to create this memcpy.
+; When forwarding to memcpy(arg+1, arg+1), we don't need to create this memcpy.
 define void @test6_memcpy_forward_back(ptr %arg) nounwind {
 ; CHECK-LABEL: @test6_memcpy_forward_back(
 ; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
@@ -171,25 +171,6 @@ define void @test6_memcpy_forward_not_back(ptr %arg) nounwind {
   ret void
 }
 
-; There is data writing between the two memcpy operations.
-define void @test6_memcpy_forward_back_failed(ptr %arg) nounwind {
-; CHECK-LABEL: @test6_memcpy_forward_back_failed(
-; CHECK-NEXT:    [[TMP:%.*]] = alloca [16 x i8], align 1
-; CHECK-NEXT:    [[SRC:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
-; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
-; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i32 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
-; CHECK-NEXT:    ret void
-;
-  %tmp = alloca [16 x i8], align 1
-  %src = getelementptr inbounds i8, ptr %arg, i64 1
-  %dest = getelementptr inbounds i8, ptr %arg, i64 1
-  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %src, i32 16, i1 false)
-  store i8 1, ptr %dest, align 1
-  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
-  ret void
-}
-
 @x = external global %0
 
 define void @test3(ptr noalias writable sret(%0) %agg.result) nounwind  {

>From dd9c1902d2f96e2477bf8067240a4723d9a45569 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 11 Jul 2024 07:43:41 +0800
Subject: [PATCH 4/4] Address comments

---
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1a4b3657cedae..9bf87f2370531 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1162,7 +1162,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
     return false;
 
   // No need to create `memcpy(a <- a)`.
-  if (BAA.isMustAlias(M->getDest(), MDep->getRawSource())) {
+  if (BAA.isMustAlias(M->getDest(), MDep->getSource())) {
     // Remove the instruction we're replacing.
     eraseInstruction(M);
     ++NumMemCpyInstr;



More information about the llvm-commits mailing list