[llvm] MemCpyOpt: replace an AA query with MSSA query (PR #108535)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 13 04:20:32 PDT 2024


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/108535

An AA query in processStoreOfLoad misses certain cases, and has been marked as a TODO. Replace it with an MSSA query to increase MemCpyOpt's power, fixing the long-standing TODO.

>From 5ca5b930ff16bf6e91abfc978239391594b616af Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 13 Sep 2024 12:13:57 +0100
Subject: [PATCH] MemCpyOpt: replace an AA query with MSSA query

An AA query in processStoreOfLoad misses certain cases, and has been
marked as a TODO. Replace it with an MSSA query to increase MemCpyOpt's
power, fixing the long-standing TODO.
---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 25 +++++++--------
 llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll  | 31 +++++++++----------
 2 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1d67773585d593..0b7e58d5b961ae 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -647,19 +647,18 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
       (EnableMemCpyOptWithoutLibcalls ||
        (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
     MemoryLocation LoadLoc = MemoryLocation::get(LI);
-
-    // We use alias analysis to check if an instruction may store to
-    // the memory we load from in between the load and the store. If
-    // such an instruction is found, we try to promote there instead
-    // of at the store position.
-    // TODO: Can use MSSA for this.
-    Instruction *P = SI;
-    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
-      if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
-        P = &I;
-        break;
-      }
-    }
+    MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
+                   *StoreAccess = MSSA->getMemoryAccess(SI);
+
+    // We use MSSA to check if an instruction may store to the memory we load
+    // from in between the load and the store. If such an instruction is found,
+    // we try to promote there instead of at the store position.
+    BatchAAResults BAA(*AA);
+    auto *Clobber =
+        cast<MemoryUseOrDef>(MSSA->getWalker()->getClobberingMemoryAccess(
+            StoreAccess, LoadLoc, BAA));
+    Instruction *P =
+        MSSA->dominates(LoadAccess, Clobber) ? Clobber->getMemoryInst() : SI;
 
     // If we found an instruction that may write to the loaded memory,
     // we can try to promote at this position instead of the store
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 51fad820509393..00cffb9b1c7f19 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -38,9 +38,8 @@ define void @noaliasdst(ptr %src, ptr noalias %dst) {
 
 define void @destroysrc(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @destroysrc(
-; CHECK-NEXT:    [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
-; CHECK-NEXT:    store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -51,8 +50,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
 
 define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @destroynoaliassrc(
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -63,9 +62,8 @@ define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
 
 define void @copyalias(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @copyalias(
-; CHECK-NEXT:    [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
-; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:    store [[S]] [[TMP1]], ptr [[DST]], align 8
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -79,9 +77,9 @@ define void @copyalias(ptr %src, ptr %dst) {
 ; sure we lift the computation as well if needed and possible.
 define void @addrproducer(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @addrproducer(
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
 ; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -93,11 +91,10 @@ define void @addrproducer(ptr %src, ptr %dst) {
 
 define void @aliasaddrproducer(ptr %src, ptr %dst, ptr %dstidptr) {
 ; CHECK-LABEL: @aliasaddrproducer(
-; CHECK-NEXT:    [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    [[DSTINDEX:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
-; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S]], ptr [[DST]], i32 [[DSTINDEX]]
-; CHECK-NEXT:    store [[S]] [[TMP1]], ptr [[DST2]], align 8
+; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i32 [[DSTINDEX]]
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -110,11 +107,11 @@ define void @aliasaddrproducer(ptr %src, ptr %dst, ptr %dstidptr) {
 
 define void @noaliasaddrproducer(ptr %src, ptr noalias %dst, ptr noalias %dstidptr) {
 ; CHECK-LABEL: @noaliasaddrproducer(
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
-; CHECK-NEXT:    [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
+; CHECK-NEXT:    [[DSTINDEX:%.*]] = or i32 [[TMP1]], 1
 ; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 [[DSTINDEX]]
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -130,7 +127,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @throwing_call(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
-; CHECK-NEXT:    call void @call() [[ATTR2:#.*]]
+; CHECK-NEXT:    call void @call() #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;



More information about the llvm-commits mailing list