[llvm] MemCpyOpt: replace an AA query with MSSA query (PR #108535)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 13 04:20:32 PDT 2024
https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/108535
An AA query in processStoreOfLoad misses certain cases, and has been marked as a TODO. Replace it with an MSSA query to increase MemCpyOpt's power, fixing the long-standing TODO.
>From 5ca5b930ff16bf6e91abfc978239391594b616af Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 13 Sep 2024 12:13:57 +0100
Subject: [PATCH] MemCpyOpt: replace an AA query with MSSA query
An AA query in processStoreOfLoad misses certain cases, and has been
marked as a TODO. Replace it with an MSSA query to increase MemCpyOpt's
power, fixing the long-standing TODO.
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 25 +++++++--------
llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll | 31 +++++++++----------
2 files changed, 26 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1d67773585d593..0b7e58d5b961ae 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -647,19 +647,18 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
(EnableMemCpyOptWithoutLibcalls ||
(TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
MemoryLocation LoadLoc = MemoryLocation::get(LI);
-
- // We use alias analysis to check if an instruction may store to
- // the memory we load from in between the load and the store. If
- // such an instruction is found, we try to promote there instead
- // of at the store position.
- // TODO: Can use MSSA for this.
- Instruction *P = SI;
- for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
- if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
- P = &I;
- break;
- }
- }
+ MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
+ *StoreAccess = MSSA->getMemoryAccess(SI);
+
+ // We use MSSA to check if an instruction may store to the memory we load
+ // from in between the load and the store. If such an instruction is found,
+ // we try to promote there instead of at the store position.
+ BatchAAResults BAA(*AA);
+ auto *Clobber =
+ cast<MemoryUseOrDef>(MSSA->getWalker()->getClobberingMemoryAccess(
+ StoreAccess, LoadLoc, BAA));
+ Instruction *P =
+ MSSA->dominates(LoadAccess, Clobber) ? Clobber->getMemoryInst() : SI;
// If we found an instruction that may write to the loaded memory,
// we can try to promote at this position instead of the store
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 51fad820509393..00cffb9b1c7f19 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -38,9 +38,8 @@ define void @noaliasdst(ptr %src, ptr noalias %dst) {
define void @destroysrc(ptr %src, ptr %dst) {
; CHECK-LABEL: @destroysrc(
-; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
-; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -51,8 +50,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
; CHECK-LABEL: @destroynoaliassrc(
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -63,9 +62,8 @@ define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
define void @copyalias(ptr %src, ptr %dst) {
; CHECK-LABEL: @copyalias(
-; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
-; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST]], align 8
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -79,9 +77,9 @@ define void @copyalias(ptr %src, ptr %dst) {
; sure we lift the computation as well if needed and possible.
define void @addrproducer(ptr %src, ptr %dst) {
; CHECK-LABEL: @addrproducer(
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -93,11 +91,10 @@ define void @addrproducer(ptr %src, ptr %dst) {
define void @aliasaddrproducer(ptr %src, ptr %dst, ptr %dstidptr) {
; CHECK-LABEL: @aliasaddrproducer(
-; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: [[DSTINDEX:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
-; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S]], ptr [[DST]], i32 [[DSTINDEX]]
-; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST2]], align 8
+; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i32 [[DSTINDEX]]
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -110,11 +107,11 @@ define void @aliasaddrproducer(ptr %src, ptr %dst, ptr %dstidptr) {
define void @noaliasaddrproducer(ptr %src, ptr noalias %dst, ptr noalias %dstidptr) {
; CHECK-LABEL: @noaliasaddrproducer(
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
-; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
+; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP1]], 1
; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 [[DSTINDEX]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -130,7 +127,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
; CHECK-LABEL: @throwing_call(
; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
-; CHECK-NEXT: call void @call() [[ATTR2:#.*]]
+; CHECK-NEXT: call void @call() #[[ATTR2:[0-9]+]]
; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list