[llvm] 5da9044 - [MemCpyOpt] Fix clobber check in fca2memcpy optimization
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 06:54:01 PDT 2025
Author: Nikita Popov
Date: 2025-03-12T14:53:50+01:00
New Revision: 5da9044c40840187330526ca888290a95927a629
URL: https://github.com/llvm/llvm-project/commit/5da9044c40840187330526ca888290a95927a629
DIFF: https://github.com/llvm/llvm-project/commit/5da9044c40840187330526ca888290a95927a629.diff
LOG: [MemCpyOpt] Fix clobber check in fca2memcpy optimization
This effectively reverts #108535. The old AA code was looking for
the *first* clobber between the load and store and then trying to
move all the way up there. The new MSSA based code instead found
the *last* clobber. There might still be an earlier clobber that
has not been accounted for.
Fixes #130632.
Added:
Modified:
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index ff3c93661c9e6..ce13f87d601a7 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -638,17 +638,19 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
(EnableMemCpyOptWithoutLibcalls ||
(TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
MemoryLocation LoadLoc = MemoryLocation::get(LI);
- MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
- *StoreAccess = MSSA->getMemoryAccess(SI);
-
- // We use MSSA to check if an instruction may store to the memory we load
- // from in between the load and the store. If such an instruction is found,
- // we try to promote there instead of at the store position.
- auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- StoreAccess->getDefiningAccess(), LoadLoc, BAA);
- Instruction *P = MSSA->dominates(LoadAccess, Clobber)
- ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
- : SI;
+
+ // We use alias analysis to check if an instruction may store to
+ // the memory we load from in between the load and the store. If
+ // such an instruction is found, we try to promote there instead
+ // of at the store position.
+ // TODO: Can use MSSA for this.
+ Instruction *P = SI;
+ for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
+ if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
+ P = &I;
+ break;
+ }
+ }
// If we found an instruction that may write to the loaded memory,
// we can try to promote at this position instead of the store
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 61e349e01ed91..7d4557aa331c4 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -51,8 +51,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
; CHECK-LABEL: @destroynoaliassrc(
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -79,9 +79,9 @@ define void @copyalias(ptr %src, ptr %dst) {
; sure we lift the computation as well if needed and possible.
define void @addrproducer(ptr %src, ptr %dst) {
; CHECK-LABEL: @addrproducer(
-; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
+; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -113,8 +113,8 @@ define void @noaliasaddrproducer(ptr %src, ptr noalias %dst, ptr noalias %dstidp
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 [[DSTINDEX]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 undef, i64 16, i1 false)
; CHECK-NEXT: ret void
;
%1 = load %S, ptr %src
@@ -130,7 +130,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
; CHECK-LABEL: @throwing_call(
; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
-; CHECK-NEXT: call void @call() [[ATTR2:#.*]]
+; CHECK-NEXT: call void @call() #[[ATTR2:[0-9]+]]
; CHECK-NEXT: store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
; CHECK-NEXT: ret void
;
@@ -156,4 +156,30 @@ loop:
br label %loop
}
+; There are multiple instructions that can clobber the source memory here.
+; We can move the dest write past the store to %ptr.24, but not the memcpy.
+; Make sure we don't perform fca2memcpy conversion in this case.
+define void @multiple_clobbering(ptr %ptr, ptr %ptr.copy) {
+; CHECK-LABEL: @multiple_clobbering(
+; CHECK-NEXT: [[PTR_8:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT: [[PTR_24:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 24
+; CHECK-NEXT: [[PTR_32:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 32
+; CHECK-NEXT: [[PTR_COPY_8:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_COPY:%.*]], i64 8
+; CHECK-NEXT: [[STRUCT:%.*]] = load { i32, i64 }, ptr [[PTR_COPY_8]], align 8
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR_8]], ptr [[PTR_32]], i64 12, i1 false)
+; CHECK-NEXT: store i64 1, ptr [[PTR_24]], align 8
+; CHECK-NEXT: store { i32, i64 } [[STRUCT]], ptr [[PTR_32]], align 8
+; CHECK-NEXT: ret void
+;
+ %ptr.8 = getelementptr inbounds nuw i8, ptr %ptr, i64 8
+ %ptr.24 = getelementptr inbounds nuw i8, ptr %ptr, i64 24
+ %ptr.32 = getelementptr inbounds nuw i8, ptr %ptr, i64 32
+ %ptr.copy.8 = getelementptr inbounds nuw i8, ptr %ptr.copy, i64 8
+ %struct = load { i32, i64 }, ptr %ptr.copy.8, align 8
+ call void @llvm.memcpy.p0.p0.i64(ptr %ptr.8, ptr %ptr.32, i64 12, i1 false)
+ store i64 1, ptr %ptr.24, align 8
+ store { i32, i64 } %struct, ptr %ptr.32, align 8
+ ret void
+}
+
declare void @call()
More information about the llvm-commits
mailing list