[llvm] [MemCpyOpt] Continue merging memset with unrelated clobber (PR #89550)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 21 10:02:37 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: XChy (XChy)
Fixes part of #88632
This patch:
- Adds test coverage for merging stores into memset
- Uses AA information to skip clobbers from unrelated stores (see the sketch below)
Because we query AA here, there may be some compile-time regression. @nikic, could you please test it?
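For illustration, here is a minimal C++ sketch (not taken from the PR; `sketch` and `use` are hypothetical names) of the pattern the AA change targets: an intervening store goes through a pointer that alias analysis can prove does not touch the buffer, so the scan in `tryMergingIntoMemset` no longer has to stop there.

```cpp
#include <cstring>

extern void use(void *buf); // hypothetical sink that keeps the buffer alive

void sketch(long *p, long a) {
  char buf[256];
  std::memset(buf + 8, 0, 136);
  *p = a;                        // unrelated clobber; AA proves it cannot
                                 // alias the local buffer `buf`
  std::memset(buf + 24, 0, 24);  // before this patch, the store above ended
                                 // the scan and this memset was not considered
  use(buf);
}
```

At the IR level this corresponds to the `store_clobber_no_alias` tests in the diff, where the stored-to pointer is a function argument and the buffer is an alloca.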
---
Full diff: https://github.com/llvm/llvm-project/pull/89550.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp (+12-2)
- (modified) llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll (+151-2)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1036b8ae963a24..9cf31de95ca3b1 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -399,6 +399,9 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// If this is a store, see if we can merge it in.
if (!NextStore->isSimple()) break;
+ if (AA->isNoAlias(NextStore->getPointerOperand(), StartPtr))
+ continue;
+
Value *StoredVal = NextStore->getValueOperand();
// Don't convert stores of non-integral pointer types to memsets (which
@@ -420,6 +423,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
std::optional<int64_t> Offset =
NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
+
if (!Offset)
break;
@@ -427,13 +431,19 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
} else {
auto *MSI = cast<MemSetInst>(BI);
- if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
- !isa<ConstantInt>(MSI->getLength()))
+ if (MSI->isVolatile())
+ break;
+
+ if (AA->isNoAlias(MSI->getDest(), StartPtr))
+ continue;
+
+ if (ByteVal != MSI->getValue() || !isa<ConstantInt>(MSI->getLength()))
break;
// Check to see if this store is to a constant offset from the start ptr.
std::optional<int64_t> Offset =
MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
+
if (!Offset)
break;
diff --git a/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll b/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll
index 78aa769982404a..a24696063e4d39 100644
--- a/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll
@@ -36,5 +36,154 @@ exit:
ret void
}
-declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
-declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
+define void @memset_clobber_no_alias(ptr %p) {
+; CHECK-LABEL: @memset_clobber_no_alias(
+; CHECK-NEXT: [[STACK:%.*]] = alloca <256 x i8>, align 8
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <256 x i8>, align 8
+ %stack1 = getelementptr inbounds i8, ptr %stack, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
+ %stack2 = getelementptr inbounds i8, ptr %stack, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
+
+define void @store_clobber_no_alias1(i64 %a, ptr %p) {
+; CHECK-LABEL: @store_clobber_no_alias1(
+; CHECK-NEXT: [[STACK:%.*]] = alloca <256 x i8>, align 8
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: store i64 [[A:%.*]], ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <256 x i8>, align 8
+ %stack1 = getelementptr inbounds i8, ptr %stack, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ store i64 %a, ptr %p, align 8
+ %stack2 = getelementptr inbounds i8, ptr %stack, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
+
+define void @store_clobber_no_alias2(i64 %a, ptr %p) {
+; CHECK-LABEL: @store_clobber_no_alias2(
+; CHECK-NEXT: [[STACK:%.*]] = alloca <256 x i8>, align 8
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: store i64 [[A:%.*]], ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <256 x i8>, align 8
+ %stack1 = getelementptr inbounds i8, ptr %stack, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ store i64 %a, ptr %p, align 8
+ %stack2 = getelementptr inbounds i8, ptr %stack, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
+
+define void @store_clobber_no_alias_precise_fail(i64 %a) {
+; CHECK-LABEL: @store_clobber_no_alias_precise_fail(
+; CHECK-NEXT: [[STACK:%.*]] = alloca <256 x i8>, align 8
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: store i64 [[A:%.*]], ptr [[STACK]], align 8
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <256 x i8>, align 8
+ %stack1 = getelementptr inbounds i8, ptr %stack, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ store i64 %a, ptr %stack, align 8
+ %stack2 = getelementptr inbounds i8, ptr %stack, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
+
+define void @store_clobber_may_alias_fail(ptr %p, ptr %p1) {
+; CHECK-LABEL: @store_clobber_may_alias_fail(
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK:%.*]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1:%.*]], align 8
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack1 = getelementptr inbounds i8, ptr %p, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ store i64 0, ptr %p1, align 8
+ %stack2 = getelementptr inbounds i8, ptr %p, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
+
+define void @load_clobber_fail(ptr %p, ptr %p1) {
+; CHECK-LABEL: @load_clobber_fail(
+; CHECK-NEXT: [[STACK:%.*]] = alloca <256 x i8>, align 8
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: [[A:%.*]] = load i64, ptr [[P:%.*]], align 8
+; CHECK-NEXT: store i64 [[A]], ptr [[P1:%.*]], align 8
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <256 x i8>, align 8
+ %stack1 = getelementptr inbounds i8, ptr %stack, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ %a = load i64, ptr %p, align 8
+ store i64 %a, ptr %p1, align 8
+ %stack2 = getelementptr inbounds i8, ptr %stack, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
+
+define void @memset_volatile_fail(ptr %p) {
+; CHECK-LABEL: @memset_volatile_fail(
+; CHECK-NEXT: [[STACK:%.*]] = alloca <256 x i8>, align 8
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 0, i64 16, i1 true)
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <256 x i8>, align 8
+ %stack1 = getelementptr inbounds i8, ptr %stack, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 true)
+ %stack2 = getelementptr inbounds i8, ptr %stack, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
+
+define void @store_volatile_fail(i64 %a, ptr %p) {
+; CHECK-LABEL: @store_volatile_fail(
+; CHECK-NEXT: [[STACK:%.*]] = alloca <256 x i8>, align 8
+; CHECK-NEXT: [[STACK1:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 8
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK1]], i8 0, i64 136, i1 false)
+; CHECK-NEXT: store volatile i64 [[A:%.*]], ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[STACK2:%.*]] = getelementptr inbounds i8, ptr [[STACK]], i64 24
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[STACK2]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <256 x i8>, align 8
+ %stack1 = getelementptr inbounds i8, ptr %stack, i64 8
+ call void @llvm.memset.p0.i64(ptr %stack1, i8 0, i64 136, i1 false)
+ store volatile i64 %a, ptr %p
+ %stack2 = getelementptr inbounds i8, ptr %stack, i64 24
+ call void @llvm.memset.p0.i64(ptr %stack2, i8 0, i64 24, i1 false)
+ ret void
+}
``````````
https://github.com/llvm/llvm-project/pull/89550