[PATCH] D153453: [MemCpyOpt] implement single BB stack-move optimization which unify the static unescaped allocas
Kohei Asano via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 8 22:29:58 PDT 2023
khei4 added inline comments.
================
Comment at: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp:1514-1515
+ auto *UI = cast<Instruction>(U.getUser());
+ if (DestAlloca->getParent() != UI->getParent())
+ return false;
+ if (!FirstUser || UI->comesBefore(FirstUser))
----------------
Note: this might be too restrictive, but I think it's OK as a first step for the single-BB case :)
================
Comment at: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp:1583-1585
+ // We can do the transformation. First, align the allocas appropriately.
+ SrcAlloca->setAlignment(
+ std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
----------------
Although it is implicitly assumed that the two allocas have the same type, should we check that this alignment is enforceable?
What CxtI should be set for getOrEnforceKnownAlignment? For all uses of dest?
Maybe the following tests are possible:
```
; Tests that merging two allocas shouldn't be more poisonous:
; a src that cannot be enforced to the bigger dest alignment is not acceptable.
define void @align_src_smaller() {
%src = alloca i64, align 4
%dest = alloca i64, align 8
call void @llvm.lifetime.start.p0(i64 8, ptr nocapture %src)
call void @llvm.lifetime.start.p0(i64 8, ptr nocapture %dest)
store i64 42, ptr %src
%1 = call i32 @use_nocapture(ptr noundef nocapture %src)
call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
%1 = call i32 @use_nocapture(ptr noundef nocapture align 8 %dest)
call void @llvm.lifetime.end.p0(i64 8, ptr nocapture %src)
call void @llvm.lifetime.end.p0(i64 8, ptr nocapture %dest)
ret void
}
```
```
; TODO: Merge alloca.
; Tests that merging two allocas shouldn't be more poisonous; a src whose alignment can be enforced to the dest alignment is OK.
define void @align_src_enforced() {
; CHECK-LABEL: define void @align_src_enforced() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 1
; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @expect_4byte_aligned(ptr nocapture noundef [[DEST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT: ret void
;
%src = alloca %struct.Foo, align 1
%dest = alloca %struct.Foo, align 8
call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
; i32 is defined to be 4-byte aligned in the datalayout.
store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
%1 = call i32 @use_nocapture(ptr noundef nocapture %src)
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
%2 = call i32 @expect_4byte_aligned(ptr noundef nocapture %dest)
call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
ret void
}
```
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D153453/new/
https://reviews.llvm.org/D153453
More information about the llvm-commits
mailing list