[PATCH] D153453: [MemCpyOpt] implement single BB stack-move optimization which unify the static unescaped allocas

Kohei Asano via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 8 22:29:58 PDT 2023


khei4 added inline comments.


================
Comment at: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp:1514-1515
+          auto *UI = cast<Instruction>(U.getUser());
+          if (DestAlloca->getParent() != UI->getParent())
+            return false;
+          if (!FirstUser || UI->comesBefore(FirstUser))
----------------
Note: this might be too restrictive, but I think it's OK as a first step for the single-BB case :)


================
Comment at: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp:1583-1585
+  // We can do the transformation. First, align the allocas appropriately.
+  SrcAlloca->setAlignment(
+      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
----------------
Although it is implicitly assumed that the two allocas have the same type, should we check that this alignment is enforceable?
What CxtI should be passed to getOrEnforceKnownAlignment? All uses of dest?
Maybe the following tests are possible:
```
; Tests that merging two allocas shouldn't be more poisonous,
; bigger and not enforceable aligned src is not acceptable.
define void @align_src_smaller() {
  %src = alloca i64, align 4
  %dest = alloca i64, align 8
  call void @llvm.lifetime.start.p0(i64 8, ptr nocapture %src)
  call void @llvm.lifetime.start.p0(i64 8, ptr nocapture %dest)
  store i64 42, ptr %src
  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)

  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)

  %1 = call i32 @use_nocapture(ptr noundef nocapture align 8 %dest)
  call void @llvm.lifetime.end.p0(i64 8, ptr nocapture %src)
  call void @llvm.lifetime.end.p0(i64 8, ptr nocapture %dest)
  ret void
}
```

```
; TODO: Merge alloca.
; Tests that merging two allocas shouldn't be more poisonous, src enforceable to the dest align is ok.
define void @align_src_enforced() {
; CHECK-LABEL: define void @align_src_enforced() {
; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 1
; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @expect_4byte_aligned(ptr nocapture noundef [[DEST]])
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
; CHECK-NEXT:    ret void
;
  %src = alloca %struct.Foo, align 1
  %dest = alloca %struct.Foo, align 8
  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
  ; i32 is defined to be 4-byte aligned in the datalayout.
  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)

  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)

  %2 = call i32 @expect_4byte_aligned(ptr noundef nocapture %dest)
  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
  ret void
}
```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D153453/new/

https://reviews.llvm.org/D153453



More information about the llvm-commits mailing list