[llvm] baf031a - [MemCpyOpt] fix miscompile for non-dominated use of src alloca for stack-move optimization (#66618)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 18 05:29:15 PDT 2023


Author: Kohei Asano
Date: 2023-09-18T21:29:10+09:00
New Revision: baf031a853d1d301d6798ef3277b89875625a0b1

URL: https://github.com/llvm/llvm-project/commit/baf031a853d1d301d6798ef3277b89875625a0b1
DIFF: https://github.com/llvm/llvm-project/commit/baf031a853d1d301d6798ef3277b89875625a0b1.diff

LOG: [MemCpyOpt] fix miscompile for non-dominated use of src alloca for stack-move optimization (#66618)

Stack-move optimization, the optimization that merges src and dest
alloca of the full-size copy, replaces all uses of the dest alloca with
src alloca. For safety, we needed to check all uses of the dest alloca
locations are dominated by src alloca, to be replaced. This PR adds the
check for that.

Fixes #65225

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
    llvm/test/Transforms/MemCpyOpt/stack-move.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 8c9d9c6e91b70fe..a21ee6151fe026d 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1486,6 +1486,15 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
       Instruction *I = Worklist.back();
       Worklist.pop_back();
       for (const Use &U : I->uses()) {
+        auto *UI = cast<Instruction>(U.getUser());
+        // TODO: We can perform the transformation if we move src alloca to
+        // before the dominator of all uses. If any use that isn't dominated by
+        // SrcAlloca exists, non-dominating uses will be produced.
+        if (!DT->dominates(SrcAlloca, UI)) {
+          LLVM_DEBUG(dbgs() << "Stack Move: SrcAlloca doesn't dominate all "
+                               "uses for the location, bailing\n");
+          return false;
+        }
         if (Visited.size() >= MaxUsesToExplore) {
           LLVM_DEBUG(
               dbgs()
@@ -1499,10 +1508,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
           return false;
         case UseCaptureKind::PASSTHROUGH:
           // Instructions cannot have non-instruction users.
-          Worklist.push_back(cast<Instruction>(U.getUser()));
+          Worklist.push_back(UI);
           continue;
         case UseCaptureKind::NO_CAPTURE: {
-          auto *UI = cast<Instruction>(U.getUser());
           if (UI->isLifetimeStartOrEnd()) {
             // We note the locations of these intrinsic calls so that we can
             // delete them later if the optimization succeeds, this is safe

diff  --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 161a732fdc2a94b..f6be486dce8ac1e 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -981,6 +981,32 @@ bb2:
 
 ; Optimization failures follow:
 
+; TODO: we can merge those alloca if we move src alloca to the start of the BB.
+; Tests that a the optimization isn't performed,
+; when any use that isn't dominated by SrcAlloca exists.
+define i32 @use_not_dominated_by_src_alloca() {
+; CHECK-LABEL: define i32 @use_not_dominated_by_src_alloca() {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca i1, align 1
+; CHECK-NEXT:    [[DEST_GEP:%.*]] = getelementptr i64, ptr [[DEST]], i64 -1
+; CHECK-NEXT:    [[DEST_USE:%.*]] = load i8, ptr [[DEST_GEP]], align 1
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, align 4
+; CHECK-NEXT:    [[SRC_VAL:%.*]] = load i1, ptr [[SRC]], align 4
+; CHECK-NEXT:    store i1 [[SRC_VAL]], ptr [[DEST]], align 1
+; CHECK-NEXT:    ret i32 0
+;
+  %dest = alloca i1, align 1
+  ; Replacing the use of dest with src causes no domination uses.
+  %dest.gep = getelementptr i64, ptr %dest, i64 -1
+  %dest.use = load i8, ptr %dest.gep, align 1
+  %src = alloca i8, align 4
+  %src.val = load i1, ptr %src, align 4
+
+  store i1 %src.val, ptr %dest, align 1
+
+  ret i32 0
+}
+
+
 ; Tests that a memcpy that doesn't completely overwrite a stack value is a use
 ; for the purposes of liveness analysis, not a definition.
 define void @incomplete_memcpy() {


        


More information about the llvm-commits mailing list