[llvm] [MemCpyOpt] fix miscompile for non-dominated use of src alloca for stack-move optimization (PR #66618)

Kohei Asano via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 18 01:15:45 PDT 2023


https://github.com/khei4 created https://github.com/llvm/llvm-project/pull/66618

Stack-move optimization, the optimization that merges src and dest alloca of the full-size copy, replaces all uses of the dest alloca with src alloca. For safety, we needed to check all uses of the dest alloca locations are dominated by src alloca, to be replaced. This PR adds the check for that. 

Fixes #65225 


>From 60e64e52aa60e20bb479ef00248bb79a644688f5 Mon Sep 17 00:00:00 2001
From: khei4 <kk.asano.luxy at gmail.com>
Date: Mon, 18 Sep 2023 16:49:39 +0900
Subject: [PATCH] [MemCpyOpt] let stack-move optzn bailout if any use of the
 location isn't dominated by src alloca

---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 15 ++++++++---
 llvm/test/Transforms/MemCpyOpt/stack-move.ll  | 25 +++++++++++++++++++
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 8c9d9c6e91b70fe..33d08b1b982214c 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1481,28 +1481,35 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
     Worklist.push_back(AI);
     unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
     Worklist.reserve(MaxUsesToExplore);
-    SmallSet<const Use *, 20> Visited;
+    SmallSet<Instruction *, 20> Visited;
     while (!Worklist.empty()) {
       Instruction *I = Worklist.back();
       Worklist.pop_back();
       for (const Use &U : I->uses()) {
+        auto *UI = cast<Instruction>(U.getUser());
+        // If any use that isn't dominated by SrcAlloca exists, non-dominating
+        // uses will be produced.
+        if (!DT->dominates(SrcAlloca, UI)) {
+          LLVM_DEBUG(dbgs() << "Stack Move: SrcAlloca doesn't dominate all "
+                               "uses for the location, bailing\n");
+          return false;
+        }
         if (Visited.size() >= MaxUsesToExplore) {
           LLVM_DEBUG(
               dbgs()
               << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
           return false;
         }
-        if (!Visited.insert(&U).second)
+        if (!Visited.insert(UI).second)
           continue;
         switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
         case UseCaptureKind::MAY_CAPTURE:
           return false;
         case UseCaptureKind::PASSTHROUGH:
           // Instructions cannot have non-instruction users.
-          Worklist.push_back(cast<Instruction>(U.getUser()));
+          Worklist.push_back(UI);
           continue;
         case UseCaptureKind::NO_CAPTURE: {
-          auto *UI = cast<Instruction>(U.getUser());
           if (UI->isLifetimeStartOrEnd()) {
             // We note the locations of these intrinsic calls so that we can
             // delete them later if the optimization succeeds, this is safe
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 161a732fdc2a94b..c100382fda89b41 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -981,6 +981,31 @@ bb2:
 
 ; Optimization failures follow:
 
+; Tests that a the optimization isn't performed,
+; when any use that isn't dominated by SrcAlloca exists.
+define i32 @use_not_dominated_by_src_alloca() {
+; CHECK-LABEL: define i32 @use_not_dominated_by_src_alloca() {
+; CHECK-NEXT:    [[DEST:%.*]] = alloca i1, align 1
+; CHECK-NEXT:    [[DEST_GEP:%.*]] = getelementptr i64, ptr [[DEST]], i64 -1
+; CHECK-NEXT:    [[DEST_USE:%.*]] = load i8, ptr [[DEST_GEP]], align 1
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i8, align 4
+; CHECK-NEXT:    [[SRC_VAL:%.*]] = load i1, ptr [[SRC]], align 4
+; CHECK-NEXT:    store i1 [[SRC_VAL]], ptr [[DEST]], align 1
+; CHECK-NEXT:    ret i32 0
+;
+  %dest = alloca i1, align 1
+  ; Replacing the use of dest with src causes no domination uses.
+  %dest.gep = getelementptr i64, ptr %dest, i64 -1
+  %dest.use = load i8, ptr %dest.gep, align 1
+  %src = alloca i8, align 4
+  %src.val = load i1, ptr %src, align 4
+
+  store i1 %src.val, ptr %dest, align 1
+
+  ret i32 0
+}
+
+
 ; Tests that a memcpy that doesn't completely overwrite a stack value is a use
 ; for the purposes of liveness analysis, not a definition.
 define void @incomplete_memcpy() {



More information about the llvm-commits mailing list