[llvm] abdf0da - [LoopIdiom] Fix bailout for aliasing in memcpy transform.

Tue May 31 17:24:41 PDT 2022

Author: Eli Friedman
Date: 2022-05-31T17:24:23-07:00
New Revision: abdf0da8009f272f6c3d6398cf63f9f0a8257637

URL: https://github.com/llvm/llvm-project/commit/abdf0da8009f272f6c3d6398cf63f9f0a8257637
DIFF: https://github.com/llvm/llvm-project/commit/abdf0da8009f272f6c3d6398cf63f9f0a8257637.diff

LOG: [LoopIdiom] Fix bailout for aliasing in memcpy transform.

Commit dd5991cc modified the aliasing checks here to allow transforming
a memcpy where the source and destination point into the same object.
However, the change accidentally made the code skip the alias check for
other operations in the loop.

Instead of completely skipping the alias check, just skip the check for
whether the memcpy aliases itself.

Differential Revision: https://reviews.llvm.org/D126486

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/test/Transforms/LoopIdiom/basic.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 87202f769ef4..7a62a3b502ce 100644

--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1422,26 +1422,19 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
 
   // If the store is a memcpy instruction, we must check if it will write to
   // the load memory locations. So remove it from the ignored stores.
-  if (IsMemCpy)
-    IgnoredInsts.erase(TheStore);
   MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
+  if (IsMemCpy && !Verifier.IsSameObject)
+    IgnoredInsts.erase(TheStore);
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                             StoreSizeSCEV, *AA, IgnoredInsts)) {
-    if (!IsMemCpy) {
-      ORE.emit([&]() {
-        return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
-                                        TheLoad)
-               << ore::NV("Inst", InstRemark) << " in "
-               << ore::NV("Function", TheStore->getFunction())
-               << " function will not be hoisted: "
-               << ore::NV("Reason", "The loop may access load location");
-      });
-      return Changed;
-    }
-    // At this point loop may access load only for memcpy in same underlying
-    // object. If that's not the case bail out.
-    if (!Verifier.IsSameObject)
-      return Changed;
+    ORE.emit([&]() {
+      return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
+             << ore::NV("Inst", InstRemark) << " in "
+             << ore::NV("Function", TheStore->getFunction())
+             << " function will not be hoisted: "
+             << ore::NV("Reason", "The loop may access load location");
+    });
+    return Changed;
   }
 
   bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;

diff  --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 1bd1dcb6e0d7..3c9d8a53a9f4 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -1536,6 +1536,49 @@ for.body:                                      ; preds = %entry, %for.body
   br i1 %cmp, label %for.body, label %for.cond.cleanup
 }
 
+; Do not form memmove when there's an aliasing operation, even
+; if the memcpy source and destination are in the same object.
+define void @do_not_form_memmove8(i64* %p) {
+; CHECK-LABEL: @do_not_form_memmove8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 1000
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       loop:
+; CHECK-NEXT:    [[X4:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X13:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[X5:%.*]] = zext i32 [[X4]] to i64
+; CHECK-NEXT:    [[X7:%.*]] = getelementptr inbounds i64, i64* [[P2]], i64 [[X5]]
+; CHECK-NEXT:    [[X8:%.*]] = bitcast i64* [[X7]] to i8*
+; CHECK-NEXT:    store i64 1, i64* [[X7]], align 4
+; CHECK-NEXT:    [[X11:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[X5]]
+; CHECK-NEXT:    [[X12:%.*]] = bitcast i64* [[X11]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[X12]], i8* [[X8]], i64 8, i1 false)
+; CHECK-NEXT:    [[X13]] = add i32 [[X4]], 1
+; CHECK-NEXT:    [[X14:%.*]] = icmp eq i32 [[X13]], 44
+; CHECK-NEXT:    br i1 [[X14]], label [[EXIT:%.*]], label [[LOOP]]
+;
+entry:
+  %p2 = getelementptr inbounds i64, i64* %p, i64 1000
+  br label %loop
+
+exit:
+  ret void
+
+loop:
+  %x4 = phi i32 [ 0, %entry ], [ %x13, %loop ]
+  %x5 = zext i32 %x4 to i64
+  %x7 = getelementptr inbounds i64, i64* %p2, i64 %x5
+  %x8 = bitcast i64* %x7 to i8*
+  store i64 1, i64* %x7, align 4
+  %x11 = getelementptr inbounds i64, i64* %p, i64 %x5
+  %x12 = bitcast i64* %x11 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x12, i8* %x8, i64 8, i1 false)
+  %x13 = add i32 %x4, 1
+  %x14 = icmp eq i32 %x13, 44
+  br i1 %x14, label %exit, label %loop
+}
+
 ;; Memcpy formation is still preferred over memmove.
 define void @prefer_memcpy_over_memmove(i8* noalias %Src, i8* noalias %Dest, i64 %Size) {
 ; CHECK-LABEL: @prefer_memcpy_over_memmove(