[llvm] 2d60d7b - [LoopVectorize][Fix] Crash when invariant store address is calculated inside loop

Wed Sep 28 02:38:10 PDT 2022

Author: Igor Kirillov
Date: 2022-09-28T10:33:50+01:00
New Revision: 2d60d7ba1a26c50d90a518a6ebde4f08a8d1ac04

URL: https://github.com/llvm/llvm-project/commit/2d60d7ba1a26c50d90a518a6ebde4f08a8d1ac04
DIFF: https://github.com/llvm/llvm-project/commit/2d60d7ba1a26c50d90a518a6ebde4f08a8d1ac04.diff

LOG: [LoopVectorize][Fix] Crash when invariant store address is calculated inside loop

Fixes #57572

Generally, the LICM pass is responsible for hoisting code that calculates
an invariant address out of the loop, as it only needs to be calculated
once. But in the rare case where that does not happen, we will not
vectorize the loop.

Differential Revision: https://reviews.llvm.org/D133687

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
    llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index b3bea8c5be7e8..d8d21818f87a1 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -933,10 +933,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   // vectorize loop is made, runtime checks are added so as to make sure that
   // invariant address won't alias with any other objects.
   if (!LAI->getStoresToInvariantAddresses().empty()) {
-    // For each invariant address, check its last stored value is unconditional.
+    // For each invariant address, check if last stored value is unconditional
+    // and the address is not calculated inside the loop.
     for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
-      if (isInvariantStoreOfReduction(SI) &&
-          blockNeedsPredication(SI->getParent())) {
+      if (!isInvariantStoreOfReduction(SI))
+        continue;
+
+      if (blockNeedsPredication(SI->getParent())) {
         reportVectorizationFailure(
             "We don't allow storing to uniform addresses",
             "write of conditional recurring variant value to a loop "
@@ -944,6 +947,20 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
             "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
         return false;
       }
+
+      // Invariant address should be defined outside of loop. LICM pass usually
+      // makes sure it happens, but in rare cases it does not, we do not want
+      // to overcomplicate vectorization to support this case.
+      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
+        if (TheLoop->contains(Ptr)) {
+          reportVectorizationFailure(
+              "Invariant address is calculated inside the loop",
+              "write to a loop invariant address could not "
+              "be vectorized",
+              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+          return false;
+        }
+      }
     }
 
     if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index 9a8e57d396a5a..137d9035d8f5d 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -513,3 +513,25 @@ body:
 exit:
   ret void
 }
+
+define void @reduc_store_invariant_addr_not_hoisted(i32* %dst, i32* readonly %src) {
+; CHECK-LABEL: @reduc_store_invariant_addr_not_hoisted
+; CHECK-NOT: vector.body:
+entry:
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 %add, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}