[llvm] 0b24031 - [LoopVectorize] In LoopVectorize.cpp start using getSymbolicMaxBackedgeTakenCount (#108833)

Wed Oct 2 02:28:58 PDT 2024

Author: David Sherwood
Date: 2024-10-02T10:28:54+01:00
New Revision: 0b2403197ff026627cf27df63ce83cd53d1cd680

URL: https://github.com/llvm/llvm-project/commit/0b2403197ff026627cf27df63ce83cd53d1cd680
DIFF: https://github.com/llvm/llvm-project/commit/0b2403197ff026627cf27df63ce83cd53d1cd680.diff

LOG: [LoopVectorize] In LoopVectorize.cpp start using getSymbolicMaxBackedgeTakenCount (#108833)

LoopVectorizationLegality currently only treats a loop as legal to vectorise
if PredicatedScalarEvolution::getBackedgeTakenCount returns a valid
SCEV; more precisely, the loop must have an exact backedge-taken
count. Therefore, in LoopVectorize.cpp we can safely replace all calls to
getBackedgeTakenCount with calls to getSymbolicMaxBackedgeTakenCount,
since the result is the same.

This also helps prepare the loop vectoriser for PR #88385.

Added: 
    llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlan.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f5ef50934f59fd..e1f9f29ac6d92f 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4054,7 +4054,13 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     unsigned MaxVFtimesIC =
         UserIC ? *MaxPowerOf2RuntimeVF * UserIC : *MaxPowerOf2RuntimeVF;
     ScalarEvolution *SE = PSE.getSE();
-    const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
+    // Currently only loops with countable exits are vectorized, but calling
+    // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
+    // uncountable exits whilst also ensuring the symbolic maximum and known
+    // back-edge taken count remain identical for loops with countable exits.
+    const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
+    assert(BackedgeTakenCount == PSE.getBackedgeTakenCount() &&
+           "Invalid loop count");
     const SCEV *ExitCount = SE->getAddExpr(
         BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
     const SCEV *Rem = SE->getURemExpr(

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4247d20cb0e530..5e3a6388094940 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -881,11 +881,18 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
   auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
 
   // Create SCEV and VPValue for the trip count.
-  const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
-  assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
+
+  // Currently only loops with countable exits are vectorized, but calling
+  // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
+  // uncountable exits whilst also ensuring the symbolic maximum and known
+  // back-edge taken count remain identical for loops with countable exits.
+  const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
+  assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
+          BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) &&
+         "Invalid loop count");
   ScalarEvolution &SE = *PSE.getSE();
-  const SCEV *TripCount =
-      SE.getTripCountFromExitCount(BackedgeTakenCount, InductionTy, TheLoop);
+  const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
+                                                       InductionTy, TheLoop);
   Plan->TripCount =
       vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
 

diff  --git a/llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll
new file mode 100644
index 00000000000000..9dc76c6bf0c43d
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_early_exit.ll
@@ -0,0 +1,49 @@
+; REQUIRES: asserts
+; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -disable-output -debug 2>&1 < %s | FileCheck %s
+
+; CHECK-LABEL: LV: Found a loop: for.body
+; CHECK: LV: Not vectorizing: Unsupported conditional branch.
+; CHECK: loop not vectorized: loop control flow is not understood by vectorizer
+; CHECK: LV: Not vectorizing: Unsupported outer loop.
+
+@arr2 = external global [8 x i32], align 16
+@arr = external global [8 x [8 x i32]], align 16
+
+define i32 @foo(i32 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv.outer = phi i64 [ 0, %entry ], [%iv.outer.next, %for.inc ]
+  %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %iv.outer
+  %ld1 = load i32, ptr %arrayidx, align 4
+  %0 = trunc i64 %iv.outer to i32
+  store i32 %0, ptr %arrayidx, align 4
+  %1 = trunc i64 %iv.outer to i32
+  %add = add nsw i32 %1, %n
+  %cmp.early = icmp eq i32 %ld1, 3
+  br i1 %cmp.early, label %for.early, label %for.body.inner
+
+for.body.inner:
+  %iv.inner = phi i64 [ 0, %for.body ], [ %iv.inner.next, %for.body.inner ]
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %iv.inner, i64 %iv.outer
+  store i32 %add, ptr %arrayidx7, align 4
+  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
+  %cmp.inner = icmp eq i64 %iv.inner.next, 8
+  br i1 %cmp.inner, label %for.inc, label %for.body.inner
+
+for.inc:
+  %iv.outer.next = add nuw nsw i64 %iv.outer, 1
+  %cmp.outer = icmp eq i64%iv.outer.next, 8
+  br i1 %cmp.outer, label %for.end, label %for.body, !llvm.loop !1
+
+for.early:
+  ret i32 1
+
+for.end:
+  ret i32 0
+}
+
+!1 = distinct !{!1, !2, !3}
+!2 = !{!"llvm.loop.vectorize.width", i32 4}
+!3 = !{!"llvm.loop.vectorize.enable", i1 true}