[llvm] [SCEV][LV] Add Stride equal to one Predicate to enable strided access versioning (PR #77287)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 8 00:59:42 PST 2024
https://github.com/ShivaChen created https://github.com/llvm/llvm-project/pull/77287
There is a case in TSVC that is not vectorized because the BECount is unknown.
float s172(int xa, int xb) {
for (int i = xa - 1; i < 32000; i += xb)
a[i] += b[i];
}
By assuming the stride is one and generating a runtime check to guard the vectorized loop, it seems this case can be vectorized.
>From 18fab95c34ba6ccae8f1b34ba48a2c7b2b508804 Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Thu, 4 Jan 2024 07:40:11 +0000
Subject: [PATCH 1/2] Add s172() to version-mem-access.ll
---
.../LoopVectorize/version-mem-access.ll | 48 +++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
index 7bf4fbd89b0eea..353c0185cfae03 100644
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -92,3 +92,51 @@ for.end.loopexit:
for.end:
ret void
}
+
+; CHECK-LABEL: s172
+; CHECK-NOT: vector.body
+
+ at b = global [32000 x float] zeroinitializer, align 64
+ at a = global [32000 x float] zeroinitializer, align 64
+
+; for (int i = xa - 1; i < 32000; i += xb)
+; a[i] += b[i];
+;
+define float @s172(i32 signext %xa, i32 signext %xb) mustprogress {
+entry:
+ %cmp214 = icmp slt i32 %xa, 32001
+ br i1 %cmp214, label %for.body.us.preheader, label %for.cond.cleanup
+
+for.body.us.preheader: ; preds = %entry
+ %sub = add i32 %xa, -1
+ %0 = sext i32 %sub to i64
+ %1 = sext i32 %xb to i64
+ br label %for.body.us
+
+for.body.us: ; preds = %for.body.us.preheader, %for.cond1.for.cond.cleanup3_crit_edge.us
+ %nl.016.us = phi i32 [ %inc.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.body.us.preheader ]
+ br label %for.body4.us
+
+for.body4.us: ; preds = %for.body.us, %for.body4.us
+ %indvars.iv = phi i64 [ %0, %for.body.us ], [ %indvars.iv.next, %for.body4.us ]
+ %arrayidx.us = getelementptr inbounds [32000 x float], ptr @b, i64 0, i64 %indvars.iv
+ %2 = load float, ptr %arrayidx.us, align 4
+ %arrayidx6.us = getelementptr inbounds [32000 x float], ptr @a, i64 0, i64 %indvars.iv
+ %3 = load float, ptr %arrayidx6.us, align 4
+ %add.us = fadd fast float %3, %2
+ store float %add.us, ptr %arrayidx6.us, align 4
+ %indvars.iv.next = add i64 %indvars.iv, %1
+ %cmp2.us = icmp slt i64 %indvars.iv.next, 32000
+ br i1 %cmp2.us, label %for.body4.us, label %for.cond1.for.cond.cleanup3_crit_edge.us
+
+for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.body4.us
+ %inc.us = add nuw nsw i32 %nl.016.us, 1
+ %exitcond.not = icmp eq i32 %inc.us, 100000
+ br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body.us
+
+for.cond.cleanup.loopexit: ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret float undef
+}
>From a88b09a6654e00376f7a3777f6e1ab4883a15899 Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Thu, 28 Dec 2023 06:41:20 +0000
Subject: [PATCH 2/2] [SCEV][LV] Add Stride equal to one Predicate to enable
strided access versioning
---
llvm/lib/Analysis/ScalarEvolution.cpp | 15 ++++++++++++++-
.../LoopVectorize/version-mem-access.ll | 6 +++++-
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 623814c038a78f..3c712ead953186 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -12778,10 +12778,23 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// The positive stride case is the same as isKnownPositive(Stride) returning
// true (original behavior of the function).
//
- if (PredicatedIV || !NoWrap || !loopIsFiniteByAssumption(L) ||
+ if (PredicatedIV || !loopIsFiniteByAssumption(L) ||
!loopHasNoAbnormalExits(L))
return getCouldNotCompute();
+ // Adding Stride equal to one Predicate when there is no wrap flags.
+ // It might enable strided access versioning in LAA and calculate BECount
+ // with Stride = 1.
+ if (!NoWrap) {
+ if (AllowPredicates) {
+ const auto *One =
+ static_cast<const SCEVConstant *>(getOne(Stride->getType()));
+ Predicates.insert(getEqualPredicate(Stride, One));
+ Stride = One;
+ } else
+ return getCouldNotCompute();
+ }
+
if (!isKnownNonZero(Stride)) {
// If we have a step of zero, and RHS isn't invariant in L, we don't know
// if it might eventually be greater than start and if so, on which
diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
index 353c0185cfae03..f1283365ef52a4 100644
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -93,8 +93,12 @@ for.end:
ret void
}
+; We can vectorize the loop by using stride = 1 to calculate iteration count
+; and generate the runtime check to guard the vectorized loop.
+
; CHECK-LABEL: s172
-; CHECK-NOT: vector.body
+; CHECK-DAG: icmp ne i32 %xb, 1
+; CHECK: vector.body
@b = global [32000 x float] zeroinitializer, align 64
@a = global [32000 x float] zeroinitializer, align 64
More information about the llvm-commits
mailing list