[llvm] b9051ba - [LV] Remove assert that VF cannot be scalable in setCostBasedWideningDecision.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 26 09:12:08 PDT 2021
Author: Sander de Smalen
Date: 2021-07-26T17:11:45+01:00
New Revision: b9051ba84836f6c2a3b008638de14b588e58fa9f
URL: https://github.com/llvm/llvm-project/commit/b9051ba84836f6c2a3b008638de14b588e58fa9f
DIFF: https://github.com/llvm/llvm-project/commit/b9051ba84836f6c2a3b008638de14b588e58fa9f.diff
LOG: [LV] Remove assert that VF cannot be scalable in setCostBasedWideningDecision.
Scalarization for scalable vectors is not (yet) supported, so the
LV discards a VF when scalarization is chosen as the widening
decision. It should therefore not assert that the VF is not scalable
when it computes the decision to scalarize.
The code can get here when the interleave cost, gather/scatter cost and
scalarization cost are all illegal. This may happen, e.g., for SVE
when VF=1, to avoid generating `<vscale x 1 x eltty>` types that
the code-generator cannot yet handle.
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D106656
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f1d83d9e81f2..564812bc8a52 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7418,8 +7418,6 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
Decision = CM_GatherScatter;
Cost = GatherScatterCost;
} else {
- assert(!VF.isScalable() &&
- "We cannot yet scalarise for scalable vectors");
Decision = CM_Scalarize;
Cost = ScalarizationCost;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
index 573801947bfb..7c481d110321 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s
define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
; CHECK-LABEL: @gather_nxv4i32_ind64
@@ -122,6 +122,37 @@ for.cond.cleanup: ; preds = %for.inc, %entry
ret void
}
+
+
+define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: @gather_nxv4i32_ind64_stride2
+; CHECK: vector.body:
+; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
+; CHECK-DAG: %[[STEP:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-DAG: %[[IDXSPLATINS:.*]] = insertelement <vscale x 4 x i64> poison, i64 %[[IDX]], i32 0
+; CHECK-DAG: %[[IDXSPLAT:.*]] = shufflevector <vscale x 4 x i64> %[[IDXSPLATINS]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK: %[[ADD:.*]] = add <vscale x 4 x i64> %[[IDXSPLAT]], %[[STEP]]
+; CHECK: %[[MUL:.*]] = shl <vscale x 4 x i64> %[[ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+; CHECK: %[[PTRS:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64> %[[MUL]]
+; CHECK: call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[PTRS]]
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %indvars.iv.stride2 = mul i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ store float %0, float* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+}
+
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
More information about the llvm-commits mailing list