[PATCH] D106646: [LoopVectorize] Don't interleave scalar ordered reductions for inner loops
David Sherwood via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 27 09:50:20 PDT 2021
This revision was automatically updated to reflect the committed changes.
Closed by commit rGa5dd6c6cf935: [LoopVectorize] Don't interleave scalar ordered reductions for inner loops (authored by david-arm).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D106646/new/
https://reviews.llvm.org/D106646
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll
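For context, the new test exercises an in-order (strict) floating-point reduction in the inner loop of a loop nest, with the vector width forced to 1 so only interleaving is in play. A rough C equivalent of the loop nest the test was presumably reduced from might look like the sketch below (my own illustration, not part of the patch; names simply mirror the IR):

/* Hedged sketch only: a C loop nest that plausibly corresponds to the
 * strict-fadd-vf1.ll test, assuming no fast-math so the inner fadd
 * reduction must be evaluated in source order. */
void foo(float *restrict dst, const float *restrict src, long M, long N) {
  for (long i = 0; i < M; ++i) {     /* outer loop, depth 1 */
    float sum = 0.0f;                /* ordered (strict) FP reduction */
    for (long j = 0; j < N; ++j)     /* inner loop, VF forced to 1 */
      sum += src[i * N + j];
    dst[i] = sum;
  }
}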
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll
@@ -0,0 +1,42 @@
+; REQUIRES: asserts
+; RUN: opt -loop-vectorize -enable-strict-reductions=true -force-vector-width=1 -S < %s -debug 2>log | FileCheck %s
+; RUN: cat log | FileCheck %s --check-prefix=CHECK-DEBUG
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; CHECK-DEBUG: LV: Not interleaving scalar ordered reductions.
+
+define void @foo(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %M, i64 %N) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: vector.body
+
+entry:
+ br label %for.body.us
+
+for.body.us: ; preds = %entry, %for.cond3
+ %i.023.us = phi i64 [ %inc8.us, %for.cond3 ], [ 0, %entry ]
+ %arrayidx.us = getelementptr inbounds float, float* %dst, i64 %i.023.us
+ %mul.us = mul nsw i64 %i.023.us, %N
+ br label %for.body3.us
+
+for.body3.us: ; preds = %for.body.us, %for.body3.us
+ %0 = phi float [ 0.000000e+00, %for.body.us ], [ %add6.us, %for.body3.us ]
+ %j.021.us = phi i64 [ 0, %for.body.us ], [ %inc.us, %for.body3.us ]
+ %add.us = add nsw i64 %j.021.us, %mul.us
+ %arrayidx4.us = getelementptr inbounds float, float* %src, i64 %add.us
+ %1 = load float, float* %arrayidx4.us, align 4
+ %add6.us = fadd float %1, %0
+ %inc.us = add nuw nsw i64 %j.021.us, 1
+ %exitcond.not = icmp eq i64 %inc.us, %N
+ br i1 %exitcond.not, label %for.cond3, label %for.body3.us
+
+for.cond3: ; preds = %for.body3.us
+ %add6.us.lcssa = phi float [ %add6.us, %for.body3.us ]
+ store float %add6.us.lcssa, float* %arrayidx.us, align 4
+ %inc8.us = add nuw nsw i64 %i.023.us, 1
+ %exitcond26.not = icmp eq i64 %inc8.us, %M
+ br i1 %exitcond26.not, label %exit, label %for.body.us
+
+exit: ; preds = %for.cond3
+ ret void
+}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6473,9 +6473,21 @@
   // If we have a scalar reduction (vector reductions are already dealt with
   // by this point), we can increase the critical path length if the loop
-  // we're interleaving is inside another loop. Limit, by default to 2, so the
-  // critical path only gets increased by one reduction operation.
+  // we're interleaving is inside another loop. For tree-wise reductions
+  // set the limit to 2, and for ordered reductions it's best to disable
+  // interleaving entirely.
   if (HasReductions && TheLoop->getLoopDepth() > 1) {
+    bool HasOrderedReductions =
+        any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
+          const RecurrenceDescriptor &RdxDesc = Reduction.second;
+          return RdxDesc.isOrdered();
+        });
+    if (HasOrderedReductions) {
+      LLVM_DEBUG(
+          dbgs() << "LV: Not interleaving scalar ordered reductions.\n");
+      return 1;
+    }
+
     unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC);
     SmallIC = std::min(SmallIC, F);
     StoresIC = std::min(StoresIC, F);
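
To illustrate the comment in the hunk above (my own sketch, not part of the patch): interleaving a scalar reduction normally pays off by keeping several independent partial sums, but that reassociates the floating-point additions, which is only legal for tree-wise (fast-math) reductions. For an ordered reduction the interleaved copies would have to keep feeding a single accumulator in order, so there is no extra ILP to win and the serial fadd chain inside the outer loop only gets longer, hence returning an interleave count of 1.

/* Illustrative C only (assumption: scalar loop, interleave count 2). */
float ordered_sum(const float *src, long N) {
  float sum = 0.0f;
  for (long j = 0; j < N; ++j)
    sum += src[j];              /* strict order: ((0 + s0) + s1) + s2 ... */
  return sum;
}

/* What interleaving by 2 would like to do: two independent chains.
 * This reassociates the fadds, so it is only valid for tree-wise
 * (fast-math) reductions, not for ordered ones. */
float treewise_sum_ic2(const float *src, long N) { /* assumes N is even */
  float sum0 = 0.0f, sum1 = 0.0f;
  for (long j = 0; j < N; j += 2) {
    sum0 += src[j];
    sum1 += src[j + 1];
  }
  return sum0 + sum1;           /* reassociated final combine */
}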