[PATCH] D113973: [LoopVectorize][CostModel] Choose smaller VFs for in-loop reductions with no loads/stores
Rosie Sumpter via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 22 01:39:02 PST 2021
RosieSumpter updated this revision to Diff 388813.
RosieSumpter edited the summary of this revision.
RosieSumpter added a comment.
- If there are no element types, set the max width to the smallest legal int width, falling back to 32 only if no legal int sizes are set.
- Update the `X86/funclet.ll` test.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D113973/new/
https://reviews.llvm.org/D113973
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
llvm/test/Transforms/LoopVectorize/X86/funclet.ll
llvm/test/Transforms/LoopVectorize/pr32859.ll
llvm/test/Transforms/LoopVectorize/pr36983.ll
Index: llvm/test/Transforms/LoopVectorize/pr36983.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr36983.ll
+++ llvm/test/Transforms/LoopVectorize/pr36983.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
; There could be more than one LCSSA PHIs in loop exit block.
Index: llvm/test/Transforms/LoopVectorize/pr32859.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr32859.ll
+++ llvm/test/Transforms/LoopVectorize/pr32859.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
; Out of the LCSSA form we could have 'phi i32 [ loop-invariant, %for.inc.2.i ]'
; but the IR Verifier requires for PHI one entry for each predecessor of
Index: llvm/test/Transforms/LoopVectorize/X86/funclet.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/funclet.ll
+++ llvm/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -33,7 +33,7 @@
; CHECK-LABEL: define void @test1(
; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
-; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
+; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
Index: llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -31,3 +31,30 @@
for.end:
ret void
}
+
+; For in-loop reductions with no loads or stores in the loop the default widest
+; type is 32 bits, which allows a sensible VF to be chosen.
+
+; CHECK-LABEL: Checking a loop in "no_loads_stores"
+; CHECK: The Smallest and Widest types: 4294967295 / 32 bits
+; CHECK: Selecting VF: 4
+
+define double @no_loads_stores() {
+entry:
+ br label %for.body
+
+for.body:
+ %s.09 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %conv = sitofp i64 %i.08 to double
+ %mul = fmul double %conv, %conv
+ %add = fadd double %s.09, %mul
+ %inc = add nuw nsw i64 %i.08, 1
+ %exitcond.not = icmp eq i64 %inc, 1234567
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ %.lcssa = phi double [ %add, %for.body ]
+ ret double %.lcssa
+}
+
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6319,6 +6319,17 @@
unsigned MinWidth = -1U;
unsigned MaxWidth = 8;
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
+ // For in-loop reductions, no element types are added to ElementTypesInLoop
+ // if there are no loads/stores in the loop. In this case, set the maximum
+ // width to be the smallest legal int width, or 32 if there are no legal
+ // widths set, so that a sensible VF is chosen.
+ if (ElementTypesInLoop.empty()) {
+ MaxWidth = 32;
+ Type *SmallestIntType =
+ DL.getSmallestLegalIntType(TheLoop->getHeader()->getContext());
+ if (SmallestIntType)
+ MaxWidth = DL.getTypeSizeInBits(SmallestIntType).getFixedSize();
+ }
for (Type *T : ElementTypesInLoop) {
MinWidth = std::min<unsigned>(
MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D113973.388813.patch
Type: text/x-patch
Size: 3805 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211122/ef21b69c/attachment.bin>
More information about the llvm-commits
mailing list