[PATCH] D113973: [LoopVectorize][CostModel] Choose smaller VFs for in-loop reductions with no loads/stores

Rosie Sumpter via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 22 01:39:02 PST 2021


RosieSumpter updated this revision to Diff 388813.
RosieSumpter edited the summary of this revision.
RosieSumpter added a comment.

- If there are no element types, set the max width to the smallest legal int width; fall back to 32 only when no legal int widths are set.
- Update the `X86/funclet.ll` test.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113973/new/

https://reviews.llvm.org/D113973

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
  llvm/test/Transforms/LoopVectorize/X86/funclet.ll
  llvm/test/Transforms/LoopVectorize/pr32859.ll
  llvm/test/Transforms/LoopVectorize/pr36983.ll


Index: llvm/test/Transforms/LoopVectorize/pr36983.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr36983.ll
+++ llvm/test/Transforms/LoopVectorize/pr36983.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
 
 ; There could be more than one LCSSA PHIs in loop exit block.
 
Index: llvm/test/Transforms/LoopVectorize/pr32859.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr32859.ll
+++ llvm/test/Transforms/LoopVectorize/pr32859.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
 
 ; Out of the LCSSA form we could have 'phi i32 [ loop-invariant, %for.inc.2.i ]'
 ; but the IR Verifier requires for PHI one entry for each predecessor of
Index: llvm/test/Transforms/LoopVectorize/X86/funclet.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/funclet.ll
+++ llvm/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -33,7 +33,7 @@
 
 ; CHECK-LABEL: define void @test1(
 ; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
-; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
+; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
 
 declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
 
Index: llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -31,3 +31,30 @@
 for.end:
   ret void
 }
+
+; For in-loop reductions with no loads or stores in the loop the default widest
+; type is 32 bits, which allows a sensible VF to be chosen.
+
+; CHECK-LABEL: Checking a loop in "no_loads_stores"
+; CHECK: The Smallest and Widest types: 4294967295 / 32 bits
+; CHECK: Selecting VF: 4
+
+define double @no_loads_stores() {
+entry:
+  br label %for.body
+
+for.body:
+  %s.09 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %conv = sitofp i64 %i.08 to double
+  %mul = fmul double %conv, %conv
+  %add = fadd double %s.09, %mul
+  %inc = add nuw nsw i64 %i.08, 1
+  %exitcond.not = icmp eq i64 %inc, 1234567
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  %.lcssa = phi double [ %add, %for.body ]
+  ret double %.lcssa
+}
+
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6319,6 +6319,17 @@
   unsigned MinWidth = -1U;
   unsigned MaxWidth = 8;
   const DataLayout &DL = TheFunction->getParent()->getDataLayout();
+  // For in-loop reductions, no element types are added to ElementTypesInLoop
+  // if there are no loads/stores in the loop. In this case, set the maximum
+  // width to be the smallest legal int width, or 32 if there are no legal
+  // widths set, so that a sensible VF is chosen.
+  if (ElementTypesInLoop.empty()) {
+    MaxWidth = 32;
+    Type *SmallestIntType =
+        DL.getSmallestLegalIntType(TheLoop->getHeader()->getContext());
+    if (SmallestIntType)
+      MaxWidth = DL.getTypeSizeInBits(SmallestIntType).getFixedSize();
+  }
   for (Type *T : ElementTypesInLoop) {
     MinWidth = std::min<unsigned>(
         MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize());


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D113973.388813.patch
Type: text/x-patch
Size: 3805 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211122/ef21b69c/attachment.bin>


More information about the llvm-commits mailing list