[PATCH] D113973: [LoopVectorize][CostModel] Choose smaller VFs for in-loop reductions with no loads/stores

Mon Nov 22 02:37:53 PST 2021

RosieSumpter updated this revision to Diff 388822.
RosieSumpter added a comment.

- Remove changes to `LoopVectorize/pr32859.ll` and `LoopVectorize/pr36983.ll` tests


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113973/new/

https://reviews.llvm.org/D113973

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
  llvm/test/Transforms/LoopVectorize/X86/funclet.ll


Index: llvm/test/Transforms/LoopVectorize/X86/funclet.ll
===================================================================

--- llvm/test/Transforms/LoopVectorize/X86/funclet.ll
+++ llvm/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -33,7 +33,7 @@
 
 ; CHECK-LABEL: define void @test1(
 ; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
-; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
+; CHECK: call <8 x double> @llvm.floor.v8f64(<8 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
 
 declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
 
Index: llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -31,3 +31,30 @@
 for.end:
   ret void
 }
+
+; For in-loop reductions with no loads or stores in the loop, the default
+; widest type is 32 bits, which allows a sensible VF to be chosen.
+
+; CHECK-LABEL: Checking a loop in "no_loads_stores"
+; CHECK: The Smallest and Widest types: 4294967295 / 32 bits
+; CHECK: Selecting VF: 4
+
+define double @no_loads_stores() {
+entry:
+  br label %for.body
+
+for.body:
+  %s.09 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] ; running sum, starts at 0.0
+  %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] ; loop counter, starts at 0
+  %conv = sitofp i64 %i.08 to double
+  %mul = fmul double %conv, %conv ; square of the counter as a double
+  %add = fadd double %s.09, %mul ; accumulate into the running sum
+  %inc = add nuw nsw i64 %i.08, 1
+  %exitcond.not = icmp eq i64 %inc, 1234567 ; leave the loop once the counter hits 1234567
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  %.lcssa = phi double [ %add, %for.body ]
+  ret double %.lcssa ; final accumulated sum
+}
+
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6319,6 +6319,16 @@
   unsigned MinWidth = -1U;
   unsigned MaxWidth = 8;
   const DataLayout &DL = TheFunction->getParent()->getDataLayout();
+  // For in-loop reductions, no element types are added to ElementTypesInLoop
+  // if there are no loads/stores in the loop. In this case, set the maximum
+  // width to the smallest legal integer width (if the data layout provides
+  // one; otherwise keep the default above) so that a sensible VF is chosen.
+  if (ElementTypesInLoop.empty()) {
+    Type *SmallestIntType =
+        DL.getSmallestLegalIntType(TheLoop->getHeader()->getContext());
+    if (SmallestIntType)
+      MaxWidth = DL.getTypeSizeInBits(SmallestIntType).getFixedSize();
+  }
   for (Type *T : ElementTypesInLoop) {
     MinWidth = std::min<unsigned>(
         MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedSize());


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D113973.388822.patch
Type: text/x-patch
Size: 2823 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211122/9da9dd06/attachment-0001.bin>