[PATCH] D81415: [LoopVectorizer] Don't create unused block masks for reductions. NFC

Mon Jun 8 11:35:43 PDT 2020

dmgreen created this revision.
dmgreen added reviewers: Ayal, fhahn, SjoerdMeijer.
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.

This is pulled out of D75069 <https://reviews.llvm.org/D75069>, where I somewhat accidentally stopped creating some unneeded block masks in the case where there are no reductions. This patch makes the same change on its own, and shouldn't have any effect on codegen.


https://reviews.llvm.org/D81415

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
  llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll


Index: llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
===================================================================

--- llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -11,7 +11,7 @@
 ; CHECK-REMARKS-NOT:  remark: {{.*}} vectorized loop
 
 define void @VF1-VPlanExe() {
-; CHECK-LABEL: @VF1-VPlanExe
+; CHECK-LABEL: @VF1-VPlanExe(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -22,13 +22,9 @@
 ; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 [[INDUCTION]], 14
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[INDUCTION1]], 14
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i64 [[INDUCTION2]], 14
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i64 [[INDUCTION3]], 14
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -56,7 +52,7 @@
 }
 
 define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) {
-; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe
+; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds double, double* [[PTR1:%.*]], i64 15
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -73,17 +69,9 @@
 ; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP3]]
-; CHECK-NEXT:    [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[VEC_IV]], 14
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule i64 [[VEC_IV4]], 14
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule i64 [[VEC_IV5]], 14
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ule i64 [[VEC_IV6]], 14
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
Index: llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -39,8 +39,7 @@
 ; CHECK-NEXT:    "loop:\n" +
 ; CHECK-NEXT:      "WIDEN-INDUCTION %iv = phi 0, %iv.next\l" +
 ; CHECK-NEXT:      "WIDEN\l""  %cond0 = icmp %iv, 13\l" +
-; CHECK-NEXT:      "WIDEN-SELECT%s = select %cond0, 10, 20\l" +
-; CHECK-NEXT:      "EMIT vp<%1> = icmp ule ir<%iv> vp<%0>\l"
+; CHECK-NEXT:      "WIDEN-SELECT%s = select %cond0, 10, 20\l"
 ; CHECK-NEXT:  ]
 define void @test() {
 entry:
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1301,6 +1301,11 @@
     return foldTailByMasking() || Legal->blockNeedsPredication(BB);
   }
 
+  /// Returns true if there are any outside-loop reductions.
+  bool hasOutOfLoopReductions() const {
+    return !Legal->getReductionVars().empty();
+  }
+
   /// Estimate cost of an intrinsic call instruction CI if it were vectorized
   /// with factor VF.  Return the cost of the instruction, including
   /// scalarization overhead if it's needed.
@@ -7346,7 +7351,7 @@
 
   // Finally, if tail is folded by masking, introduce selects between the phi
   // and the live-out instruction of each reduction, at the end of the latch.
-  if (CM.foldTailByMasking()) {
+  if (CM.foldTailByMasking() && CM.hasOutOfLoopReductions()) {
     Builder.setInsertPoint(VPBB);
     auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
     for (auto &Reduction : Legal->getReductionVars()) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D81415.269214.patch
Type: text/x-patch
Size: 4990 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200608/be059213/attachment.bin>