[PATCH] D57728: Relax constraints for reduction vectorization

Mon Feb 4 17:35:14 PST 2019

sanjoy created this revision.
sanjoy added reviewers: tvvikram, mkuper.
Herald added subscribers: bixia, jlebar, mcrosier.
Herald added a project: LLVM.

I'm somewhat unsure here, but gating the vectorization of floating-point
reductions on the full set of fast-math flags (`fast`) seems unnecessary;
`reassoc` and `contract` should be sufficient.


Repository:
  rL LLVM

https://reviews.llvm.org/D57728

Files:
  lib/Analysis/IVDescriptors.cpp
  test/Transforms/LoopVectorize/reduction-fastmath.ll


Index: test/Transforms/LoopVectorize/reduction-fastmath.ll
===================================================================

--- /dev/null
+++ test/Transforms/LoopVectorize/reduction-fastmath.ll
@@ -0,0 +1,73 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define float @reduction_sum_float_ieee(i32 %n, float* %array) {
+; CHECK-LABEL: define float @reduction_sum_float_ieee(
+entry:
+  %entry.cond = icmp ne i32 0, 4096
+  br i1 %entry.cond, label %loop, label %loop.exit
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
+  %address = getelementptr float, float* %array, i32 %idx
+  %value = load float, float* %address
+  %sum.inc = fadd float %sum, %value
+  %idx.inc = add i32 %idx, 1
+  %be.cond = icmp ne i32 %idx.inc, 4096
+  br i1 %be.cond, label %loop, label %loop.exit
+
+loop.exit:
+  %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
+; CHECK-NOT: %wide.load = load <4 x float>, <4 x float>*
+; CHECK: ret float %sum.lcssa
+  ret float %sum.lcssa
+}
+
+define float @reduction_sum_float_fastmath(i32 %n, float* %array) {
+; CHECK-LABEL: define float @reduction_sum_float_fastmath(
+entry:
+  %entry.cond = icmp ne i32 0, 4096
+  br i1 %entry.cond, label %loop, label %loop.exit
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
+  %address = getelementptr float, float* %array, i32 %idx
+  %value = load float, float* %address
+  %sum.inc = fadd fast float %sum, %value
+  %idx.inc = add i32 %idx, 1
+  %be.cond = icmp ne i32 %idx.inc, 4096
+  br i1 %be.cond, label %loop, label %loop.exit
+
+loop.exit:
+  %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
+; CHECK: %wide.load = load <4 x float>, <4 x float>*
+; CHECK: ret float %sum.lcssa
+  ret float %sum.lcssa
+}
+
+define float @reduction_sum_float_partial_fastmath(i32 %n, float* %array) {
+; CHECK-LABEL: define float @reduction_sum_float_partial_fastmath(
+entry:
+  %entry.cond = icmp ne i32 0, 4096
+  br i1 %entry.cond, label %loop, label %loop.exit
+
+loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ]
+  %address = getelementptr float, float* %array, i32 %idx
+  %value = load float, float* %address
+  %sum.inc = fadd reassoc contract float %sum, %value
+  %idx.inc = add i32 %idx, 1
+  %be.cond = icmp ne i32 %idx.inc, 4096
+  br i1 %be.cond, label %loop, label %loop.exit
+
+loop.exit:
+  %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ]
+; CHECK: %wide.load = load <4 x float>, <4 x float>*
+; CHECK: ret float %sum.lcssa
+  ret float %sum.lcssa
+}
Index: lib/Analysis/IVDescriptors.cpp
===================================================================
--- lib/Analysis/IVDescriptors.cpp
+++ lib/Analysis/IVDescriptors.cpp
@@ -546,12 +546,16 @@
   return InstDesc(false, I);
 }
 
+static bool CanVectorizeReduction(Instruction *I) {
+  return I->hasAllowReassoc() && I->hasAllowContract();
+}
+
 RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                         InstDesc &Prev, bool HasFunNoNaNAttr) {
   bool FP = I->getType()->isFloatingPointTy();
   Instruction *UAI = Prev.getUnsafeAlgebraInst();
-  if (!UAI && FP && !I->isFast())
+  if (!UAI && FP && !CanVectorizeReduction(I))
     UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
 
   switch (I->getOpcode()) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D57728.185204.patch
Type: text/x-patch
Size: 3662 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190205/4f28c6a1/attachment.bin>