[PATCH] D112548: [LoopVectorize] Propagate fast-math flags for ordered reductions

Tue Oct 26 08:15:34 PDT 2021

RosieSumpter created this revision.
RosieSumpter added reviewers: paulwalker-arm, david-arm, kmclaughlin, sdesmalen.
Herald added a subscriber: hiraditya.
RosieSumpter requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

This patch updates `VPReductionRecipe::execute` so that, for ordered
reductions, the fast-math flags of the recipe's underlying instruction are
propagated to the reductions that are created.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D112548

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll


Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================

--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -812,6 +812,52 @@
   ret double %res
 }
 
+; Test that a fast-math flag (nnan) on the scalar fadd is propagated to the vectorized reduction.
+define float @fadd_strict_fmf(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-ORDERED-LABEL: @fadd_strict_fmf
+; CHECK-ORDERED: vector.body:
+; CHECK-ORDERED: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[RDX:%.*]], %vector.body ]
+; CHECK-ORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-ORDERED: [[RDX]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float [[VEC_PHI]], <8 x float> [[LOAD_VEC]])
+; CHECK-ORDERED: for.end:
+; CHECK-ORDERED: [[RES:%.*]] = phi float [ [[SCALAR:%.*]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-ORDERED: ret float [[RES]]
+
+; CHECK-UNORDERED-LABEL: @fadd_strict_fmf
+; CHECK-UNORDERED: vector.body:
+; CHECK-UNORDERED: [[VEC_PHI:%.*]] = phi <8 x float> [ <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ [[FADD_VEC:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-UNORDERED: [[FADD_VEC]] = fadd nnan <8 x float> [[LOAD_VEC]], [[VEC_PHI]]
+; CHECK-UNORDERED-NOT: call nnan float @llvm.vector.reduce.fadd 
+; CHECK-UNORDERED: middle.block:
+; CHECK-UNORDERED: [[RDX:%.*]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[FADD_VEC]])
+; CHECK-UNORDERED: for.body:
+; CHECK-UNORDERED: [[LOAD:%.*]] = load float, float*
+; CHECK-UNORDERED: [[FADD:%.*]] = fadd nnan float [[LOAD]], {{.*}}
+; CHECK-UNORDERED: for.end:
+; CHECK-UNORDERED: [[RES:%.*]] = phi float [ [[FADD]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-UNORDERED: ret float [[RES]]
+
+; CHECK-NOT-VECTORIZED-LABEL: @fadd_strict_fmf
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd nnan float %0, %sum.07
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret float %add
+}
+
 !0 = distinct !{!0, !5, !9, !11}
 !1 = distinct !{!1, !5, !10, !11}
 !2 = distinct !{!2, !6, !9, !11}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9749,6 +9749,14 @@
   Value *PrevInChain = State.get(getChainOp(), 0);
   RecurKind Kind = RdxDesc->getRecurrenceKind();
   bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
+  FastMathFlags Current_FMF = State.Builder.getFastMathFlags();
+  if (IsOrdered) {
+    // Propagate the fast-math flags carried by the underlying instruction.
+    if (auto *FPMO = dyn_cast<FPMathOperator>(getUnderlyingInstr())) {
+      FastMathFlags FMF = FPMO->getFastMathFlags();
+      State.Builder.setFastMathFlags(FMF);
+    }
+  }
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     Value *NewVecOp = State.get(getVecOp(), Part);
     if (VPValue *Cond = getCondOp()) {
@@ -9788,6 +9796,8 @@
           PrevInChain);
     State.set(this, NextInChain, Part);
   }
+  // Reset fast-math flags.
+  State.Builder.setFastMathFlags(Current_FMF);
 }
 
 void VPReplicateRecipe::execute(VPTransformState &State) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D112548.382333.patch
Type: text/x-patch
Size: 3842 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211026/e3faacd0/attachment.bin>