[PATCH] D112548: [LoopVectorize] Propagate fast-math flags for ordered reductions
Rosie Sumpter via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 26 08:15:34 PDT 2021
RosieSumpter created this revision.
RosieSumpter added reviewers: paulwalker-arm, david-arm, kmclaughlin, sdesmalen.
Herald added a subscriber: hiraditya.
RosieSumpter requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
This patch updates ##VPReductionRecipe::execute## so that the fast-math
flags associated with the underlying instruction of the VPReductionRecipe are
propagated through to the reductions which are created.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D112548
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -812,6 +812,52 @@
ret double %res
}
+; Test case where fadd has a fast-math flag.
+define float @fadd_strict_fmf(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-ORDERED-LABEL: @fadd_strict_fmf
+; CHECK-ORDERED: vector.body:
+; CHECK-ORDERED: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[RDX:%.*]], %vector.body ]
+; CHECK-ORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-ORDERED: [[RDX]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float [[VEC_PHI]], <8 x float> [[LOAD_VEC]])
+; CHECK-ORDERED: for.end:
+; CHECK-ORDERED: [[RES:%.*]] = phi float [ [[SCALAR:%.*]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-ORDERED: ret float [[RES]]
+
+; CHECK-UNORDERED-LABEL: @fadd_strict_fmf
+; CHECK-UNORDERED: vector.body:
+; CHECK-UNORDERED: [[VEC_PHI:%.*]] = phi <8 x float> [ <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ [[FADD_VEC:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-UNORDERED: [[FADD_VEC]] = fadd nnan <8 x float> [[LOAD_VEC]], [[VEC_PHI]]
+; CHECK-UNORDERED-NOT: call nnan float @llvm.vector.reduce.fadd
+; CHECK-UNORDERED: middle.block:
+; CHECK-UNORDERED: [[RDX:%.*]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[FADD_VEC]])
+; CHECK-UNORDERED: for.body:
+; CHECK-UNORDERED: [[LOAD:%.*]] = load float, float*
+; CHECK-UNORDERED: [[FADD:%.*]] = fadd nnan float [[LOAD]], {{.*}}
+; CHECK-UNORDERED: for.end:
+; CHECK-UNORDERED: [[RES:%.*]] = phi float [ [[FADD]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-UNORDERED: ret float [[RES]]
+
+; CHECK-NOT-VECTORIZED-LABEL: @fadd_strict_fmf
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+ %0 = load float, float* %arrayidx, align 4
+ %add = fadd nnan float %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+ ret float %add
+}
+
!0 = distinct !{!0, !5, !9, !11}
!1 = distinct !{!1, !5, !10, !11}
!2 = distinct !{!2, !6, !9, !11}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9749,6 +9749,14 @@
Value *PrevInChain = State.get(getChainOp(), 0);
RecurKind Kind = RdxDesc->getRecurrenceKind();
bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
+ FastMathFlags Current_FMF = State.Builder.getFastMathFlags();
+ if (IsOrdered) {
+ // Propagate the fast-math flags carried by the underlying instruction.
+ if (auto *FPMO = dyn_cast<FPMathOperator>(getUnderlyingInstr())) {
+ FastMathFlags FMF = FPMO->getFastMathFlags();
+ State.Builder.setFastMathFlags(FMF);
+ }
+ }
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
@@ -9788,6 +9796,8 @@
PrevInChain);
State.set(this, NextInChain, Part);
}
+ // Reset fast-math flags.
+ State.Builder.setFastMathFlags(Current_FMF);
}
void VPReplicateRecipe::execute(VPTransformState &State) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D112548.382333.patch
Type: text/x-patch
Size: 3842 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211026/e3faacd0/attachment.bin>
More information about the llvm-commits
mailing list