[PATCH] D118073: [IVDescriptor] Get the exact FP instruction that does not allow reordering

Mon Jan 24 13:19:02 PST 2022

congzhe created this revision.
Herald added a subscriber: hiraditya.
congzhe requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

This is a bugfix in IVDescriptor.cpp.

The helper function `RecurrenceDescriptor::getExactFPMathInst()` is supposed to return the first FP instruction that does not allow reordering. However, in certain cases it does not work as expected. For instance, in the test case added in this patch, `RecurrenceDescriptor::getExactFPMathInst()` returns NULL for the reduction descriptor that corresponds to the reduction PHI node. This is because, when constructing the RecurrenceDescriptor, we trace the use-def chain starting from a PHI node, and for each instruction in the use-def chain its descriptor overrides the previous one. Therefore, in the final RecurrenceDescriptor we construct, we lose any earlier FP instructions that do not allow reordering.

For the test case added in this patch, the loop should not be vectorized if reordering is not allowed. However, with the current trunk it is vectorized.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D118073

Files:
  llvm/lib/Analysis/IVDescriptors.cpp
  llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll


Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================

--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -585,6 +585,36 @@
   ret float %rdx
 }
 
+; Negative test - loop contains two fadds and only one fadd has the fast flag,
+; which we cannot safely reorder.
+define float @fadd_multiple_one_flag(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+; CHECK-ORDERED-LABEL: @fadd_multiple_one_flag
+; CHECK-ORDERED-NOT: vector.body
+
+; CHECK-NOT-VECTORIZED-LABEL: @fadd_multiple_one_flag
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi float [ -0.000000e+00, %entry ], [ %add3, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %sum, %0
+  %arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv
+  %1 = load float, float* %arrayidx2, align 4
+  %add3 = fadd fast float %add, %1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                         ; preds = %for.body
+  %rdx = phi float [ %add3, %for.body ]
+  ret float %rdx
+}
+
 ; Tests with both a floating point reduction & induction, e.g.
 ;
 ;float fp_iv_rdx_loop(float *values, float init, float * __restrict__ A, int N) {
Index: llvm/lib/Analysis/IVDescriptors.cpp
===================================================================
--- llvm/lib/Analysis/IVDescriptors.cpp
+++ llvm/lib/Analysis/IVDescriptors.cpp
@@ -309,6 +309,10 @@
   // flags from all the reduction operations.
   FastMathFlags FMF = FastMathFlags::getFast();
 
+  // The first instruction in the use-def chain of the Phi node that requires
+  // exact floating point operations.
+  Instruction *ExactFPMathInst = nullptr;
+
   // A value in the reduction can be used:
   //  - By the reduction:
   //      - Reduction operation:
@@ -352,6 +356,9 @@
     if (Cur != Start) {
       ReduxDesc =
           isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
+      ExactFPMathInst = ExactFPMathInst == nullptr
+                            ? ReduxDesc.getExactFPMathInst()
+                            : ExactFPMathInst;
       if (!ReduxDesc.isRecurrence())
         return false;
       // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
@@ -480,8 +487,8 @@
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;
 
-  const bool IsOrdered = checkOrderedReduction(
-      Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi);
+  const bool IsOrdered =
+      checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
 
   if (Start != Phi) {
     // If the starting value is not the same as the phi node, we speculatively
@@ -538,9 +545,8 @@
   // is saved as part of the RecurrenceDescriptor.
 
   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
-                          ReduxDesc.getExactFPMathInst(), RecurrenceType,
-                          IsSigned, IsOrdered, CastInsts,
+  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
+                          RecurrenceType, IsSigned, IsOrdered, CastInsts,
                           MinWidthCastToRecurrenceType);
   RedDes = RD;
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D118073.402638.patch
Type: text/x-patch
Size: 3672 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220124/7402a588/attachment.bin>