[PATCH] D126772: [LoopVectorize] Add support for invariant stores of ordered reductions

Mon Jun 6 08:26:13 PDT 2022

malharJ updated this revision to Diff 434486.
malharJ added a comment.

- Updated LIT test with more detailed CHECKs.

The main difference between this and "fadd_strict" is the presence of 
a store (to the invariant destination) in the middle.block


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126772/new/

https://reviews.llvm.org/D126772

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
  llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll


Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================

--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -1360,11 +1360,38 @@
 
 ; Test case with invariant store where fadd is strict.
 define void @reduction_store_to_invariant_address(float* %dst, float* readonly %src) {
+
+
+
 ; CHECK-ORDERED-LABEL: @reduction_store_to_invariant_address(
-; CHECK-ORDERED-NOT: vector.body
+; CHECK-ORDERED: entry
+; CHECK-ORDERED: %[[DEST_PTR:.*]] = getelementptr inbounds float, float* %dst, i64 42
+; CHECK-ORDERED: vector.body
+; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
+; CHECK-ORDERED: %[[LOAD_VEC:.*]] = load <8 x float>, <8 x float>*
+; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.v8f32(float %[[VEC_PHI]], <8 x float> %[[LOAD_VEC]])
+; CHECK-ORDERED: middle.block
+; CHECK-ORDERED: store float %4, float* %[[DEST_PTR]]
+; CHECK-ORDERED: for.body
+; CHECK-ORDERED: %[[LOAD:.*]] = load float, float*
+; CHECK-ORDERED: %[[FADD:.*]] = fadd float %{{.*}}, %[[LOAD]]
+; CHECK-ORDERED: store float %[[FADD]], float* %[[DEST_PTR]]
 
 ; CHECK-UNORDERED-LABEL: @reduction_store_to_invariant_address(
-; CHECK-UNORDERED-NOT: vector.body
+; CHECK-UNORDERED: entry
+; CHECK-UNORDERED: %[[DEST_PTR:.*]] = getelementptr inbounds float, float* %dst, i64 42
+; CHECK-UNORDERED: vector.body
+; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <8 x float> [ <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ %[[FADD_VEC:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[LOAD_VEC:.*]] = load <8 x float>, <8 x float>*
+; CHECK-UNORDERED: %[[FADD_VEC]] = fadd <8 x float> %[[VEC_PHI]], %[[LOAD_VEC]]
+; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd
+; CHECK-UNORDERED: middle.block
+; CHECK-UNORDERED: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %[[FADD_VEC]])
+; CHECK-UNORDERED: store float %[[RDX]], float* %[[DEST_PTR]]
+; CHECK-UNORDERED: for.body
+; CHECK-UNORDERED: %[[LOAD:.*]] = load float, float*
+; CHECK-UNORDERED: %[[FADD:.*]] = fadd float {{.*}}, %[[LOAD]]
+; CHECK-UNORDERED: store float %[[FADD]], float* %[[DEST_PTR]]
 
 ; CHECK-NOT-VECTORIZED-LABEL: @reduction_store_to_invariant_address(
 ; CHECK-NOT-VECTORIZED-NOT: vector.body
@@ -1383,7 +1410,7 @@
   store float %add, float* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0
 
 for.cond.cleanup:
   ret void
Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1020,8 +1020,7 @@
   // reduction operations in-loop, and do not have intermediate store.
   return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
     const RecurrenceDescriptor &RdxDesc = Reduction.second;
-    return !RdxDesc.hasExactFPMath() ||
-           (RdxDesc.isOrdered() && !RdxDesc.IntermediateStore);
+    return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
   }));
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D126772.434486.patch
Type: text/x-patch
Size: 3542 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220606/e9d428cf/attachment.bin>