[llvm] 0aecf7f - [CodeGen] Fix incorrectly detected reduction bug in ComplexDeinterleaving pass

Mon Jul 10 05:55:07 PDT 2023

Author: Igor Kirillov
Date: 2023-07-10T12:54:38Z
New Revision: 0aecf7ff0d81930d3d46daf1c0552441b793d904

URL: https://github.com/llvm/llvm-project/commit/0aecf7ff0d81930d3d46daf1c0552441b793d904
DIFF: https://github.com/llvm/llvm-project/commit/0aecf7ff0d81930d3d46daf1c0552441b793d904.diff

LOG: [CodeGen] Fix incorrectly detected reduction bug in ComplexDeinterleaving pass

Using ACLE intrinsics, it is possible to create a loop that the
deinterleaving pass incorrectly classifies as a reduction loop.
For example, for fixed-width vectors such a loop looks like this:

vector.body:
  %a = phi <4 x float> [ %init.a, %entry ], [ %updated.a, %vector.body ]
  %b = phi <4 x float> [ %init.b, %entry ], [ %updated.b, %vector.body ]
  ...
; Does not depend on %a or %b:
  %updated.a = ...
  %updated.b = ...

Differential Revision: https://reviews.llvm.org/D154598

Added: 
    

Modified: 
    llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
    llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 9f2c665866d3c9..23827b9a2fd707 100644

--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -261,6 +261,10 @@ class ComplexDeinterleavingGraph {
   PHINode *RealPHI = nullptr;
   PHINode *ImagPHI = nullptr;
 
+  /// Set this flag to true if RealPHI and ImagPHI were reached during reduction
+  /// detection.
+  bool PHIsFound = false;
+
   /// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode.
   /// The new PHINode corresponds to a vector of deinterleaved complex numbers.
   /// This mapping is populated during
@@ -1419,7 +1423,8 @@ bool ComplexDeinterleavingGraph::collectPotentialReductions(BasicBlock *B) {
       FinalReduction = dyn_cast<Instruction>(U);
     }
 
-    if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B)
+    if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B ||
+        isa<PHINode>(FinalReduction))
       continue;
 
     ReductionInfo[ReductionOp] = {&PHI, FinalReduction};
@@ -1460,6 +1465,7 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
 
       RealPHI = ReductionInfo[Real].first;
       ImagPHI = ReductionInfo[Imag].first;
+      PHIsFound = false;
       auto Node = identifyNode(Real, Imag);
       if (!Node) {
         std::swap(Real, Imag);
@@ -1467,9 +1473,10 @@ void ComplexDeinterleavingGraph::identifyReductionNodes() {
         Node = identifyNode(Real, Imag);
       }
 
-      // If a node is identified, mark its operation instructions as used to
-      // prevent re-identification and attach the node to the real part
-      if (Node) {
+      // If a node is identified and reduction PHINode is used in the chain of
+      // operations, mark its operation instructions as used to prevent
+      // re-identification and attach the node to the real part
+      if (Node && PHIsFound) {
         LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: "
                           << *Real << " / " << *Imag << "\n");
         Processed[i] = true;
@@ -1762,6 +1769,7 @@ ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
   if (Real != RealPHI || Imag != ImagPHI)
     return nullptr;
 
+  PHIsFound = true;
   NodePtr PlaceholderNode = prepareCompositeNode(
       ComplexDeinterleavingOperation::ReductionPHI, Real, Imag);
   return submitCompositeNode(PlaceholderNode);

diff  --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
index de6611bed9c3e2..2eb1c9e07407e9 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
@@ -236,4 +236,31 @@ middle.block:                                     ; preds = %vector.body
   %.fca.0.1.insert = insertvalue %"struct.std::complex" %.fca.0.0.insert, double %18, 0, 1
   ret %"struct.std::complex" %.fca.0.1.insert
 }
+
+; The reduced bug from D153355. Shows that reduction was detected where it did not exist.
+define void @incorrect_reduction_pattern(i1 %exitcond.not) {
+; CHECK-LABEL: incorrect_reduction_pattern:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:  .LBB3_1: // %for.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    tbz w0, #0, .LBB3_1
+; CHECK-NEXT:  // %bb.2: // %for.end.loopexit
+; CHECK-NEXT:    ret
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %vec_r = phi <4 x float> [ zeroinitializer, %entry ], [ %lane_r, %for.body ]
+  %vec_i = phi <4 x float> [ zeroinitializer, %entry ], [ %lane_i, %for.body ]
+  %add = fadd <4 x float> %vec_r, %vec_i
+  %lane_r = shufflevector <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> zeroinitializer, <4 x i32> zeroinitializer
+  %lane_i = shufflevector <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> zeroinitializer, <4 x i32> zeroinitializer
+  br i1 %exitcond.not, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  %mul.r = fadd <4 x float> %lane_r, %add
+  %mul.i = fadd <4 x float> %lane_i, %add
+  ret void
+}
+
 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)