[PATCH] D71569: [LoopFusion] Ensure that both loops are guarded or neither are guarded.

Kit Barton via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 16 14:07:35 PST 2019


kbarton created this revision.
kbarton added reviewers: Meinersbur, Whitney.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.

This patch modifies the current conditions for loop fusion by ensuring that
either both fusion candidates have a guard, or neither has a guard. This
prevents us from inadvertently attempting to fuse a guarded loop with a
non-guarded loop.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D71569

Files:
  llvm/lib/Transforms/Scalar/LoopFuse.cpp
  llvm/test/Transforms/LoopFusion/cannot_fuse.ll


Index: llvm/test/Transforms/LoopFusion/cannot_fuse.ll
===================================================================
--- llvm/test/Transforms/LoopFusion/cannot_fuse.ll
+++ llvm/test/Transforms/LoopFusion/cannot_fuse.ll
@@ -414,3 +414,35 @@
 bb29:                                             ; preds = %bb18
   ret void
 }
+
+; Check that a guarded loop and a non-guarded loop are not fused.
+; CHECK: Performing Loop Fusion on function _Z3fooPiS_b
+; CHECK: Fusion candidates do not have identical guards. Not Fusing.
+define dso_local void @_Z3fooPiS_b(i32* noalias %A, i32* noalias %B, i1 zeroext %cond) #0 {
+entry:
+  %frombool = zext i1 %cond to i8
+  %tobool = trunc i8 %frombool to i1
+  br i1 %tobool, label %for.body.preheader, label %if.end
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.02 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nsw i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, 100
+  br i1 %cmp, label %for.body, label %if.end.loopexit
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+if.end:                                           ; preds = %if.end.loopexit, %entry
+  br label %for.body4
+for.body4:                                        ; preds = %if.end, %for.body4
+  %i1.01 = phi i64 [ 0, %if.end ], [ %inc7, %for.body4 ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i64 %i1.01
+  store i32 0, i32* %arrayidx5, align 4
+  %inc7 = add nsw i64 %i1.01, 1
+  %cmp3 = icmp slt i64 %inc7, 100
+  br i1 %cmp3, label %for.body4, label %for.end8
+for.end8:                                         ; preds = %for.body4
+  ret void
+}
Index: llvm/lib/Transforms/Scalar/LoopFuse.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -735,15 +735,14 @@
           }
 
           // Ensure that FC0 and FC1 have identical guards.
-          // If one (or both) are not guarded, this check is not necessary.
-          if (FC0->GuardBranch && FC1->GuardBranch &&
-              !haveIdenticalGuards(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
-                                 "guards. Not Fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonIdenticalGuards);
-            continue;
-          }
+          if (FC0->GuardBranch || FC1->GuardBranch)
+            if (!haveIdenticalGuards(*FC0, *FC1)) {
+              LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
+                                   "guards. Not Fusing.\n");
+              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+                                                         NonIdenticalGuards);
+              continue;
+            }
 
           // The following three checks look for empty blocks in FC0 and FC1. If
           // any of these blocks are non-empty, we do not fuse. This is done
@@ -1061,8 +1060,16 @@
   /// NonLoopBlock). In other words, the the first successor of both loops must
   /// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
   /// the NonLoopBlock). The same must be true for the second successor.
+  /// If one of the loops has a guard, but the other loop does not, they do not
+  /// have identical guards.
   bool haveIdenticalGuards(const FusionCandidate &FC0,
                            const FusionCandidate &FC1) const {
+    // Either both loops should have a guard or neither loop should have a
+    // guard.
+    if (!((FC0.GuardBranch && FC1.GuardBranch) ||
+          (!FC0.GuardBranch && !FC1.GuardBranch)))
+      return false;
+
     assert(FC0.GuardBranch && FC1.GuardBranch &&
            "Expecting FC0 and FC1 to be guarded loops.");
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D71569.234149.patch
Type: text/x-patch
Size: 4092 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191216/e7586e40/attachment.bin>


More information about the llvm-commits mailing list