[PATCH] D62748: [LoopPred] Handle a subset of NE comparison based latches

Fri May 31 13:46:03 PDT 2019

reames created this revision.
reames added reviewers: apilipenko, sanjoy, nikic.
Herald added subscribers: bollu, mcrosier.
Herald added a project: LLVM.

At the moment, LoopPredication completely bails out if it sees a latch of the form:
%cmp = icmp ne %iv, %N
br i1 %cmp, label %loop, label %exit
OR
%cmp = icmp ne %iv.next, %N
br i1 %cmp, label %loop, label %exit

This is unfortunate since this is exactly the form that LFTR likes to produce.

This patch is the first in a sequence to add support for ICMP_NE style latches to LoopPredication.  This one only handles the simple pre-increment form.  Handling the harder post-increment form is future work.

If anyone knows of existing code that this logic could be shared (commoned) with, please let me know.  I've been looking at IRCE, but it doesn't quite fit the pattern here.


Repository:
  rL LLVM

https://reviews.llvm.org/D62748

Files:
  lib/Transforms/Scalar/LoopPredication.cpp
  test/Transforms/LoopPredication/basic.ll


Index: test/Transforms/LoopPredication/basic.ll
===================================================================

--- test/Transforms/LoopPredication/basic.ll
+++ test/Transforms/LoopPredication/basic.ll
@@ -1635,11 +1635,14 @@
 ; CHECK-LABEL: @ne_latch_zext_preinc(
 ; CHECK-NEXT:  loop.preheader:
 ; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
@@ -1711,11 +1714,14 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
 ; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
Index: lib/Transforms/Scalar/LoopPredication.cpp
===================================================================
--- lib/Transforms/Scalar/LoopPredication.cpp
+++ lib/Transforms/Scalar/LoopPredication.cpp
@@ -230,7 +230,8 @@
     cl::init(true));
 
 namespace {
-class LoopPredication {
+  class LoopPredication {
+  public:
   /// Represents an induction variable check:
   ///   icmp Pred, <induction variable>, <loop invariant limit>
   struct LoopICmp {
@@ -246,6 +247,7 @@
              << ", Limit = " << *Limit << "\n";
     }
   };
+  private:
 
   AliasAnalysis *AA;
   ScalarEvolution *SE;
@@ -614,6 +616,21 @@
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
+static void normalizePredicate(ScalarEvolution *SE,
+                               LoopPredication::LoopICmp& RC) {
+  // LFTR canonicalizes checks to the ICMP_NE form instead of an ULT/SLT form.
+  // Normalize back to the ULT/SLT form for ease of handling.
+  // Note: At the moment, this is rather restrictive in practice.  It handles
+  // pre-increment comparison on a canonical IV against a known positive RHS,
+  // but does not handle even trivial post-increment forms or non-1 steps.
+  // TODO: Generalize! 
+  if (RC.Pred == ICmpInst::ICMP_NE &&
+      RC.IV->getStepRecurrence(*SE)->isOne() &&
+      SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
+    RC.Pred = ICmpInst::ICMP_ULT;
+}
+
+
 /// If ICI can be widened to a loop invariant condition emits the loop
 /// invariant condition in the loop preheader and return it, otherwise
 /// returns None.
@@ -852,6 +869,7 @@
     }
   };
 
+  normalizePredicate(SE, *Result);
   if (IsUnsupportedPredicate(Step, Result->Pred)) {
     LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
                       << ")!\n");


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D62748.202475.patch
Type: text/x-patch
Size: 4118 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190531/f2859aa7/attachment.bin>