[llvm] r333702 - [LoopIdiomRecognize] Only convert loops to ctlz if we can prove that the input is non-negative.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu May 31 15:16:55 PDT 2018


Author: ctopper
Date: Thu May 31 15:16:55 2018
New Revision: 333702

URL: http://llvm.org/viewvc/llvm-project?rev=333702&view=rev
Log:
[LoopIdiomRecognize] Only convert loops to ctlz if we can prove that the input is non-negative.

Summary:
Loop idiom recognize tries to convert loops like

```
int foo(int x) {
  int cnt = 0;
  while (x) {
    x >>= 1;
    ++cnt;
  }
  return cnt;
}
```

into calls to ctlz, but if x is initially negative this loop should be infinite: the arithmetic shift right keeps the sign bit set, so x never reaches zero.

It happens that the cases that motivated this change take the absolute value of x before the loop, so this patch restricts the transform to cases where we can prove that x is non-negative. Note: We are relying on the absolute value of INT_MIN being undefined so that we can assume the result is always non-negative.

Fixes PR37479

Reviewers: spatel, hfinkel, efriedma, javed.absar

Reviewed By: efriedma

Subscribers: dmgreen, llvm-commits

Differential Revision: https://reviews.llvm.org/D47348

Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll
    llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=333702&r1=333701&r2=333702&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Thu May 31 15:16:55 2018
@@ -1316,6 +1316,7 @@ static bool detectCTLZIdiom(Loop *CurLoo
     return false;
 
   // step 2: detect instructions corresponding to "x.next = x >> 1"
+  // TODO: Support loops that use LShr.
   if (!DefX || DefX->getOpcode() != Instruction::AShr)
     return false;
   ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
@@ -1397,6 +1398,13 @@ bool LoopIdiomRecognize::recognizeAndIns
   // parent function RunOnLoop.
   BasicBlock *PH = CurLoop->getLoopPreheader();
   Value *InitX = PhiX->getIncomingValueForBlock(PH);
+
+  // Make sure the initial value can't be negative otherwise the ashr in the
+  // loop might never reach zero which would make the loop infinite.
+  // TODO: Support loops that use lshr and wouldn't need this check.
+  if (!isKnownNonNegative(InitX, *DL))
+    return false;
+
   // If we check X != 0 before entering the loop we don't need a zero
   // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the
   // loop (if it is used we count CTLZ(X >> 1)).

Modified: llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll?rev=333702&r1=333701&r2=333702&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll (original)
+++ llvm/trunk/test/Transforms/LoopIdiom/ARM/ctlz.ll Thu May 31 15:16:55 2018
@@ -7,6 +7,7 @@
 ;
 ; int ctlz_and_other(int n, char *a)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = 0, n0 = n;
 ;   while(n >>= 1) {
 ;     a[i] = (n0 & (1 << i)) ? 1 : 0;
@@ -30,7 +31,10 @@
 ; Function Attrs: norecurse nounwind uwtable
 define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
 entry:
-  %shr8 = ashr i32 %n, 1
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  %shr8 = lshr i32 %abs_n, 1
   %tobool9 = icmp eq i32 %shr8, 0
   br i1 %tobool9, label %while.end, label %while.body.preheader
 
@@ -42,7 +46,7 @@ while.body:
   %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
   %0 = trunc i64 %indvars.iv to i32
   %shl = shl i32 1, %0
-  %and = and i32 %shl, %n
+  %and = and i32 %shl, %abs_n
   %tobool1 = icmp ne i32 %and, 0
   %conv = zext i1 %tobool1 to i8
   %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
@@ -67,6 +71,7 @@ while.end:
 ;
 ; int ctlz_zero_check(int n)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = 0;
 ;   while(n) {
 ;     n >>= 1;
@@ -76,7 +81,7 @@ while.end:
 ; }
 ;
 ; ALL:  entry
-; ALL:  %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+; ALL:  %0 = call i32 @llvm.ctlz.i32(i32 %abs_n, i1 true)
 ; ALL-NEXT:  %1 = sub i32 32, %0
 ; ALL:  %inc.lcssa = phi i32 [ %1, %while.body ]
 ; ALL:  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
@@ -85,7 +90,10 @@ while.end:
 ; Function Attrs: norecurse nounwind readnone uwtable
 define i32 @ctlz_zero_check(i32 %n) {
 entry:
-  %tobool4 = icmp eq i32 %n, 0
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  %tobool4 = icmp eq i32 %abs_n, 0
   br i1 %tobool4, label %while.end, label %while.body.preheader
 
 while.body.preheader:                             ; preds = %entry
@@ -93,7 +101,7 @@ while.body.preheader:
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
-  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
+  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
   %shr = ashr i32 %n.addr.05, 1
   %inc = add nsw i32 %i.06, 1
   %tobool = icmp eq i32 %shr, 0
@@ -113,6 +121,7 @@ while.end:
 ;
 ; int ctlz(int n)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = 0;
 ;   while(n >>= 1) {
 ;     i++;
@@ -121,7 +130,7 @@ while.end:
 ; }
 ;
 ; ALL:  entry
-; ALL:  %0 = ashr i32 %n, 1
+; ALL:  %0 = ashr i32 %abs_n, 1
 ; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
 ; ALL-NEXT:  %2 = sub i32 32, %1
 ; ALL-NEXT:  %3 = add i32 %2, 1
@@ -131,10 +140,13 @@ while.end:
 ; Function Attrs: norecurse nounwind readnone uwtable
 define i32 @ctlz(i32 %n) {
 entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
   br label %while.cond
 
 while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
+  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
   %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
   %shr = ashr i32 %n.addr.0, 1
   %tobool = icmp eq i32 %shr, 0
@@ -151,6 +163,7 @@ while.end:
 ;
 ; int ctlz_add(int n, int i0)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = i0;
 ;   while(n >>= 1) {
 ;     i++;
@@ -159,7 +172,7 @@ while.end:
 ; }
 ;
 ; ALL:  entry
-; ALL:  %0 = ashr i32 %n, 1
+; ALL:  %0 = ashr i32 %abs_n, 1
 ; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
 ; ALL-NEXT:  %2 = sub i32 32, %1
 ; ALL-NEXT:  %3 = add i32 %2, 1
@@ -170,10 +183,13 @@ while.end:
 ; Function Attrs: norecurse nounwind readnone uwtable
 define i32 @ctlz_add(i32 %n, i32 %i0) {
 entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
   br label %while.cond
 
 while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
+  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
   %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
   %shr = ashr i32 %n.addr.0, 1
   %tobool = icmp eq i32 %shr, 0

Modified: llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll?rev=333702&r1=333701&r2=333702&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll (original)
+++ llvm/trunk/test/Transforms/LoopIdiom/X86/ctlz.ll Thu May 31 15:16:55 2018
@@ -7,6 +7,7 @@
 ;
 ; int ctlz_and_other(int n, char *a)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = 0, n0 = n;
 ;   while(n >>= 1) {
 ;     a[i] = (n0 & (1 << i)) ? 1 : 0;
@@ -30,7 +31,10 @@
 ; Function Attrs: norecurse nounwind uwtable
 define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
 entry:
-  %shr8 = ashr i32 %n, 1
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  %shr8 = lshr i32 %abs_n, 1
   %tobool9 = icmp eq i32 %shr8, 0
   br i1 %tobool9, label %while.end, label %while.body.preheader
 
@@ -42,7 +46,7 @@ while.body:
   %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
   %0 = trunc i64 %indvars.iv to i32
   %shl = shl i32 1, %0
-  %and = and i32 %shl, %n
+  %and = and i32 %shl, %abs_n
   %tobool1 = icmp ne i32 %and, 0
   %conv = zext i1 %tobool1 to i8
   %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
@@ -67,6 +71,7 @@ while.end:
 ;
 ; int ctlz_zero_check(int n)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = 0;
 ;   while(n) {
 ;     n >>= 1;
@@ -76,7 +81,7 @@ while.end:
 ; }
 ;
 ; ALL:  entry
-; ALL:  %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+; ALL:  %0 = call i32 @llvm.ctlz.i32(i32 %abs_n, i1 true)
 ; ALL-NEXT:  %1 = sub i32 32, %0
 ; ALL:  %inc.lcssa = phi i32 [ %1, %while.body ]
 ; ALL:  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
@@ -85,7 +90,10 @@ while.end:
 ; Function Attrs: norecurse nounwind readnone uwtable
 define i32 @ctlz_zero_check(i32 %n) {
 entry:
-  %tobool4 = icmp eq i32 %n, 0
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
+  %tobool4 = icmp eq i32 %abs_n, 0
   br i1 %tobool4, label %while.end, label %while.body.preheader
 
 while.body.preheader:                             ; preds = %entry
@@ -93,7 +101,7 @@ while.body.preheader:
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
-  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
+  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %abs_n, %while.body.preheader ]
   %shr = ashr i32 %n.addr.05, 1
   %inc = add nsw i32 %i.06, 1
   %tobool = icmp eq i32 %shr, 0
@@ -113,6 +121,7 @@ while.end:
 ;
 ; int ctlz(int n)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = 0;
 ;   while(n >>= 1) {
 ;     i++;
@@ -121,7 +130,7 @@ while.end:
 ; }
 ;
 ; ALL:  entry
-; ALL:  %0 = ashr i32 %n, 1
+; ALL:  %0 = ashr i32 %abs_n, 1
 ; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
 ; ALL-NEXT:  %2 = sub i32 32, %1
 ; ALL-NEXT:  %3 = add i32 %2, 1
@@ -131,10 +140,13 @@ while.end:
 ; Function Attrs: norecurse nounwind readnone uwtable
 define i32 @ctlz(i32 %n) {
 entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
   br label %while.cond
 
 while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
+  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
   %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
   %shr = ashr i32 %n.addr.0, 1
   %tobool = icmp eq i32 %shr, 0
@@ -151,6 +163,7 @@ while.end:
 ;
 ; int ctlz_add(int n, int i0)
 ; {
+;   n = n >= 0 ? n : -n;
 ;   int i = i0;
 ;   while(n >>= 1) {
 ;     i++;
@@ -159,7 +172,7 @@ while.end:
 ; }
 ;
 ; ALL:  entry
-; ALL:  %0 = ashr i32 %n, 1
+; ALL:  %0 = ashr i32 %abs_n, 1
 ; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
 ; ALL-NEXT:  %2 = sub i32 32, %1
 ; ALL-NEXT:  %3 = add i32 %2, 1
@@ -170,10 +183,13 @@ while.end:
 ; Function Attrs: norecurse nounwind readnone uwtable
 define i32 @ctlz_add(i32 %n, i32 %i0) {
 entry:
+  %c = icmp sgt i32 %n, 0
+  %negn = sub nsw i32 0, %n
+  %abs_n = select i1 %c, i32 %n, i32 %negn
   br label %while.cond
 
 while.cond:                                       ; preds = %while.cond, %entry
-  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
+  %n.addr.0 = phi i32 [ %abs_n, %entry ], [ %shr, %while.cond ]
   %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
   %shr = ashr i32 %n.addr.0, 1
   %tobool = icmp eq i32 %shr, 0
@@ -189,7 +205,6 @@ while.end:
 ; all ones and continue doing so. This prevents the loop from terminating. If
 ; we convert this to a countable loop using ctlz that loop will only run 32
 ; times. This is different than the infinite number of times of the original.
-; FIXME: Don't transform this loop.
 define i32 @foo(i32 %x) {
 ; LZCNT-LABEL: @foo(
 ; LZCNT-NEXT:  entry:
@@ -197,21 +212,17 @@ define i32 @foo(i32 %x) {
 ; LZCNT-NEXT:    [[TOBOOL4:%.*]] = icmp eq i32 [[X:%.*]], 0
 ; LZCNT-NEXT:    br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_LR_PH:%.*]]
 ; LZCNT:       while.body.lr.ph:
-; LZCNT-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
 ; LZCNT-NEXT:    br label [[WHILE_BODY:%.*]]
 ; LZCNT:       while.body:
-; LZCNT-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_LR_PH]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
 ; LZCNT-NEXT:    [[CNT_06:%.*]] = phi i32 [ 0, [[WHILE_BODY_LR_PH]] ], [ [[INC:%.*]], [[WHILE_BODY]] ]
 ; LZCNT-NEXT:    [[X_ADDR_05:%.*]] = phi i32 [ [[X]], [[WHILE_BODY_LR_PH]] ], [ [[SHR:%.*]], [[WHILE_BODY]] ]
 ; LZCNT-NEXT:    [[SHR]] = ashr i32 [[X_ADDR_05]], 1
 ; LZCNT-NEXT:    [[INC]] = add i32 [[CNT_06]], 1
 ; LZCNT-NEXT:    store volatile i8 42, i8* [[V]], align 1
-; LZCNT-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
 ; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]], label [[WHILE_BODY]]
 ; LZCNT:       while.cond.while.end_crit_edge:
-; LZCNT-NEXT:    [[SPLIT:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY]] ]
+; LZCNT-NEXT:    [[SPLIT:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
 ; LZCNT-NEXT:    br label [[WHILE_END]]
 ; LZCNT:       while.end:
 ; LZCNT-NEXT:    [[CNT_0_LCSSA:%.*]] = phi i32 [ [[SPLIT]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]




More information about the llvm-commits mailing list