[llvm-branch-commits] [llvm] 25067f1 - [LoopIdiomRecognize] Teach detectShiftUntilZeroIdiom to recognize loops where the counter is decrementing.

Craig Topper via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Dec 14 14:30:15 PST 2020


Author: Craig Topper
Date: 2020-12-14T14:25:05-08:00
New Revision: 25067f179f33ba1b764ac7a7d83385c8fd73801f

URL: https://github.com/llvm/llvm-project/commit/25067f179f33ba1b764ac7a7d83385c8fd73801f
DIFF: https://github.com/llvm/llvm-project/commit/25067f179f33ba1b764ac7a7d83385c8fd73801f.diff

LOG: [LoopIdiomRecognize] Teach detectShiftUntilZeroIdiom to recognize loops where the counter is decrementing.

This adds support for loops like

unsigned clz(unsigned x) {
    unsigned w = sizeof (x) * CHAR_BIT;
    while (x) {
        w--;
        x >>= 1;
    }

    return w;
}

and

unsigned clz(unsigned x) {
    unsigned w = sizeof (x) * CHAR_BIT - 1;
    while (x >>= 1) {
        w--;
    }

    return w;
}

To support these we look for add x, -1 as well as add x, 1 that
we already matched. If the value was -1 we need to subtract from
the initial counter value instead of adding to it.

Fixes PR48404.

Differential Revision: https://reviews.llvm.org/D92745

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
    llvm/test/Transforms/LoopIdiom/X86/cttz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 7e69cc5beffe..15d1e8da5baa 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1475,6 +1475,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
     return false;
 
   // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
+  //         or cnt.next = cnt + -1.
   // TODO: We can skip the step. If loop trip count is known (CTLZ),
   //       then all uses of "cnt.next" could be optimized to the trip count
   //       plus "cnt0". Currently it is not optimized.
@@ -1488,7 +1489,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
       continue;
 
     ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
-    if (!Inc || !Inc->isOne())
+    if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
       continue;
 
     PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
@@ -1751,11 +1752,18 @@ void LoopIdiomRecognize::transformLoopToCountable(
   NewCount = Builder.CreateZExtOrTrunc(NewCount,
                                        cast<IntegerType>(CntInst->getType()));
 
-  // If the counter's initial value is not zero, insert Add Inst.
   Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
-  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
-  if (!InitConst || !InitConst->isZero())
-    NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+  if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
+    // If the counter was being incremented in the loop, add NewCount to the
+    // counter's initial value, but only if the initial value is not zero.
+    ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+    if (!InitConst || !InitConst->isZero())
+      NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+  } else {
+    // If the count was being decremented in the loop, subtract NewCount from
+    // the counter's initial value.
+    NewCount = Builder.CreateSub(CntInitVal, NewCount);
+  }
 
   // Step 2: Insert new IV and loop condition:
   // loop:

diff  --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
index 6d3863a0ee33..4724adb030a4 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
@@ -693,16 +693,21 @@ define i32 @ctlz_decrement(i32 %n) {
 ; ALL-NEXT:    [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
 ; ALL-NEXT:    br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; ALL:       while.body.preheader:
+; ALL-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[N]], i1 true)
+; ALL-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; ALL-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
 ; ALL-NEXT:    br label [[WHILE_BODY:%.*]]
 ; ALL:       while.body:
+; ALL-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
 ; ALL-NEXT:    [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, [[WHILE_BODY_PREHEADER]] ]
 ; ALL-NEXT:    [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
 ; ALL-NEXT:    [[SHR]] = lshr i32 [[N_ADDR_05]], 1
 ; ALL-NEXT:    [[INC]] = add nsw i32 [[I_06]], -1
-; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
+; ALL-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
 ; ALL-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
 ; ALL:       while.end.loopexit:
-; ALL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
+; ALL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY]] ]
 ; ALL-NEXT:    br label [[WHILE_END]]
 ; ALL:       while.end:
 ; ALL-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
@@ -747,16 +752,23 @@ while.end:                                        ; preds = %while.end.loopexit,
 define i32 @ctlz_lshr_decrement(i32 %n) {
 ; ALL-LABEL: @ctlz_lshr_decrement(
 ; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TMP0:%.*]] = lshr i32 [[N:%.*]], 1
+; ALL-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; ALL-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; ALL-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; ALL-NEXT:    [[TMP4:%.*]] = sub i32 31, [[TMP2]]
 ; ALL-NEXT:    br label [[WHILE_COND:%.*]]
 ; ALL:       while.cond:
-; ALL-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
 ; ALL-NEXT:    [[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
 ; ALL-NEXT:    [[SHR]] = lshr i32 [[N_ADDR_0]], 1
-; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
+; ALL-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
 ; ALL-NEXT:    [[INC]] = add nsw i32 [[I_0]], -1
 ; ALL-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
 ; ALL:       while.end:
-; ALL-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[I_0]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
 ; ALL-NEXT:    ret i32 [[I_0_LCSSA]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
index 642eb11d2d7f..cc8a5388e431 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
@@ -133,16 +133,21 @@ define i32 @cttz_decrement(i32 %n) {
 ; ALL-NEXT:    [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
 ; ALL-NEXT:    br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; ALL:       while.body.preheader:
+; ALL-NEXT:    [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[N]], i1 true)
+; ALL-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; ALL-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
 ; ALL-NEXT:    br label [[WHILE_BODY:%.*]]
 ; ALL:       while.body:
+; ALL-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
 ; ALL-NEXT:    [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, [[WHILE_BODY_PREHEADER]] ]
 ; ALL-NEXT:    [[N_ADDR_05:%.*]] = phi i32 [ [[SHL:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
 ; ALL-NEXT:    [[SHL]] = shl i32 [[N_ADDR_05]], 1
 ; ALL-NEXT:    [[INC]] = add nsw i32 [[I_06]], -1
-; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[SHL]], 0
+; ALL-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
 ; ALL-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
 ; ALL:       while.end.loopexit:
-; ALL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
+; ALL-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY]] ]
 ; ALL-NEXT:    br label [[WHILE_END]]
 ; ALL:       while.end:
 ; ALL-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
@@ -187,16 +192,23 @@ while.end:                                        ; preds = %while.end.loopexit,
 define i32 @cttz_shl_decrement(i32 %n) {
 ; ALL-LABEL: @cttz_shl_decrement(
 ; ALL-NEXT:  entry:
+; ALL-NEXT:    [[TMP0:%.*]] = shl i32 [[N:%.*]], 1
+; ALL-NEXT:    [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
+; ALL-NEXT:    [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; ALL-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; ALL-NEXT:    [[TMP4:%.*]] = sub i32 31, [[TMP2]]
 ; ALL-NEXT:    br label [[WHILE_COND:%.*]]
 ; ALL:       while.cond:
-; ALL-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHL:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHL:%.*]], [[WHILE_COND]] ]
 ; ALL-NEXT:    [[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
 ; ALL-NEXT:    [[SHL]] = shl i32 [[N_ADDR_0]], 1
-; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[SHL]], 0
+; ALL-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
 ; ALL-NEXT:    [[INC]] = add nsw i32 [[I_0]], -1
 ; ALL-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
 ; ALL:       while.end:
-; ALL-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[I_0]], [[WHILE_COND]] ]
+; ALL-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
 ; ALL-NEXT:    ret i32 [[I_0_LCSSA]]
 ;
 entry:


        


More information about the llvm-branch-commits mailing list