[llvm-branch-commits] [llvm] 25067f1 - [LoopIdiomRecognize] Teach detectShiftUntilZeroIdiom to recognize loops where the counter is decrementing.
Craig Topper via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 14 14:30:15 PST 2020
Author: Craig Topper
Date: 2020-12-14T14:25:05-08:00
New Revision: 25067f179f33ba1b764ac7a7d83385c8fd73801f
URL: https://github.com/llvm/llvm-project/commit/25067f179f33ba1b764ac7a7d83385c8fd73801f
DIFF: https://github.com/llvm/llvm-project/commit/25067f179f33ba1b764ac7a7d83385c8fd73801f.diff
LOG: [LoopIdiomRecognize] Teach detectShiftUntilZeroIdiom to recognize loops where the counter is decrementing.
This adds support for loops like
unsigned clz(unsigned x) {
unsigned w = sizeof (x) * CHAR_BIT;
while (x) {
w--;
x >>= 1;
}
return w;
}
and
unsigned clz(unsigned x) {
unsigned w = sizeof (x) * CHAR_BIT - 1;
while (x >>= 1) {
w--;
}
return w;
}
To support these, we now match "add x, -1" in addition to the "add x, 1"
that we already matched. If the step is -1, the computed count is subtracted
from the counter's initial value instead of being added to it.
Fixes PR48404.
Differential Revision: https://reviews.llvm.org/D92745
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
llvm/test/Transforms/LoopIdiom/X86/cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 7e69cc5beffe..15d1e8da5baa 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1475,6 +1475,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
return false;
// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
+ // or cnt.next = cnt + -1.
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
// plus "cnt0". Currently it is not optimized.
@@ -1488,7 +1489,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
continue;
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
- if (!Inc || !Inc->isOne())
+ if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
@@ -1751,11 +1752,18 @@ void LoopIdiomRecognize::transformLoopToCountable(
NewCount = Builder.CreateZExtOrTrunc(NewCount,
cast<IntegerType>(CntInst->getType()));
- // If the counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
- ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
- if (!InitConst || !InitConst->isZero())
- NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
+ // If the counter was being incremented in the loop, add NewCount to the
+ // counter's initial value, but only if the initial value is not zero.
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero())
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ } else {
+ // If the count was being decremented in the loop, subtract NewCount from
+ // the counter's initial value.
+ NewCount = Builder.CreateSub(CntInitVal, NewCount);
+ }
// Step 2: Insert new IV and loop condition:
// loop:
diff --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
index 6d3863a0ee33..4724adb030a4 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
@@ -693,16 +693,21 @@ define i32 @ctlz_decrement(i32 %n) {
; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; ALL: while.body.preheader:
+; ALL-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[N]], i1 true)
+; ALL-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
; ALL-NEXT: br label [[WHILE_BODY:%.*]]
; ALL: while.body:
+; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
; ALL-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, [[WHILE_BODY_PREHEADER]] ]
; ALL-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_05]], 1
; ALL-NEXT: [[INC]] = add nsw i32 [[I_06]], -1
-; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
+; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; ALL: while.end.loopexit:
-; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
+; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY]] ]
; ALL-NEXT: br label [[WHILE_END]]
; ALL: while.end:
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
@@ -747,16 +752,23 @@ while.end: ; preds = %while.end.loopexit,
define i32 @ctlz_lshr_decrement(i32 %n) {
; ALL-LABEL: @ctlz_lshr_decrement(
; ALL-NEXT: entry:
+; ALL-NEXT: [[TMP0:%.*]] = lshr i32 [[N:%.*]], 1
+; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
+; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; ALL-NEXT: [[TMP4:%.*]] = sub i32 31, [[TMP2]]
; ALL-NEXT: br label [[WHILE_COND:%.*]]
; ALL: while.cond:
-; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
-; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
+; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], -1
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
; ALL: while.end:
-; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[I_0]], [[WHILE_COND]] ]
+; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
; ALL-NEXT: ret i32 [[I_0_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
index 642eb11d2d7f..cc8a5388e431 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
@@ -133,16 +133,21 @@ define i32 @cttz_decrement(i32 %n) {
; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; ALL: while.body.preheader:
+; ALL-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[N]], i1 true)
+; ALL-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
+; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
; ALL-NEXT: br label [[WHILE_BODY:%.*]]
; ALL: while.body:
+; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
; ALL-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, [[WHILE_BODY_PREHEADER]] ]
; ALL-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHL:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
; ALL-NEXT: [[SHL]] = shl i32 [[N_ADDR_05]], 1
; ALL-NEXT: [[INC]] = add nsw i32 [[I_06]], -1
-; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHL]], 0
+; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; ALL: while.end.loopexit:
-; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
+; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY]] ]
; ALL-NEXT: br label [[WHILE_END]]
; ALL: while.end:
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
@@ -187,16 +192,23 @@ while.end: ; preds = %while.end.loopexit,
define i32 @cttz_shl_decrement(i32 %n) {
; ALL-LABEL: @cttz_shl_decrement(
; ALL-NEXT: entry:
+; ALL-NEXT: [[TMP0:%.*]] = shl i32 [[N:%.*]], 1
+; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
+; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
+; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
+; ALL-NEXT: [[TMP4:%.*]] = sub i32 31, [[TMP2]]
; ALL-NEXT: br label [[WHILE_COND:%.*]]
; ALL: while.cond:
-; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHL:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHL:%.*]], [[WHILE_COND]] ]
; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
; ALL-NEXT: [[SHL]] = shl i32 [[N_ADDR_0]], 1
-; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHL]], 0
+; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
+; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], -1
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
; ALL: while.end:
-; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[I_0]], [[WHILE_COND]] ]
+; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
; ALL-NEXT: ret i32 [[I_0_LCSSA]]
;
entry:
More information about the llvm-branch-commits mailing list