[llvm] a0ddc61 - [LoopIdiom] 'left-shift until bittest' idiom: support canonical sign bit mask
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 23 11:28:27 PST 2020
Author: Roman Lebedev
Date: 2020-12-23T22:28:09+03:00
New Revision: a0ddc61c5b95be2585b6b4981cf8634d7a00e4a6
URL: https://github.com/llvm/llvm-project/commit/a0ddc61c5b95be2585b6b4981cf8634d7a00e4a6
DIFF: https://github.com/llvm/llvm-project/commit/a0ddc61c5b95be2585b6b4981cf8634d7a00e4a6.diff
LOG: [LoopIdiom] 'left-shift until bittest' idiom: support canonical sign bit mask
If the bitmask is for the sign bit, instcombine would have canonicalized
the pattern into a proper sign bit check. Supporting that is still
simple, but requires a bit of a roundtrip - we first have to use
`decomposeBitTestICmp()`, and then the rest just works again.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D91726
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f71f320aceb5..02556fb79dc5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -47,6 +47,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -1961,7 +1962,8 @@ inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
/// \endcode
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
Value *&BitMask, Value *&BitPos,
- Value *&CurrX, Value *&NextX) {
+ Value *&CurrX, Value *&NextX,
+ size_t &CanonicalHeaderSize) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Performing shift-until-bittest idiom detection.\n");
@@ -1992,6 +1994,7 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
// Step 2: Check if the backedge's condition is in desirable form.
auto MatchVariableBitMask = [&]() {
+ CanonicalHeaderSize = 5;
return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
match(CmpLHS,
m_c_And(m_Value(CurrX),
@@ -2001,14 +2004,24 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
CurLoop))));
};
auto MatchConstantBitMask = [&]() {
+ CanonicalHeaderSize = 5;
return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
match(CmpLHS, m_And(m_Value(CurrX),
m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
(BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
};
+ auto MatchDecomposableConstantBitMask = [&]() {
+ CanonicalHeaderSize = 4;
+
+ APInt Mask;
+ return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
+ ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
+ (BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
+ (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
+ };
- if (!MatchVariableBitMask() && !MatchConstantBitMask()) {
- // FIXME: support sign bit test (use llvm::decomposeBitTestICmp()).
+ if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
+ !MatchDecomposableConstantBitMask()) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
return false;
}
@@ -2097,8 +2110,9 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
bool MadeChange = false;
Value *X, *BitMask, *BitPos, *XCurr, *XNext;
- if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
- XNext)) {
+ size_t CanonicalHeaderSize;
+ if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, XNext,
+ CanonicalHeaderSize)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" shift-until-bittest idiom detection failed.\n");
return MadeChange;
@@ -2118,7 +2132,6 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop must not have any other instructions other than the idiom itself.
// FIXME: we could just rewrite the loop with countable trip count.
size_t HeaderSize = LoopHeaderBB->sizeWithoutDebug();
- constexpr size_t CanonicalHeaderSize = 5;
assert(HeaderSize >= CanonicalHeaderSize);
if (HeaderSize > CanonicalHeaderSize) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Won't be able to delete loop!\n");
diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
index b5061efe73a5..53d33cb7a005 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
@@ -451,20 +451,42 @@ end:
}
define i32 @p7_constant_mask_signbit_canonical(i32 %x) {
-; ALL-LABEL: @p7_constant_mask_signbit_canonical(
-; ALL-NEXT: entry:
-; ALL-NEXT: br label [[LOOP:%.*]], [[DBG116:!dbg !.*]]
-; ALL: loop:
-; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG117:!dbg !.*]]
-; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG117]]
-; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]]
-; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]]
-; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]]
-; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]]
-; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG120:!dbg !.*]]
-; ALL: end:
-; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG117]]
-; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]]
+; LZCNT-LABEL: @p7_constant_mask_signbit_canonical(
+; LZCNT-NEXT: entry:
+; LZCNT-NEXT: [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG116:!dbg !.*]]
+; LZCNT-NEXT: [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG116]]
+; LZCNT-NEXT: [[X_LEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG116]]
+; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG116]]
+; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG116]]
+; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG116]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG116]]
+; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG117:!dbg !.*]]
+; LZCNT: loop:
+; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG116]]
+; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG116]]
+; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]]
+; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]]
+; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]]
+; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]]
+; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG120:!dbg !.*]]
+; LZCNT: end:
+; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG116]]
+; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]]
+;
+; NOLZCNT-LABEL: @p7_constant_mask_signbit_canonical(
+; NOLZCNT-NEXT: entry:
+; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG116:!dbg !.*]]
+; NOLZCNT: loop:
+; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG117:!dbg !.*]]
+; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG117]]
+; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]]
+; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]]
+; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]]
+; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]]
+; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG120:!dbg !.*]]
+; NOLZCNT: end:
+; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG117]]
+; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]]
;
entry:
br label %loop
@@ -677,20 +699,42 @@ end:
; ICmp-Br are commutative
define i32 @p11(i32 %x) {
-; ALL-LABEL: @p11(
-; ALL-NEXT: entry:
-; ALL-NEXT: br label [[LOOP:%.*]], [[DBG172:!dbg !.*]]
-; ALL: loop:
-; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG173:!dbg !.*]]
-; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG173]]
-; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]]
-; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]]
-; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]]
-; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]]
-; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]]
-; ALL: end:
-; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG173]]
-; ALL-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]]
+; LZCNT-LABEL: @p11(
+; LZCNT-NEXT: entry:
+; LZCNT-NEXT: [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG172:!dbg !.*]]
+; LZCNT-NEXT: [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG172]]
+; LZCNT-NEXT: [[X_LEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG172]]
+; LZCNT-NEXT: [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG172]]
+; LZCNT-NEXT: [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG172]]
+; LZCNT-NEXT: [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG172]]
+; LZCNT-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG172]]
+; LZCNT-NEXT: br label [[LOOP:%.*]], [[DBG173:!dbg !.*]]
+; LZCNT: loop:
+; LZCNT-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG172]]
+; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG172]]
+; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]]
+; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]]
+; LZCNT-NEXT: [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]]
+; LZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]]
+; LZCNT-NEXT: br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]]
+; LZCNT: end:
+; LZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG172]]
+; LZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]]
+;
+; NOLZCNT-LABEL: @p11(
+; NOLZCNT-NEXT: entry:
+; NOLZCNT-NEXT: br label [[LOOP:%.*]], [[DBG172:!dbg !.*]]
+; NOLZCNT: loop:
+; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG173:!dbg !.*]]
+; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG173]]
+; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]]
+; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]]
+; NOLZCNT-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]]
+; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]]
+; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]]
+; NOLZCNT: end:
+; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG173]]
+; NOLZCNT-NEXT: ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]]
;
entry:
br label %loop
More information about the llvm-commits mailing list