[llvm] a0ddc61 - [LoopIdiom] 'left-shift until bittest' idiom: support canonical sign bit mask

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 23 11:28:27 PST 2020


Author: Roman Lebedev
Date: 2020-12-23T22:28:09+03:00
New Revision: a0ddc61c5b95be2585b6b4981cf8634d7a00e4a6

URL: https://github.com/llvm/llvm-project/commit/a0ddc61c5b95be2585b6b4981cf8634d7a00e4a6
DIFF: https://github.com/llvm/llvm-project/commit/a0ddc61c5b95be2585b6b4981cf8634d7a00e4a6.diff

LOG: [LoopIdiom] 'left-shift until bittest' idiom: support canonical sign bit mask

If the bitmask is for sign bit, instcombine would have canonicalized
the pattern into a proper sign bit check. Supporting that is still
simple, but requires a bit of a roundtrip - we first have to use
`decomposeBitTestICmp()`, and the rest again just works.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D91726

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f71f320aceb5..02556fb79dc5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -47,6 +47,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -1961,7 +1962,8 @@ inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
 /// \endcode
 static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
                                          Value *&BitMask, Value *&BitPos,
-                                         Value *&CurrX, Value *&NextX) {
+                                         Value *&CurrX, Value *&NextX,
+                                         size_t &CanonicalHeaderSize) {
   LLVM_DEBUG(dbgs() << DEBUG_TYPE
              " Performing shift-until-bittest idiom detection.\n");
 
@@ -1992,6 +1994,7 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
   // Step 2: Check if the backedge's condition is in desirable form.
 
   auto MatchVariableBitMask = [&]() {
+    CanonicalHeaderSize = 5;
     return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
            match(CmpLHS,
                  m_c_And(m_Value(CurrX),
@@ -2001,14 +2004,24 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
                                              CurLoop))));
   };
   auto MatchConstantBitMask = [&]() {
+    CanonicalHeaderSize = 5;
     return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
            match(CmpLHS, m_And(m_Value(CurrX),
                                m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
            (BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
   };
+  auto MatchDecomposableConstantBitMask = [&]() {
+    CanonicalHeaderSize = 4;
+
+    APInt Mask;
+    return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
+           ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
+           (BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
+           (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
+  };
 
-  if (!MatchVariableBitMask() && !MatchConstantBitMask()) {
-    // FIXME: support sign bit test (use llvm::decomposeBitTestICmp()).
+  if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
+      !MatchDecomposableConstantBitMask()) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
     return false;
   }
@@ -2097,8 +2110,9 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   bool MadeChange = false;
 
   Value *X, *BitMask, *BitPos, *XCurr, *XNext;
-  if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
-                                    XNext)) {
+  size_t CanonicalHeaderSize;
+  if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr, XNext,
+                                    CanonicalHeaderSize)) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE
                " shift-until-bittest idiom detection failed.\n");
     return MadeChange;
@@ -2118,7 +2132,6 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   // The loop must not have any other instructions other than the idiom itself.
   // FIXME: we could just rewrite the loop with countable trip count.
   size_t HeaderSize = LoopHeaderBB->sizeWithoutDebug();
-  constexpr size_t CanonicalHeaderSize = 5;
   assert(HeaderSize >= CanonicalHeaderSize);
   if (HeaderSize > CanonicalHeaderSize) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE " Won't be able to delete loop!\n");

diff  --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
index b5061efe73a5..53d33cb7a005 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll
@@ -451,20 +451,42 @@ end:
 }
 
 define i32 @p7_constant_mask_signbit_canonical(i32 %x) {
-; ALL-LABEL: @p7_constant_mask_signbit_canonical(
-; ALL-NEXT:  entry:
-; ALL-NEXT:    br label [[LOOP:%.*]], [[DBG116:!dbg !.*]]
-; ALL:       loop:
-; ALL-NEXT:    [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG117:!dbg !.*]]
-; ALL-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG117]]
-; ALL-NEXT:    [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]]
-; ALL-NEXT:    call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]]
-; ALL-NEXT:    [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]]
-; ALL-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]]
-; ALL-NEXT:    br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG120:!dbg !.*]]
-; ALL:       end:
-; ALL-NEXT:    [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG117]]
-; ALL-NEXT:    ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]]
+; LZCNT-LABEL: @p7_constant_mask_signbit_canonical(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG116:!dbg !.*]]
+; LZCNT-NEXT:    [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG116]]
+; LZCNT-NEXT:    [[X_LEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG116]]
+; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG116]]
+; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG116]]
+; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG116]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG116]]
+; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG117:!dbg !.*]]
+; LZCNT:       loop:
+; LZCNT-NEXT:    [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG116]]
+; LZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG116]]
+; LZCNT-NEXT:    [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]]
+; LZCNT-NEXT:    call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]]
+; LZCNT-NEXT:    [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]]
+; LZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]]
+; LZCNT-NEXT:    br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG120:!dbg !.*]]
+; LZCNT:       end:
+; LZCNT-NEXT:    [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG116]]
+; LZCNT-NEXT:    ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]]
+;
+; NOLZCNT-LABEL: @p7_constant_mask_signbit_canonical(
+; NOLZCNT-NEXT:  entry:
+; NOLZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG116:!dbg !.*]]
+; NOLZCNT:       loop:
+; NOLZCNT-NEXT:    [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG117:!dbg !.*]]
+; NOLZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META113:metadata !.*]], metadata !DIExpression()), [[DBG117]]
+; NOLZCNT-NEXT:    [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[X_CURR]], -1, [[DBG118:!dbg !.*]]
+; NOLZCNT-NEXT:    call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META114:metadata !.*]], metadata !DIExpression()), [[DBG118]]
+; NOLZCNT-NEXT:    [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG119:!dbg !.*]]
+; NOLZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META115:metadata !.*]], metadata !DIExpression()), [[DBG119]]
+; NOLZCNT-NEXT:    br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], [[DBG120:!dbg !.*]]
+; NOLZCNT:       end:
+; NOLZCNT-NEXT:    [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG117]]
+; NOLZCNT-NEXT:    ret i32 [[X_CURR_LCSSA]], [[DBG121:!dbg !.*]]
 ;
 entry:
   br label %loop
@@ -677,20 +699,42 @@ end:
 
 ; ICmp-Br are commutative
 define i32 @p11(i32 %x) {
-; ALL-LABEL: @p11(
-; ALL-NEXT:  entry:
-; ALL-NEXT:    br label [[LOOP:%.*]], [[DBG172:!dbg !.*]]
-; ALL:       loop:
-; ALL-NEXT:    [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG173:!dbg !.*]]
-; ALL-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG173]]
-; ALL-NEXT:    [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]]
-; ALL-NEXT:    call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]]
-; ALL-NEXT:    [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]]
-; ALL-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]]
-; ALL-NEXT:    br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]]
-; ALL:       end:
-; ALL-NEXT:    [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG173]]
-; ALL-NEXT:    ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]]
+; LZCNT-LABEL: @p11(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[X_NUMLEADINGZEROS:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), [[DBG172:!dbg !.*]]
+; LZCNT-NEXT:    [[X_NUMACTIVEBITS:%.*]] = sub i32 32, [[X_NUMLEADINGZEROS]], [[DBG172]]
+; LZCNT-NEXT:    [[X_LEADINGONEPOS:%.*]] = add i32 [[X_NUMACTIVEBITS]], -1, [[DBG172]]
+; LZCNT-NEXT:    [[LOOP_BACKEDGETAKENCOUNT:%.*]] = sub i32 31, [[X_LEADINGONEPOS]], [[DBG172]]
+; LZCNT-NEXT:    [[LOOP_TRIPCOUNT:%.*]] = add nuw i32 [[LOOP_BACKEDGETAKENCOUNT]], 1, [[DBG172]]
+; LZCNT-NEXT:    [[X_CURR:%.*]] = shl i32 [[X]], [[LOOP_BACKEDGETAKENCOUNT]], [[DBG172]]
+; LZCNT-NEXT:    [[X_NEXT:%.*]] = shl i32 [[X]], [[LOOP_TRIPCOUNT]], [[DBG172]]
+; LZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG173:!dbg !.*]]
+; LZCNT:       loop:
+; LZCNT-NEXT:    [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[X_NEXT]], [[LOOP]] ], [[DBG172]]
+; LZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG172]]
+; LZCNT-NEXT:    [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]]
+; LZCNT-NEXT:    call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]]
+; LZCNT-NEXT:    [[TMP1:%.*]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]]
+; LZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]]
+; LZCNT-NEXT:    br i1 true, label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]]
+; LZCNT:       end:
+; LZCNT-NEXT:    [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG172]]
+; LZCNT-NEXT:    ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]]
+;
+; NOLZCNT-LABEL: @p11(
+; NOLZCNT-NEXT:  entry:
+; NOLZCNT-NEXT:    br label [[LOOP:%.*]], [[DBG172:!dbg !.*]]
+; NOLZCNT:       loop:
+; NOLZCNT-NEXT:    [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], [[DBG173:!dbg !.*]]
+; NOLZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_CURR]], [[META169:metadata !.*]], metadata !DIExpression()), [[DBG173]]
+; NOLZCNT-NEXT:    [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[X_CURR]], 0, [[DBG174:!dbg !.*]]
+; NOLZCNT-NEXT:    call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], [[META170:metadata !.*]], metadata !DIExpression()), [[DBG174]]
+; NOLZCNT-NEXT:    [[X_NEXT]] = shl i32 [[X_CURR]], 1, [[DBG175:!dbg !.*]]
+; NOLZCNT-NEXT:    call void @llvm.dbg.value(metadata i32 [[X_NEXT]], [[META171:metadata !.*]], metadata !DIExpression()), [[DBG175]]
+; NOLZCNT-NEXT:    br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], [[DBG176:!dbg !.*]]
+; NOLZCNT:       end:
+; NOLZCNT-NEXT:    [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], [[DBG173]]
+; NOLZCNT-NEXT:    ret i32 [[X_CURR_LCSSA]], [[DBG177:!dbg !.*]]
 ;
 entry:
   br label %loop


        


More information about the llvm-commits mailing list