[llvm] r350074 - [LoopIdiomRecognize] Add CTTZ support

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 26 13:59:48 PST 2018


Author: ctopper
Date: Wed Dec 26 13:59:48 2018
New Revision: 350074

URL: http://llvm.org/viewvc/llvm-project?rev=350074&view=rev
Log:
[LoopIdiomRecognize] Add CTTZ support

Summary:
Existing LIR recognizes CTLZ where shifting input variable right until it is zero. (Shift-Until-Zero idiom)

This commit:
1. Augments Shift-Until-Zero idiom to recognize CTTZ where input variable is shifted left.
2. Prepare for BitScan idiom recognition.

Patch by Yuanfang Chen (tabloid.adroit)

Reviewers: craig.topper, evstupac

Reviewed By: craig.topper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D55876

Added:
    llvm/trunk/test/Transforms/LoopIdiom/X86/cttz.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=350074&r1=350073&r2=350074&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Wed Dec 26 13:59:48 2018
@@ -26,7 +26,7 @@
 // Future floating point idioms to recognize in -ffast-math mode:
 //   fpowi
 // Future integer operation idioms to recognize:
-//   ctpop, ctlz, cttz
+//   ctpop
 //
 // Beware that isel's default lowering for ctpop is highly inefficient for
 // i64 and larger types when i64 is legal and the value has few bits set.  It
@@ -187,9 +187,10 @@ private:
   bool recognizePopcount();
   void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                                PHINode *CntPhi, Value *Var);
-  bool recognizeAndInsertCTLZ();
-  void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
-                                PHINode *CntPhi, Value *Var, Instruction *DefX,
+  bool recognizeAndInsertFFS();  /// Find First Set: ctlz or cttz
+  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
+                                Instruction *CntInst, PHINode *CntPhi,
+                                Value *Var, Instruction *DefX,
                                 const DebugLoc &DL, bool ZeroCheck,
                                 bool IsCntPhiUsedOutsideLoop);
 
@@ -1108,15 +1109,17 @@ bool LoopIdiomRecognize::avoidLIRForMult
 }
 
 bool LoopIdiomRecognize::runOnNoncountableLoop() {
-  return recognizePopcount() || recognizeAndInsertCTLZ();
+  return recognizePopcount() || recognizeAndInsertFFS();
 }
 
 /// Check if the given conditional branch is based on the comparison between
-/// a variable and zero, and if the variable is non-zero, the control yields to
-/// the loop entry. If the branch matches the behavior, the variable involved
-/// in the comparison is returned. This function will be called to see if the
-/// precondition and postcondition of the loop are in desirable form.
-static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
+/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
+/// true), the control yields to the loop entry. If the branch matches the
+/// behavior, the variable involved in the comparison is returned. This function
+/// will be called to see if the precondition and postcondition of the loop are
+/// in desirable form.
+static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
+                             bool JmpOnZero = false) {
   if (!BI || !BI->isConditional())
     return nullptr;
 
@@ -1128,9 +1131,14 @@ static Value *matchCondition(BranchInst
   if (!CmpZero || !CmpZero->isZero())
     return nullptr;
 
+  BasicBlock *TrueSucc = BI->getSuccessor(0);
+  BasicBlock *FalseSucc = BI->getSuccessor(1);
+  if (JmpOnZero)
+    std::swap(TrueSucc, FalseSucc);
+
   ICmpInst::Predicate Pred = Cond->getPredicate();
-  if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
-      (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
+  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
+      (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
     return Cond->getOperand(0);
 
   return nullptr;
@@ -1306,14 +1314,14 @@ static bool detectPopcountIdiom(Loop *Cu
 ///
 /// loop-exit:
 /// \endcode
-static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
-                            Instruction *&CntInst, PHINode *&CntPhi,
-                            Instruction *&DefX) {
+static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
+                                      Intrinsic::ID &IntrinID, Value *&InitX,
+                                      Instruction *&CntInst, PHINode *&CntPhi,
+                                      Instruction *&DefX) {
   BasicBlock *LoopEntry;
   Value *VarX = nullptr;
 
   DefX = nullptr;
-  PhiX = nullptr;
   CntInst = nullptr;
   CntPhi = nullptr;
   LoopEntry = *(CurLoop->block_begin());
@@ -1325,20 +1333,28 @@ static bool detectCTLZIdiom(Loop *CurLoo
   else
     return false;
 
-  // step 2: detect instructions corresponding to "x.next = x >> 1"
-  if (!DefX || (DefX->getOpcode() != Instruction::AShr &&
-                DefX->getOpcode() != Instruction::LShr))
+  // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
+  if (!DefX || !DefX->isShift())
     return false;
+  IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
+                                                     Intrinsic::ctlz;
   ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
   if (!Shft || !Shft->isOne())
     return false;
   VarX = DefX->getOperand(0);
 
   // step 3: Check the recurrence of variable X
-  PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
+  PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
   if (!PhiX)
     return false;
 
+  InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+
+  // Make sure the initial value can't be negative otherwise the ashr in the
+  // loop might never reach zero which would make the loop infinite.
+  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
+    return false;
+
   // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
   // TODO: We can skip the step. If loop trip count is known (CTLZ),
   //       then all uses of "cnt.next" could be optimized to the trip count
@@ -1370,17 +1386,25 @@ static bool detectCTLZIdiom(Loop *CurLoo
   return true;
 }
 
-/// Recognize CTLZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ trip count).
-/// If CTLZ inserted as a new trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
+/// trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
   // Give up if the loop has multiple blocks or multiple backedges.
   if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
     return false;
 
-  Instruction *CntInst, *DefX;
-  PHINode *CntPhi, *PhiX;
-  if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
+  Intrinsic::ID IntrinID;
+  Value *InitX;
+  Instruction *DefX = nullptr;
+  PHINode *CntPhi = nullptr;
+  Instruction *CntInst = nullptr;
+  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
+  // this is always 6.
+  size_t IdiomCanonicalSize = 6;
+
+  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
+                                 CntInst, CntPhi, DefX))
     return false;
 
   bool IsCntPhiUsedOutsideLoop = false;
@@ -1407,12 +1431,6 @@ bool LoopIdiomRecognize::recognizeAndIns
   // It is safe to assume Preheader exist as it was checked in
   // parent function RunOnLoop.
   BasicBlock *PH = CurLoop->getLoopPreheader();
-  Value *InitX = PhiX->getIncomingValueForBlock(PH);
-
-  // Make sure the initial value can't be negative otherwise the ashr in the
-  // loop might never reach zero which would make the loop infinite.
-  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))
-    return false;
 
   // If we are using the count instruction outside the loop, make sure we
   // have a zero check as a precondition. Without the check the loop would run
@@ -1430,8 +1448,10 @@ bool LoopIdiomRecognize::recognizeAndIns
     ZeroCheck = true;
   }
 
-  // Check if CTLZ intrinsic is profitable. Assume it is always profitable
-  // if we delete the loop (the loop has only 6 instructions):
+  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+  // profitable if we delete the loop.
+
+  // the loop has only 6 instructions:
   //  %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
   //  %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
   //  %shr = ashr %n.addr.0, 1
@@ -1442,12 +1462,12 @@ bool LoopIdiomRecognize::recognizeAndIns
   const Value *Args[] =
       {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
                         : ConstantInt::getFalse(InitX->getContext())};
-  if (CurLoop->getHeader()->size() != 6 &&
-      TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
-          TargetTransformInfo::TCC_Basic)
+  if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
+      TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
+        TargetTransformInfo::TCC_Basic)
     return false;
 
-  transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX,
+  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
                            DefX->getDebugLoc(), ZeroCheck,
                            IsCntPhiUsedOutsideLoop);
   return true;
@@ -1516,20 +1536,21 @@ static CallInst *createPopcntIntrinsic(I
   return CI;
 }
 
-static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
-                                     const DebugLoc &DL, bool ZeroCheck) {
+static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+                                    const DebugLoc &DL, bool ZeroCheck,
+                                    Intrinsic::ID IID) {
   Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
   Type *Tys[] = {Val->getType()};
 
   Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
-  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
+  Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
   CallInst *CI = IRBuilder.CreateCall(Func, Ops);
   CI->setDebugLoc(DL);
 
   return CI;
 }
 
-/// Transform the following loop:
+/// Transform the following loop (Using CTLZ, CTTZ is similar):
 /// loop:
 ///   CntPhi = PHI [Cnt0, CntInst]
 ///   PhiX = PHI [InitX, DefX]
@@ -1561,19 +1582,19 @@ static CallInst *createCTLZIntrinsic(IRB
 /// If LOOP_BODY is empty the loop will be deleted.
 /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
 void LoopIdiomRecognize::transformLoopToCountable(
-    BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
-    Instruction *DefX, const DebugLoc &DL, bool ZeroCheck,
-    bool IsCntPhiUsedOutsideLoop) {
+    Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
+    PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
+    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
   BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
 
-  // Step 1: Insert the CTLZ instruction at the end of the preheader block
-  //   Count = BitWidth - CTLZ(InitX);
-  // If there are uses of CntPhi create:
-  //   CountPrev = BitWidth - CTLZ(InitX >> 1);
+  // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
   IRBuilder<> Builder(PreheaderBr);
   Builder.SetCurrentDebugLocation(DL);
-  Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
+  Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
 
+  //   Count = BitWidth - CTLZ(InitX);
+  // If there are uses of CntPhi create:
+  //   CountPrev = BitWidth - CTLZ(InitX >> 1);
   if (IsCntPhiUsedOutsideLoop) {
     if (DefX->getOpcode() == Instruction::AShr)
       InitXNext =
@@ -1581,29 +1602,30 @@ void LoopIdiomRecognize::transformLoopTo
     else if (DefX->getOpcode() == Instruction::LShr)
       InitXNext =
           Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
+    else if (DefX->getOpcode() == Instruction::Shl) // cttz
+      InitXNext =
+          Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
     else
       llvm_unreachable("Unexpected opcode!");
   } else
     InitXNext = InitX;
-  CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
+  FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
   Count = Builder.CreateSub(
-      ConstantInt::get(CTLZ->getType(),
-                       CTLZ->getType()->getIntegerBitWidth()),
-      CTLZ);
+      ConstantInt::get(FFS->getType(),
+                       FFS->getType()->getIntegerBitWidth()),
+      FFS);
   if (IsCntPhiUsedOutsideLoop) {
     CountPrev = Count;
     Count = Builder.CreateAdd(
         CountPrev,
         ConstantInt::get(CountPrev->getType(), 1));
   }
-  if (IsCntPhiUsedOutsideLoop)
-    NewCount = Builder.CreateZExtOrTrunc(CountPrev,
-        cast<IntegerType>(CntInst->getType()));
-  else
-    NewCount = Builder.CreateZExtOrTrunc(Count,
-        cast<IntegerType>(CntInst->getType()));
 
-  // If the CTLZ counter's initial value is not zero, insert Add Inst.
+  NewCount = Builder.CreateZExtOrTrunc(
+                      IsCntPhiUsedOutsideLoop ? CountPrev : Count,
+                      cast<IntegerType>(CntInst->getType()));
+
+  // If the counter's initial value is not zero, insert Add Inst.
   Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
   ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
   if (!InitConst || !InitConst->isZero())
@@ -1639,8 +1661,7 @@ void LoopIdiomRecognize::transformLoopTo
   LbCond->setOperand(1, ConstantInt::get(Ty, 0));
 
   // Step 3: All the references to the original counter outside
-  //  the loop are replaced with the NewCount -- the value returned from
-  //  __builtin_ctlz(x).
+  //  the loop are replaced with the NewCount
   if (IsCntPhiUsedOutsideLoop)
     CntPhi->replaceUsesOutsideBlock(NewCount, Body);
   else

Added: llvm/trunk/test/Transforms/LoopIdiom/X86/cttz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/X86/cttz.ll?rev=350074&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopIdiom/X86/cttz.ll (added)
+++ llvm/trunk/test/Transforms/LoopIdiom/X86/cttz.ll Wed Dec 26 13:59:48 2018
@@ -0,0 +1,82 @@
+; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck --check-prefix=ALL %s
+; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck --check-prefix=ALL %s
+
+; Recognize CTTZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int cttz_zero_check(int n)
+; {
+;   int i = 0;
+;   while(n) {
+;     n <<= 1;
+;     i++;
+;   }
+;   return i;
+; }
+;
+; ALL-LABEL: @cttz_zero_check
+; ALL:       %0 = call i32 @llvm.cttz.i32(i32 %n, i1 true)
+; ALL-NEXT:  %1 = sub i32 32, %0
+;
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @cttz_zero_check(i32 %n) {
+entry:
+  %tobool4 = icmp eq i32 %n, 0
+  br i1 %tobool4, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
+  %n.addr.05 = phi i32 [ %shl, %while.body ], [ %n, %while.body.preheader ]
+  %shl = shl i32 %n.addr.05, 1
+  %inc = add nsw i32 %i.06, 1
+  %tobool = icmp eq i32 %shl, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
+  ret i32 %i.0.lcssa
+}
+
+; Recognize CTTZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int cttz(int n)
+; {
+;   int i = 0;
+;   while(n <<= 1) {
+;     i++;
+;   }
+;   return i;
+; }
+;
+; ALL-LABEL: @cttz
+; ALL:      %0 = shl i32 %n, 1
+; ALL-NEXT: %1 = call i32 @llvm.cttz.i32(i32 %0, i1 false)
+; ALL-NEXT: %2 = sub i32 32, %1
+; ALL-NEXT: %3 = add i32 %2, 1
+;
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @cttz(i32 %n) {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %shl, %while.cond ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
+  %shl = shl i32 %n.addr.0, 1
+  %tobool = icmp eq i32 %shl, 0
+  %inc = add nsw i32 %i.0, 1
+  br i1 %tobool, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret i32 %i.0
+}
+




More information about the llvm-commits mailing list