[llvm] r296828 - [BypassSlowDivision] Refactor fast division insertion logic (NFC)
Nikolai Bozhenov via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 2 14:05:07 PST 2017
Author: n.bozhenov
Date: Thu Mar 2 16:05:07 2017
New Revision: 296828
URL: http://llvm.org/viewvc/llvm-project?rev=296828&view=rev
Log:
[BypassSlowDivision] Refactor fast division insertion logic (NFC)
The most important goal of the patch is to break large insertFastDiv function
into separate pieces, so that later a different fast insertion logic can be
implemented using some of these pieces.
Differential Revision: https://reviews.llvm.org/D29896
Modified:
llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp
Modified: llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp?rev=296828&r1=296827&r2=296828&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp Thu Mar 2 16:05:07 2017
@@ -36,12 +36,21 @@ namespace {
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
};
- struct DivPhiNodes {
- PHINode *Quotient;
- PHINode *Remainder;
+ struct QuotRemPair {
+ Value *Quotient;
+ Value *Remainder;
- DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
- : Quotient(InQuotient), Remainder(InRemainder) {}
+ QuotRemPair(Value *InQuotient, Value *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+
+ /// A quotient and remainder, plus a BB from which they logically "originate".
+ /// If you use Quotient or Remainder in a Phi node, you should use BB as its
+ /// corresponding predecessor.
+ struct QuotRemWithBB {
+ BasicBlock *BB = nullptr;
+ Value *Quotient = nullptr;
+ Value *Remainder = nullptr;
};
}
@@ -69,92 +78,174 @@ namespace llvm {
}
};
- typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+ typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
+ typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
}
-// insertFastDiv - Substitutes the div/rem instruction with code that checks the
-// value of the operands and uses a shorter-faster div/rem instruction when
-// possible and the longer-slower div/rem instruction otherwise.
-static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
- bool UseDivOp, bool UseSignedOp,
- DivCacheTy &PerBBDivCache) {
- Function *F = I->getParent()->getParent();
- // Get instruction operands
- Value *Dividend = I->getOperand(0);
- Value *Divisor = I->getOperand(1);
+namespace {
+class FastDivInsertionTask {
+ bool IsValidTask = false;
+ Instruction *SlowDivOrRem = nullptr;
+ IntegerType *BypassType = nullptr;
+ BasicBlock *MainBB = nullptr;
+
+ QuotRemWithBB createSlowBB(BasicBlock *Successor);
+ QuotRemWithBB createFastBB(BasicBlock *Successor);
+ QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
+ BasicBlock *PhiBB);
+ Value *insertOperandRuntimeCheck();
+ Optional<QuotRemPair> insertFastDivAndRem();
+
+ bool isSignedOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::SRem;
+ }
+ bool isDivisionOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::UDiv;
+ }
+ Type *getSlowType() { return SlowDivOrRem->getType(); }
- if (isa<ConstantInt>(Divisor)) {
- // Division by a constant should have been been solved and replaced earlier
- // in the pipeline.
- return false;
+public:
+ FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+ Value *getReplacement(DivCacheTy &Cache);
+};
+} // anonymous namespace
+
+FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
+ const BypassWidthsTy &BypassWidths) {
+ switch (I->getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ SlowDivOrRem = I;
+ break;
+ default:
+ // I is not a div/rem operation.
+ return;
}
- // If the numerator is a constant, bail if it doesn't fit into BypassType.
- if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
- if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
- return false;
+ // Skip division on vector types. Only optimize integer instructions.
+ IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
+ if (!SlowType)
+ return;
+
+ // Skip if this bitwidth is not bypassed.
+ auto BI = BypassWidths.find(SlowType->getBitWidth());
+ if (BI == BypassWidths.end())
+ return;
+
+ // Get type for div/rem instruction with bypass bitwidth.
+ IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
+ BypassType = BT;
+
+ // The original basic block.
+ MainBB = I->getParent();
- // Basic Block is split before divide
- BasicBlock *MainBB = &*I->getParent();
- BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
-
- // Add new basic block for slow divide operation
- BasicBlock *SlowBB =
- BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
- SlowBB->moveBefore(SuccessorBB);
- IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
- Value *SlowQuotientV;
- Value *SlowRemainderV;
- if (UseSignedOp) {
- SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
- SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+ // The instruction is indeed a slow div or rem operation.
+ IsValidTask = true;
+}
+
+/// Reuses previously-computed dividend or remainder from the current BB if
+/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
+/// perform the optimization and caches the resulting dividend and remainder.
+/// If no replacement can be generated, nullptr is returned.
+Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
+ // First, make sure that the task is valid.
+ if (!IsValidTask)
+ return nullptr;
+
+ // Then, look for a value in Cache.
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ DivOpInfo Key(isSignedOp(), Dividend, Divisor);
+ auto CacheI = Cache.find(Key);
+
+ if (CacheI == Cache.end()) {
+ // If previous instance does not exist, try to insert fast div.
+ Optional<QuotRemPair> OptResult = insertFastDivAndRem();
+ // Bail out if insertFastDivAndRem has failed.
+ if (!OptResult)
+ return nullptr;
+ CacheI = Cache.insert({Key, *OptResult}).first;
+ }
+
+ QuotRemPair &Value = CacheI->second;
+ return isDivisionOp() ? Value.Quotient : Value.Remainder;
+}
+
+/// Add new basic block for slow div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isSignedOp()) {
+ DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
} else {
- SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
- SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+ DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
}
- SlowBuilder.CreateBr(SuccessorBB);
- // Add new basic block for fast divide operation
- BasicBlock *FastBB =
- BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
- FastBB->moveBefore(SlowBB);
- IRBuilder<> FastBuilder(FastBB, FastBB->begin());
- Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
- BypassType);
- Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
- BypassType);
-
- // udiv/urem because optimization only handles positive numbers
- Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
- Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
- ShortDivisorV);
- Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
- ShortQuotientV,
- Dividend->getType());
- Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
- ShortRemainderV,
- Dividend->getType());
- FastBuilder.CreateBr(SuccessorBB);
-
- // Phi nodes for result of div and rem
- IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
- PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
- QuoPhi->addIncoming(SlowQuotientV, SlowBB);
- QuoPhi->addIncoming(FastQuotientV, FastBB);
- PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
- RemPhi->addIncoming(SlowRemainderV, SlowBB);
- RemPhi->addIncoming(FastRemainderV, FastBB);
-
- // Replace I with appropriate phi node
- if (UseDivOp)
- I->replaceAllUsesWith(QuoPhi);
- else
- I->replaceAllUsesWith(RemPhi);
- I->eraseFromParent();
+ Builder.CreateBr(SuccessorBB);
+ return DivRemPair;
+}
- // Combine operands into a single value with OR for value testing below
- MainBB->getInstList().back().eraseFromParent();
- IRBuilder<> MainBuilder(MainBB, MainBB->end());
+/// Add new basic block for fast div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ Value *ShortDivisorV =
+ Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
+ Value *ShortDividendV =
+ Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
+
+ // udiv/urem because this optimization only handles positive numbers.
+ Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
+ Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
+ DivRemPair.Quotient =
+ Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
+ DivRemPair.Remainder =
+ Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
+ Builder.CreateBr(SuccessorBB);
+
+ return DivRemPair;
+}
+
+/// Creates Phi nodes for result of Div and Rem.
+QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
+ QuotRemWithBB &RHS,
+ BasicBlock *PhiBB) {
+ IRBuilder<> Builder(PhiBB, PhiBB->begin());
+ PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
+ QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
+ QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
+ PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
+ RemPhi->addIncoming(LHS.Remainder, LHS.BB);
+ RemPhi->addIncoming(RHS.Remainder, RHS.BB);
+ return QuotRemPair(QuoPhi, RemPhi);
+}
+
+/// Creates a runtime check to test whether both the divisor and dividend fit
+/// into BypassType. The check is inserted at the end of MainBB. True return
+/// value means that the operands fit.
+Value *FastDivInsertionTask::insertOperandRuntimeCheck() {
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
// We should have bailed out above if the divisor is a constant, but the
// dividend may still be a constant. Set OrV to our non-constant operands
@@ -163,65 +254,54 @@ static bool insertFastDiv(Instruction *I
Value *OrV;
if (!isa<ConstantInt>(Dividend))
- OrV = MainBuilder.CreateOr(Dividend, Divisor);
+ OrV = Builder.CreateOr(Dividend, Divisor);
else
OrV = Divisor;
// BitMask is inverted to check if the operands are
// larger than the bypass type
uint64_t BitMask = ~BypassType->getBitMask();
- Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
+ Value *AndV = Builder.CreateAnd(OrV, BitMask);
- // Compare operand values and branch
- Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
- Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
- MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
-
- // Cache phi nodes to be used later in place of other instances
- // of div or rem with the same sign, dividend, and divisor
- DivOpInfo Key(UseSignedOp, Dividend, Divisor);
- DivPhiNodes Value(QuoPhi, RemPhi);
- PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
- return true;
-}
-
-// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
-// the current BB if operands and operation are identical. Otherwise calls
-// insertFastDiv to perform the optimization and caches the resulting dividend
-// and remainder.
-static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
- bool UseDivOp, bool UseSignedOp,
- DivCacheTy &PerBBDivCache) {
- // Get instruction operands
- DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
- DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
-
- if (CacheI == PerBBDivCache.end()) {
- // If previous instance does not exist, insert fast div
- return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
- }
-
- // Replace operation value with previously generated phi node
- DivPhiNodes &Value = CacheI->second;
- if (UseDivOp) {
- // Replace all uses of div instruction with quotient phi node
- I->replaceAllUsesWith(Value.Quotient);
- } else {
- // Replace all uses of rem instruction with remainder phi node
- I->replaceAllUsesWith(Value.Remainder);
+ // Compare operand values
+ Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
+ return Builder.CreateICmpEQ(AndV, ZeroV);
+}
+
+/// Substitutes the div/rem instruction with code that checks the value of the
+/// operands and uses a shorter-faster div/rem instruction when possible.
+Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isa<ConstantInt>(Divisor)) {
+ // Keep division by a constant for DAGCombiner.
+ return None;
}
- // Remove redundant operation
- I->eraseFromParent();
- return true;
+ // If the numerator is a constant, bail if it doesn't fit into BypassType.
+ if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
+ if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
+ return None;
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->getInstList().back().eraseFromParent();
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemWithBB Slow = createSlowBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
+ Value *CmpV = insertOperandRuntimeCheck();
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
+ return Result;
}
-// bypassSlowDivision - This optimization identifies DIV instructions in a BB
-// that can be profitably bypassed and carried out with a shorter, faster
-// divide.
-bool llvm::bypassSlowDivision(
- BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
- DivCacheTy DivCache;
+/// This optimization identifies DIV/REM instructions in a BB that can be
+/// profitably bypassed and carried out with a shorter, faster divide.
+bool llvm::bypassSlowDivision(BasicBlock *BB,
+ const BypassWidthsTy &BypassWidths) {
+ DivCacheTy PerBBDivCache;
bool MadeChange = false;
Instruction* Next = &*BB->begin();
@@ -231,42 +311,20 @@ bool llvm::bypassSlowDivision(
Instruction* I = Next;
Next = Next->getNextNode();
- // Get instruction details
- unsigned Opcode = I->getOpcode();
- bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
- bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
- bool UseSignedOp = Opcode == Instruction::SDiv ||
- Opcode == Instruction::SRem;
-
- // Only optimize div or rem ops
- if (!UseDivOp && !UseRemOp)
- continue;
-
- // Skip division on vector types, only optimize integer instructions
- if (!I->getType()->isIntegerTy())
- continue;
-
- // Get bitwidth of div/rem instruction
- IntegerType *T = cast<IntegerType>(I->getType());
- unsigned int bitwidth = T->getBitWidth();
-
- // Continue if bitwidth is not bypassed
- DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
- if (BI == BypassWidths.end())
- continue;
-
- // Get type for div/rem instruction with bypass bitwidth
- IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
-
- MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
+ FastDivInsertionTask Task(I, BypassWidths);
+ if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ MadeChange = true;
+ }
}
// Above we eagerly create divs and rems, as pairs, so that we can efficiently
// create divrem machine instructions. Now erase any unused divs / rems so we
// don't leave extra instructions sitting around.
- for (auto &KV : DivCache)
- for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
- RecursivelyDeleteTriviallyDeadInstructions(Phi);
+ for (auto &KV : PerBBDivCache)
+ for (Value *V : {KV.second.Quotient, KV.second.Remainder})
+ RecursivelyDeleteTriviallyDeadInstructions(V);
return MadeChange;
}
More information about the llvm-commits
mailing list