<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Sat, Jun 29, 2013 at 1:40 AM, David Majnemer <span dir="ltr"><<a href="mailto:david.majnemer@gmail.com" target="_blank">david.majnemer@gmail.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: majnemer<br>
Date: Sat Jun 29 03:40:07 2013<br>
New Revision: 185257<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=185257&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=185257&view=rev</a><br>
Log:<br>
InstCombine: Be more agressive optimizing 'udiv' instrs with 'select' denoms<br>
<br>
Real world code sometimes has the denominator of a 'udiv' be a<br>
'select'. LLVM can handle such cases but only when the 'select'<br>
operands are symmetric in structure (both select operands are a constant<br>
power of two or a left shift, etc.). This falls apart if we are dealt a<br>
'udiv' where the code is not symetric or if the select operands lead us<br>
to more select instructions.<br>
<br>
Instead, we should treat the LHS and each select operand as a distinct<br>
divide operation and try to optimize them independently. If we can<br>
to simplify each operation, then we can replace the 'udiv' with, say, a<br>
'lshr' that has a new select with a bunch of new operands for the<br>
select.<br>
<br>
Modified:<br>
llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp<br>
llvm/trunk/test/Transforms/InstCombine/div-shift.ll<br>
<br>
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp?rev=185257&r1=185256&r2=185257&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp?rev=185257&r1=185256&r2=185257&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Sat Jun 29 03:40:07 2013<br>
@@ -705,26 +705,27 @@ static Value *dyn_castZExtVal(Value *V,<br>
return 0;<br>
}<br>
<br>
-Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {<br>
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);<br>
-<br>
- if (Value *V = SimplifyUDivInst(Op0, Op1, TD))<br>
- return ReplaceInstUsesWith(I, V);<br>
-<br>
- // Handle the integer div common cases<br>
- if (Instruction *Common = commonIDivTransforms(I))<br>
- return Common;<br>
+const unsigned MaxDepth = 6;<br>
<br>
+// \brief Recursively visits the possible right hand operands of a udiv<br>
+// instruction, seeing through select instructions, to determine if we can<br>
+// replace the udiv with something simpler. If we find that an operand is not<br>
+// able to simplify the udiv, we abort the entire transformation.<br>
+//<br>
+// Inserts any intermediate instructions used for the simplification into<br>
+// NewInstrs and returns a new instruction that depends upon them.<br>
+static Instruction *visitUDivOperand(Value *Op0, Value *Op1,<br>
+ const BinaryOperator &I,<br>
+ SmallVectorImpl<Instruction *> &NewInstrs,<br>
+ unsigned Depth = 0) {<br>
{<br>
// X udiv 2^C -> X >> C<br>
// Check to see if this is an unsigned division with an exact power of 2,<br>
// if so, convert to a right shift.<br>
const APInt *C;<br>
if (match(Op1, m_Power2(C))) {<br>
- BinaryOperator *LShr =<br>
- BinaryOperator::CreateLShr(Op0,<br>
- ConstantInt::get(Op0->getType(),<br>
- C->logBase2()));<br>
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(<br>
+ Op0, ConstantInt::get(Op0->getType(), C->logBase2()));<br>
if (I.isExact()) LShr->setIsExact();<br>
return LShr;<br>
}<br>
@@ -733,51 +734,68 @@ Instruction *InstCombiner::visitUDiv(Bin<br>
if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {<br>
// X udiv C, where C >= signbit<br>
if (C->getValue().isNegative()) {<br>
- Value *IC = Builder->CreateICmpULT(Op0, C);<br>
+ ICmpInst *IC = new ICmpInst(ICmpInst::ICMP_ULT, Op0, C);<br>
+ NewInstrs.push_back(IC);<br>
+<br>
return SelectInst::Create(IC, Constant::getNullValue(I.getType()),<br>
ConstantInt::get(I.getType(), 1));<br>
}<br>
}<br>
<br>
- // (x lshr C1) udiv C2 --> x udiv (C2 << C1)<br>
- if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {<br>
- Value *X;<br>
- ConstantInt *C1;<br>
- if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {<br>
- APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));<br>
- return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));<br>
- }<br>
- }<br>
-<br>
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)<br>
{ const APInt *CI; Value *N;<br>
if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||<br>
match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {<br>
- if (*CI != 1)<br>
- N = Builder->CreateAdd(N,<br>
- ConstantInt::get(N->getType(), CI->logBase2()));<br>
- if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))<br>
- N = Builder->CreateZExt(N, Z->getDestTy());<br>
- if (I.isExact())<br>
- return BinaryOperator::CreateExactLShr(Op0, N);<br>
- return BinaryOperator::CreateLShr(Op0, N);<br>
+ if (*CI != 1) {<br>
+ N = BinaryOperator::CreateAdd(<br>
+ N, ConstantInt::get(N->getType(), CI->logBase2()));<br>
+ NewInstrs.push_back(cast<Instruction>(N));<br>
+ }<br>
+ if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) {<br>
+ N = new ZExtInst(N, Z->getDestTy());<br>
+ NewInstrs.push_back(cast<Instruction>(N));<br>
+ }<br>
+ BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);<br>
+ if (I.isExact()) LShr->setIsExact();<br>
+ return LShr;<br>
}<br>
}<br>
<br>
- // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)<br>
- // where C1&C2 are powers of two.<br>
- { Value *Cond; const APInt *C1, *C2;<br>
- if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {<br>
- // Construct the "on true" case of the select<br>
- Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",<br>
- I.isExact());<br>
-<br>
- // Construct the "on false" case of the select<br>
- Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",<br>
- I.isExact());<br>
+ // The remaining tests are all recursive, so bail out if we hit the limit.<br>
+ if (Depth++ == MaxDepth)<br>
+ return 0;<br>
+<br>
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))<br>
+ if (Instruction *LHS =<br>
+ visitUDivOperand(Op0, SI->getOperand(1), I, NewInstrs)) {<br>
+ NewInstrs.push_back(LHS);<br>
+ if (Instruction *RHS =<br>
+ visitUDivOperand(Op0, SI->getOperand(2), I, NewInstrs)) {<br>
+ NewInstrs.push_back(RHS);<br>
+ return SelectInst::Create(SI->getCondition(), LHS, RHS);<br>
+ }<br>
+ }<br>
+<br>
+ return 0;<br>
+}<br>
+<br>
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {<br>
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);<br>
+<br>
+ if (Value *V = SimplifyUDivInst(Op0, Op1, TD))<br>
+ return ReplaceInstUsesWith(I, V);<br>
+<br>
+ // Handle the integer div common cases<br>
+ if (Instruction *Common = commonIDivTransforms(I))<br>
+ return Common;<br>
<br>
- // construct the select instruction and return it.<br>
- return SelectInst::Create(Cond, TSI, FSI);<br>
+ // (x lshr C1) udiv C2 --> x udiv (C2 << C1)<br>
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {<br>
+ Value *X;<br>
+ ConstantInt *C1;<br>
+ if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {<br>
+ APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));<br>
+ return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));<br>
}<br>
}<br>
<br>
@@ -788,6 +806,21 @@ Instruction *InstCombiner::visitUDiv(Bin<br>
I.isExact()),<br>
I.getType());<br>
<br>
+ // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))<br>
+ SmallVector<Instruction *, 4> NewInstrs;<br>
+ Instruction *RetI = visitUDivOperand(Op0, Op1, I, NewInstrs);<br>
+ for (unsigned i = 0, e = NewInstrs.size(); i != e; i++)<br>
+ // If we managed to replace the UDiv completely, insert the new intermediate<br>
+ // instructions before where the UDiv was.<br>
+ // If we couldn't, we must clean up after ourselves by deleting the new<br>
+ // instructions.<br>
+ if (RetI)<br>
+ NewInstrs[i]->insertBefore(&I);<br>
+ else<br>
+ delete NewInstrs[i];<br>
+ if (RetI)<br>
+ return RetI;<br><br></blockquote><div><br></div><div>"Create a bunch of instructions, then delete them" is a pattern we generally try to avoid in InstCombine. I can see why you chose it here, but please don't; it's a bad idea in terms of performance.<br>
<br></div><div>-Eli<br></div></div></div></div>