[llvm-commits] [llvm] r170471 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineAddSub.cpp test/Transforms/InstCombine/fast-math.ll
Eli Friedman
eli.friedman at gmail.com
Tue Dec 18 15:52:39 PST 2012
On Tue, Dec 18, 2012 at 3:49 PM, Eli Friedman <eli.friedman at gmail.com> wrote:
> On Tue, Dec 18, 2012 at 3:10 PM, Shuxin Yang <shuxin.llvm at gmail.com> wrote:
>> Author: shuxin_yang
>> Date: Tue Dec 18 17:10:12 2012
>> New Revision: 170471
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=170471&view=rev
>> Log:
>> rdar://12801297
>>
>> InstCombine for unsafe floating-point add/sub.
>>
>> Modified:
>> llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
>> llvm/trunk/test/Transforms/InstCombine/fast-math.ll
>
> General comment: I was sort of expecting one more round of review
> before you committed this because you made some substantial changes to
> the previous version. (No need to back out, though.)
>
>> Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp?rev=170471&r1=170470&r2=170471&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp (original)
>> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp Tue Dec 18 17:10:12 2012
>> @@ -19,10 +19,715 @@
>> using namespace llvm;
>> using namespace PatternMatch;
>>
>> +namespace {
>> +
>> + /// Class representing coefficient of floating-point addend.
>> + /// This class needs to be highly efficient, which is especially true for
>> + /// the constructor. As of I write this comment, the cost of the default
>> + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
>> + /// perform write-merging).
>> + ///
>> + class FAddendCoef {
>> + public:
>> + // The constructor has to initialize a APFloat, which is uncessary for
>> + // most addends which have coefficient either 1 or -1. So, the constructor
>> + // is expensive. In order to avoid the cost of the constructor, we should
>> + // reuse some instances whenever possible. The pre-created instances
>> + // FAddCombine::Add[0-5] embodies this idea.
>> + //
>> + FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
>> + ~FAddendCoef();
>> +
>> + void set(short C) {
>> + assert(!insaneIntVal(C) && "Insane coefficient");
>> + IsFp = false; IntVal = C;
>> + }
>> +
>> + void set(const APFloat& C);
>> +
>> + void negate();
>> +
>> + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
>> + Value *getValue(Type *) const;
>> +
>> + // If possible, don't define operator+/operator- etc because these
>> + // operators inevitably call FAddendCoef's constructor which is not cheap.
>> + void operator=(const FAddendCoef &A);
>> + void operator+=(const FAddendCoef &A);
>> + void operator-=(const FAddendCoef &A);
>> + void operator*=(const FAddendCoef &S);
>> +
>> + bool isOne() const { return isInt() && IntVal == 1; }
>> + bool isTwo() const { return isInt() && IntVal == 2; }
>> + bool isMinusOne() const { return isInt() && IntVal == -1; }
>> + bool isMinusTwo() const { return isInt() && IntVal == -2; }
>> +
>> + private:
>> + bool insaneIntVal(int V) { return V > 4 || V < -4; }
>> + APFloat *getFpValPtr(void)
>> + { return reinterpret_cast<APFloat*>(&FpValBuf[0]); }
>> +
>> + const APFloat &getFpVal(void) const {
>> + assert(IsFp && BufHasFpVal && "Incorret state");
>> + return *reinterpret_cast<const APFloat*>(&FpValBuf[0]);
>> + }
>> +
>> + APFloat &getFpVal(void)
>> + { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
>> +
>> + bool isInt() const { return !IsFp; }
>> +
>> + private:
>> + bool IsFp;
>> +
>> + // True iff FpValBuf contains an instance of APFloat.
>> + bool BufHasFpVal;
>> +
>> + // The integer coefficient of an individual addend is either 1 or -1,
>> + // and we try to simplify at most 4 addends from neighboring at most
>> + // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
>> + // is overkill of this end.
>> + short IntVal;
>> +
>> + union {
>> + char FpValBuf[sizeof(APFloat)];
>> + int dummy; // So this structure has at least 4-byte alignment.
>> + };
>
> I'm sure I made a comment here before about using llvm::AlignedCharArrayUnion...
>
>> + };
>> +
>> + /// FAddend is used to represent floating-point addend. An addend is
>> + /// represented as <C, V>, where the V is a symbolic value, and C is a
>> + /// constant coefficient. A constant addend is represented as <C, 0>.
>> + ///
>> + class FAddend {
>> + public:
>> + FAddend() { Val = 0; }
>> +
>> + Value *getSymVal (void) const { return Val; }
>> + const FAddendCoef &getCoef(void) const { return Coeff; }
>> +
>> + bool isConstant() const { return Val == 0; }
>> + bool isZero() const { return Coeff.isZero(); }
>> +
>> + void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; }
>> + void set(const APFloat& Coefficient, Value *V)
>> + { Coeff.set(Coefficient); Val = V; }
>> + void set(const ConstantFP* Coefficient, Value *V)
>> + { Coeff.set(Coefficient->getValueAPF()); Val = V; }
>> +
>> + void negate() { Coeff.negate(); }
>> +
>> + /// Drill down the U-D chain one step to find the definition of V, and
>> + /// try to break the definition into one or two addends.
>> + static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
>> +
>> + /// Similar to FAddend::drillDownOneStep() except that the value being
>> + /// splitted is the addend itself.
>> + unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
>> +
>> + void operator+=(const FAddend &T) {
>> + assert((Val == T.Val) && "Symbolic-values disagree");
>> + Coeff += T.Coeff;
>> + }
>> +
>> + private:
>> + void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
>> +
>> + // This addend has the value of "Coeff * Val".
>> + Value *Val;
>> + FAddendCoef Coeff;
>> + };
>> +
>> + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
>> + /// with its neighboring at most two instructions.
>> + ///
>> + class FAddCombine {
>> + public:
>> + FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
>> + Value *simplify(Instruction *FAdd);
>> +
>> + private:
>> + typedef SmallVector<const FAddend*, 4> AddendVect;
>> +
>> + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
>> +
>> + /// Convert given addend to a Value
>> + Value *createAddendVal(const FAddend &A, bool& NeedNeg);
>> +
>> + /// Return the number of instructions needed to emit the N-ary addition.
>> + unsigned calcInstrNumber(const AddendVect& Vect);
>> + Value *createFSub(Value *Opnd0, Value *Opnd1);
>> + Value *createFAdd(Value *Opnd0, Value *Opnd1);
>> + Value *createFMul(Value *Opnd0, Value *Opnd1);
>> + Value *createFNeg(Value *V);
>> + Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
>> + void createInstPostProc(Instruction *NewInst);
>> +
>> + InstCombiner::BuilderTy *Builder;
>> + Instruction *Instr;
>> +
>> + private:
>> + // Debugging stuff are clustered here.
>> + #ifndef NDEBUG
>> + unsigned CreateInstrNum;
>> + void initCreateInstNum() { CreateInstrNum = 0; }
>> + void incCreateInstNum() { CreateInstrNum++; }
>> + #else
>> + void initCreateInstNum() {}
>> + void incCreateInstNum() {}
>> + #endif
>> + };
>> +}
>> +
>> +//===----------------------------------------------------------------------===//
>> +//
>> +// Implementation of
>> +// {FAddendCoef, FAddend, FAddition, FAddCombine}.
>> +//
>> +//===----------------------------------------------------------------------===//
>> +FAddendCoef::~FAddendCoef() {
>> + if (BufHasFpVal)
>> + getFpValPtr()->~APFloat();
>> +}
>> +
>> +void FAddendCoef::set(const APFloat& C) {
>> + APFloat *P = getFpValPtr();
>> +
>> + if (isInt()) {
>> + // As the buffer is meanless byte stream, we cannot call
>> + // APFloat::operator=().
>> + new(P) APFloat(C);
>> + } else
>> + *P = C;
>> +
>> + IsFp = BufHasFpVal = true;
>> +}
>> +
>> +void FAddendCoef::operator=(const FAddendCoef& That) {
>> + if (That.isInt())
>> + set(That.IntVal);
>> + else
>> + set(That.getFpVal());
>> +}
>> +
>> +void FAddendCoef::operator+=(const FAddendCoef &That) {
>> + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
>> + if (isInt() == That.isInt()) {
>> + if (isInt())
>> + IntVal += That.IntVal;
>> + else
>> + getFpVal().add(That.getFpVal(), RndMode);
>> + return;
>> + }
>> +
>> + if (isInt()) {
>> + const APFloat &T = That.getFpVal();
>> + set(T);
>> + getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode);
>> + return;
>> + }
>> +
>> + APFloat &T = getFpVal();
>> + T.add(APFloat(T.getSemantics(), That.IntVal), RndMode);
>> +}
>> +
>> +void FAddendCoef::operator-=(const FAddendCoef &That) {
>> + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
>> + if (isInt() == That.isInt()) {
>> + if (isInt())
>> + IntVal -= That.IntVal;
>> + else
>> + getFpVal().subtract(That.getFpVal(), RndMode);
>> + return;
>> + }
>> +
>> + if (isInt()) {
>> + const APFloat &T = That.getFpVal();
>> + set(T);
>> + getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode);
>> + return;
>> + }
>> +
>> + APFloat &T = getFpVal();
>> + T.subtract(APFloat(T.getSemantics(), IntVal), RndMode);
>> +}
>> +
>> +void FAddendCoef::operator*=(const FAddendCoef &That) {
>> + if (That.isOne())
>> + return;
>> +
>> + if (That.isMinusOne()) {
>> + negate();
>> + return;
>> + }
>> +
>> + if (isInt() && That.isInt()) {
>> + int Res = IntVal * (int)That.IntVal;
>> + assert(!insaneIntVal(Res) && "Insane int value");
>> + IntVal = Res;
>> + return;
>> + }
>> +
>> + const fltSemantics &Semantic =
>> + isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
>> +
>> + if (isInt())
>> + set(APFloat(Semantic, IntVal));
>> + APFloat &F0 = getFpVal();
>> +
>> + if (That.isInt())
>> + F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven);
>> + else
>> + F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
>> +
>> + return;
>> +}
>> +
>> +void FAddendCoef::negate() {
>> + if (isInt())
>> + IntVal = 0 - IntVal;
>> + else
>> + getFpVal().changeSign();
>> +}
>> +
>> +Value *FAddendCoef::getValue(Type *Ty) const {
>> + return isInt() ?
>> + ConstantFP::get(Ty, float(IntVal)) :
>> + ConstantFP::get(Ty->getContext(), getFpVal());
>> +}
>> +
>> +// The definition of <Val> Addends
>> +// =========================================
>> +// A + B <1, A>, <1,B>
>> +// A - B <1, A>, <1,B>
>> +// 0 - B <-1, B>
>> +// C * A, <C, A>
>> +// A + C <1, A> <C, NULL>
>> +// 0 +/- 0 <0, NULL> (corner case)
>> +//
>> +// Legend: A and B are not constant, C is constant
>> +//
>> +unsigned FAddend::drillValueDownOneStep
>> + (Value *Val, FAddend &Addend0, FAddend &Addend1) {
>> + Instruction *I = 0;
>> + if (Val == 0 || !(I = dyn_cast<Instruction>(Val)))
>> + return 0;
>> +
>> + unsigned Opcode = I->getOpcode();
>> +
>> + if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
>> + ConstantFP *C0, *C1;
>> + Value *Opnd0 = I->getOperand(0);
>> + Value *Opnd1 = I->getOperand(1);
>> + if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
>> + Opnd0 = 0;
>> +
>> + if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
>> + Opnd1 = 0;
>> +
>> + if (Opnd0) {
>> + if (!C0)
>> + Addend0.set(1, Opnd0);
>> + else
>> + Addend0.set(C0, 0);
>> + }
>> +
>> + if (Opnd1) {
>> + FAddend &Addend = Opnd0 ? Addend1 : Addend0;
>> + if (!C1)
>> + Addend.set(1, Opnd1);
>> + else
>> + Addend.set(C1, 0);
>> + if (Opcode == Instruction::FSub)
>> + Addend.negate();
>> + }
>> +
>> + if (Opnd0 || Opnd1)
>> + return Opnd0 && Opnd1 ? 2 : 1;
>> +
>> + // Both operands are zero. Weird!
>> + Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0);
>> + return 1;
>> + }
>> +
>> + if (I->getOpcode() == Instruction::FMul) {
>> + Value *V0 = I->getOperand(0);
>> + Value *V1 = I->getOperand(1);
>> + if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
>> + Addend0.set(C, V1);
>> + return 1;
>> + }
>> +
>> + if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
>> + Addend0.set(C, V0);
>> + return 1;
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +// Try to break *this* addend into two addends. e.g. Suppose this addend is
>> +// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends,
>> +// i.e. <2.3, X> and <2.3, Y>.
>> +//
>> +unsigned FAddend::drillAddendDownOneStep
>> + (FAddend &Addend0, FAddend &Addend1) const {
>> + if (isConstant())
>> + return 0;
>> +
>> + unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
>> + if (!BreakNum || Coeff.isOne())
>> + return BreakNum;
>> +
>> + Addend0.Scale(Coeff);
>> +
>> + if (BreakNum == 2)
>> + Addend1.Scale(Coeff);
>> +
>> + return BreakNum;
>> +}
>> +
>> +Value *FAddCombine::simplify(Instruction *I) {
>> + assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
>> +
>> + // Currently we are not able to handle vector type.
>> + if (I->getType()->isVectorTy())
>> + return 0;
>> +
>> + assert((I->getOpcode() == Instruction::FAdd ||
>> + I->getOpcode() == Instruction::FSub) && "Expect add/sub");
>> +
>> + // Save the instruction before calling other member-functions.
>> + Instr = I;
>> +
>> + FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
>> +
>> + unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
>> +
>> + // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
>> + unsigned Opnd0_ExpNum = 0;
>> + unsigned Opnd1_ExpNum = 0;
>> +
>> + if (!Opnd0.isConstant())
>> + Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
>> +
>> + // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
>> + if (OpndNum == 2 && !Opnd1.isConstant())
>> + Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
>> +
>> + // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
>> + if (Opnd0_ExpNum && Opnd1_ExpNum) {
>> + AddendVect AllOpnds;
>> + AllOpnds.push_back(&Opnd0_0);
>> + AllOpnds.push_back(&Opnd1_0);
>> + if (Opnd0_ExpNum == 2)
>> + AllOpnds.push_back(&Opnd0_1);
>> + if (Opnd1_ExpNum == 2)
>> + AllOpnds.push_back(&Opnd1_1);
>> +
>> + // Compute instruction quota. We should save at least one instruction.
>> + unsigned InstQuota = 0;
>> +
>> + Value *V0 = I->getOperand(0);
>> + Value *V1 = I->getOperand(1);
>> + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
>> + (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
>> +
>> + if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
>> + return R;
>> + }
>> +
>> + if (OpndNum != 2) {
>> + // The input instruction is : "I=0.0 +/- V". If the "V" were able to be
>> + // splitted into two addends, say "V = X - Y", the instruction would have
>> + // been optimized into "I = Y - X" in the previous steps.
>> + //
>> + const FAddendCoef &CE = Opnd0.getCoef();
>> + return CE.isOne() ? Opnd0.getSymVal() : 0;
>> + }
>> +
>> + // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
>> + if (Opnd1_ExpNum) {
>> + AddendVect AllOpnds;
>> + AllOpnds.push_back(&Opnd0);
>> + AllOpnds.push_back(&Opnd1_0);
>> + if (Opnd1_ExpNum == 2)
>> + AllOpnds.push_back(&Opnd1_1);
>> +
>> + if (Value *R = simplifyFAdd(AllOpnds, 1))
>> + return R;
>> + }
>> +
>> + // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
>> + if (Opnd0_ExpNum) {
>> + AddendVect AllOpnds;
>> + AllOpnds.push_back(&Opnd1);
>> + AllOpnds.push_back(&Opnd0_0);
>> + if (Opnd0_ExpNum == 2)
>> + AllOpnds.push_back(&Opnd0_1);
>> +
>> + if (Value *R = simplifyFAdd(AllOpnds, 1))
>> + return R;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
>> +
>> + unsigned AddendNum = Addends.size();
>> + assert(AddendNum <= 4 && "Too many addends");
>> +
>> + // For saving intermediate results;
>> + unsigned NextTmpIdx = 0;
>> + FAddend TmpResult[3];
>> +
>> + // Points to the constant addend of the resulting simplified expression.
>> + // If the resulting expr has constant-addend, this constant-addend is
>> + // desirable to reside at the top of the resulting expression tree. Placing
>> + // constant close to supper-expr(s) will potentially reveal some optimization
>> + // opportunities in super-expr(s).
>> + //
>> + const FAddend *ConstAdd = 0;
>> +
>> + // Simplified addends are placed <SimpVect>.
>> + AddendVect SimpVect;
>> +
>> + // The outer loop works on one symbolic-value at a time. Suppose the input
>> + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
>> + // The symbolic-values will be processed in this order: x, y, z.
>> + //
>> + for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
>> +
>> + const FAddend *ThisAddend = Addends[SymIdx];
>> + if (!ThisAddend) {
>> + // This addend was processed before.
>> + continue;
>> + }
>> +
>> + Value *Val = ThisAddend->getSymVal();
>> + unsigned StartIdx = SimpVect.size();
>> + SimpVect.push_back(ThisAddend);
>> +
>> + // The inner loop collects addends sharing same symbolic-value, and these
>> + // addends will be later on folded into a single addend. Following above
>> + // example, if the symbolic value "y" is being processed, the inner loop
>> + // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will
>> + // be later on folded into "<b1+b2, y>".
>> + //
>> + for (unsigned SameSymIdx = SymIdx + 1;
>> + SameSymIdx < AddendNum; SameSymIdx++) {
>> + const FAddend *T = Addends[SameSymIdx];
>> + if (T && T->getSymVal() == Val) {
>> + // Set null such that next iteration of the outer loop will not process
>> + // this addend again.
>> + Addends[SameSymIdx] = 0;
>> + SimpVect.push_back(T);
>> + }
>> + }
>> +
>> + // If multiple addends share same symbolic value, fold them together.
>> + if (StartIdx + 1 != SimpVect.size()) {
>> + FAddend &R = TmpResult[NextTmpIdx ++];
>> + R = *SimpVect[StartIdx];
>> + for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
>> + R += *SimpVect[Idx];
>> +
>> + // Pop all addends being folded and push the resulting folded addend.
>> + SimpVect.resize(StartIdx);
>> + if (Val != 0) {
>> + if (!R.isZero()) {
>> + SimpVect.push_back(&R);
>> + }
>> + } else {
>> + // Don't push constant addend at this time. It will be the last element
>> + // of <SimpVect>.
>> + ConstAdd = &R;
>> + }
>> + }
>> + }
>
> This loop seems overly complicated for what it's actually doing... are
> you sure you can't simplify it any?
>
>> + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
>> + "out-of-bound access");
>
> llvm::array_lengthof?
>
> -Eli
Oh, and one more comment: I would like to see at least twice as many
tests as you have now. The tests you have cover the basic
functionality, but not the more complicated cases.
-Eli
More information about the llvm-commits
mailing list