[llvm-commits] [llvm] r170471 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineAddSub.cpp test/Transforms/InstCombine/fast-math.ll

Tue Dec 18 15:49:38 PST 2012

On Tue, Dec 18, 2012 at 3:10 PM, Shuxin Yang <shuxin.llvm at gmail.com> wrote:
> Author: shuxin_yang
> Date: Tue Dec 18 17:10:12 2012
> New Revision: 170471
>
> URL: http://llvm.org/viewvc/llvm-project?rev=170471&view=rev
> Log:
> rdar://12801297
>
>  InstCombine for unsafe floating-point add/sub.
>
> Modified:
>     llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
>     llvm/trunk/test/Transforms/InstCombine/fast-math.ll

General comment: I was sort of expecting one more round of review
before you committed this because you made some substantial changes to
the previous version.  (No need to back out, though.)

> Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp?rev=170471&r1=170470&r2=170471&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp (original)
> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp Tue Dec 18 17:10:12 2012
> @@ -19,10 +19,715 @@
>  using namespace llvm;
>  using namespace PatternMatch;
>
> +namespace {
> +
> +  /// Class representing coefficient of floating-point addend.
> +  /// This class needs to be highly efficient, which is especially true for
> +  /// the constructor. As I write this comment, the cost of the default
> +  /// constructor is merely a 4-byte store of zero (assuming the compiler is
> +  /// able to perform write-merging).
> +  ///
> +  class FAddendCoef {
> +  public:
> +    // The constructor has to initialize an APFloat, which is unnecessary for
> +    // most addends, which have a coefficient of either 1 or -1. So, the
> +    // constructor is expensive. In order to avoid the cost of the constructor,
> +    // we should reuse some instances whenever possible. The pre-created
> +    // instances FAddCombine::Add[0-5] embody this idea.
> +    //
> +    FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
> +    ~FAddendCoef();
> +
> +    void set(short C) {
> +      assert(!insaneIntVal(C) && "Insane coefficient");
> +      IsFp = false; IntVal = C;
> +    }
> +
> +    void set(const APFloat& C);
> +
> +    void negate();
> +
> +    bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
> +    Value *getValue(Type *) const;
> +
> +    // If possible, don't define operator+/operator- etc because these
> +    // operators inevitably call FAddendCoef's constructor which is not cheap.
> +    void operator=(const FAddendCoef &A);
> +    void operator+=(const FAddendCoef &A);
> +    void operator-=(const FAddendCoef &A);
> +    void operator*=(const FAddendCoef &S);
> +
> +    bool isOne() const { return isInt() && IntVal == 1; }
> +    bool isTwo() const { return isInt() && IntVal == 2; }
> +    bool isMinusOne() const { return isInt() && IntVal == -1; }
> +    bool isMinusTwo() const { return isInt() && IntVal == -2; }
> +
> +  private:
> +    bool insaneIntVal(int V) { return V > 4 || V < -4; }
> +    APFloat *getFpValPtr(void)
> +      { return reinterpret_cast<APFloat*>(&FpValBuf[0]); }
> +
> +    const APFloat &getFpVal(void) const {
> +      assert(IsFp && BufHasFpVal && "Incorret state");
> +      return *reinterpret_cast<const APFloat*>(&FpValBuf[0]);
> +    }
> +
> +    APFloat &getFpVal(void)
> +      { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
> +
> +    bool isInt() const { return !IsFp; }
> +
> +  private:
> +    bool IsFp;
> +
> +    // True iff FpValBuf contains an instance of APFloat.
> +    bool BufHasFpVal;
> +
> +    // The integer coefficient of an individual addend is either 1 or -1,
> +    // and we try to simplify at most 4 addends drawn from at most two
> +    // neighboring instructions. So the range of <IntVal> falls in [-4, 4].
> +    // APInt is overkill for this purpose.
> +    short IntVal;
> +
> +    union {
> +      char FpValBuf[sizeof(APFloat)];
> +      int dummy; // So this structure has at least 4-byte alignment.
> +    };

I'm sure I made a comment here before about using llvm::AlignedCharArrayUnion...

> +  };
> +
> +  /// FAddend is used to represent floating-point addend. An addend is
> +  /// represented as <C, V>, where the V is a symbolic value, and C is a
> +  /// constant coefficient. A constant addend is represented as <C, 0>.
> +  ///
> +  class FAddend {
> +  public:
> +    FAddend() { Val = 0; }
> +
> +    Value *getSymVal (void) const { return Val; }
> +    const FAddendCoef &getCoef(void) const { return Coeff; }
> +
> +    bool isConstant() const { return Val == 0; }
> +    bool isZero() const { return Coeff.isZero(); }
> +
> +    void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; }
> +    void set(const APFloat& Coefficient, Value *V)
> +      { Coeff.set(Coefficient); Val = V; }
> +    void set(const ConstantFP* Coefficient, Value *V)
> +      { Coeff.set(Coefficient->getValueAPF()); Val = V; }
> +
> +    void negate() { Coeff.negate(); }
> +
> +    /// Drill down the U-D chain one step to find the definition of V, and
> +    /// try to break the definition into one or two addends.
> +    static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
> +
> +    /// Similar to FAddend::drillValueDownOneStep() except that the value being
> +    /// split is the addend itself.
> +    unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
> +
> +    void operator+=(const FAddend &T) {
> +      assert((Val == T.Val) && "Symbolic-values disagree");
> +      Coeff += T.Coeff;
> +    }
> +
> +  private:
> +    void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
> +
> +    // This addend has the value of "Coeff * Val".
> +    Value *Val;
> +    FAddendCoef Coeff;
> +  };
> +
> +  /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
> +  /// with its neighboring at most two instructions.
> +  ///
> +  class FAddCombine {
> +  public:
> +    FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
> +    Value *simplify(Instruction *FAdd);
> +
> +  private:
> +    typedef SmallVector<const FAddend*, 4> AddendVect;
> +
> +    Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
> +
> +    /// Convert given addend to a Value
> +    Value *createAddendVal(const FAddend &A, bool& NeedNeg);
> +
> +    /// Return the number of instructions needed to emit the N-ary addition.
> +    unsigned calcInstrNumber(const AddendVect& Vect);
> +    Value *createFSub(Value *Opnd0, Value *Opnd1);
> +    Value *createFAdd(Value *Opnd0, Value *Opnd1);
> +    Value *createFMul(Value *Opnd0, Value *Opnd1);
> +    Value *createFNeg(Value *V);
> +    Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
> +    void createInstPostProc(Instruction *NewInst);
> +
> +    InstCombiner::BuilderTy *Builder;
> +    Instruction *Instr;
> +
> +  private:
> +     // Debugging stuff are clustered here.
> +    #ifndef NDEBUG
> +      unsigned CreateInstrNum;
> +      void initCreateInstNum() { CreateInstrNum = 0; }
> +      void incCreateInstNum() { CreateInstrNum++; }
> +    #else
> +      void initCreateInstNum() {}
> +      void incCreateInstNum() {}
> +    #endif
> +  };
> +}
> +
> +//===----------------------------------------------------------------------===//
> +//
> +// Implementation of
> +//    {FAddendCoef, FAddend, FAddition, FAddCombine}.
> +//
> +//===----------------------------------------------------------------------===//
> +FAddendCoef::~FAddendCoef() {
> +  if (BufHasFpVal)
> +    getFpValPtr()->~APFloat();
> +}
> +
> +void FAddendCoef::set(const APFloat& C) {
> +  APFloat *P = getFpValPtr();
> +
> +  if (isInt()) {
> +    // As the buffer is a meaningless byte stream, we cannot call
> +    // APFloat::operator=().
> +    new(P) APFloat(C);
> +  } else
> +    *P = C;
> +
> +  IsFp = BufHasFpVal = true;
> +}
> +
> +void FAddendCoef::operator=(const FAddendCoef& That) {
> +  if (That.isInt())
> +    set(That.IntVal);
> +  else
> +    set(That.getFpVal());
> +}
> +
> +void FAddendCoef::operator+=(const FAddendCoef &That) {
> +  enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
> +  if (isInt() == That.isInt()) {
> +    if (isInt())
> +      IntVal += That.IntVal;
> +    else
> +      getFpVal().add(That.getFpVal(), RndMode);
> +    return;
> +  }
> +
> +  if (isInt()) {
> +    const APFloat &T = That.getFpVal();
> +    set(T);
> +    getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode);
> +    return;
> +  }
> +
> +  APFloat &T = getFpVal();
> +  T.add(APFloat(T.getSemantics(), That.IntVal), RndMode);
> +}
> +
> +void FAddendCoef::operator-=(const FAddendCoef &That) {
> +  enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
> +  if (isInt() == That.isInt()) {
> +    if (isInt())
> +      IntVal -= That.IntVal;
> +    else
> +      getFpVal().subtract(That.getFpVal(), RndMode);
> +    return;
> +  }
> +
> +  if (isInt()) {
> +    const APFloat &T = That.getFpVal();
> +    set(T);
> +    getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode);
> +    return;
> +  }
> +
> +  APFloat &T = getFpVal();
> +  T.subtract(APFloat(T.getSemantics(), IntVal), RndMode);
> +}
> +
> +void FAddendCoef::operator*=(const FAddendCoef &That) {
> +  if (That.isOne())
> +    return;
> +
> +  if (That.isMinusOne()) {
> +    negate();
> +    return;
> +  }
> +
> +  if (isInt() && That.isInt()) {
> +    int Res = IntVal * (int)That.IntVal;
> +    assert(!insaneIntVal(Res) && "Insane int value");
> +    IntVal = Res;
> +    return;
> +  }
> +
> +  const fltSemantics &Semantic =
> +    isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
> +
> +  if (isInt())
> +    set(APFloat(Semantic, IntVal));
> +  APFloat &F0 = getFpVal();
> +
> +  if (That.isInt())
> +    F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven);
> +  else
> +    F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
> +
> +  return;
> +}
> +
> +void FAddendCoef::negate() {
> +  if (isInt())
> +    IntVal = 0 - IntVal;
> +  else
> +    getFpVal().changeSign();
> +}
> +
> +Value *FAddendCoef::getValue(Type *Ty) const {
> +  return isInt() ?
> +    ConstantFP::get(Ty, float(IntVal)) :
> +    ConstantFP::get(Ty->getContext(), getFpVal());
> +}
> +
> +// The definition of <Val>     Addends
> +// =========================================
> +//  A + B                     <1, A>, <1,B>
> +//  A - B                     <1, A>, <-1,B>
> +//  0 - B                     <-1, B>
> +//  C * A,                    <C, A>
> +//  A + C                     <1, A> <C, NULL>
> +//  0 +/- 0                   <0, NULL> (corner case)
> +//
> +// Legend: A and B are not constant, C is constant
> +//
> +unsigned FAddend::drillValueDownOneStep
> +  (Value *Val, FAddend &Addend0, FAddend &Addend1) {
> +  Instruction *I = 0;
> +  if (Val == 0 || !(I = dyn_cast<Instruction>(Val)))
> +    return 0;
> +
> +  unsigned Opcode = I->getOpcode();
> +
> +  if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
> +    ConstantFP *C0, *C1;
> +    Value *Opnd0 = I->getOperand(0);
> +    Value *Opnd1 = I->getOperand(1);
> +    if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
> +      Opnd0 = 0;
> +
> +    if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
> +      Opnd1 = 0;
> +
> +    if (Opnd0) {
> +      if (!C0)
> +        Addend0.set(1, Opnd0);
> +      else
> +        Addend0.set(C0, 0);
> +    }
> +
> +    if (Opnd1) {
> +      FAddend &Addend = Opnd0 ? Addend1 : Addend0;
> +      if (!C1)
> +        Addend.set(1, Opnd1);
> +      else
> +        Addend.set(C1, 0);
> +      if (Opcode == Instruction::FSub)
> +        Addend.negate();
> +    }
> +
> +    if (Opnd0 || Opnd1)
> +      return Opnd0 && Opnd1 ? 2 : 1;
> +
> +    // Both operands are zero. Weird!
> +    Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0);
> +    return 1;
> +  }
> +
> +  if (I->getOpcode() == Instruction::FMul) {
> +    Value *V0 = I->getOperand(0);
> +    Value *V1 = I->getOperand(1);
> +    if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
> +      Addend0.set(C, V1);
> +      return 1;
> +    }
> +
> +    if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
> +      Addend0.set(C, V0);
> +      return 1;
> +    }
> +  }
> +
> +  return 0;
> +}
> +
> +// Try to break *this* addend into two addends. e.g. Suppose this addend is
> +// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends,
> +// i.e. <2.3, X> and <2.3, Y>.
> +//
> +unsigned FAddend::drillAddendDownOneStep
> +  (FAddend &Addend0, FAddend &Addend1) const {
> +  if (isConstant())
> +    return 0;
> +
> +  unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
> +  if (!BreakNum || Coeff.isOne())
> +    return BreakNum;
> +
> +  Addend0.Scale(Coeff);
> +
> +  if (BreakNum == 2)
> +    Addend1.Scale(Coeff);
> +
> +  return BreakNum;
> +}
> +
> +Value *FAddCombine::simplify(Instruction *I) {
> +  assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
> +
> +  // Currently we are not able to handle vector type.
> +  if (I->getType()->isVectorTy())
> +    return 0;
> +
> +  assert((I->getOpcode() == Instruction::FAdd ||
> +          I->getOpcode() == Instruction::FSub) && "Expect add/sub");
> +
> +  // Save the instruction before calling other member-functions.
> +  Instr = I;
> +
> +  FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
> +
> +  unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
> +
> +  // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
> +  unsigned Opnd0_ExpNum = 0;
> +  unsigned Opnd1_ExpNum = 0;
> +
> +  if (!Opnd0.isConstant())
> +    Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
> +
> +  // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
> +  if (OpndNum == 2 && !Opnd1.isConstant())
> +    Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
> +
> +  // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
> +  if (Opnd0_ExpNum && Opnd1_ExpNum) {
> +    AddendVect AllOpnds;
> +    AllOpnds.push_back(&Opnd0_0);
> +    AllOpnds.push_back(&Opnd1_0);
> +    if (Opnd0_ExpNum == 2)
> +      AllOpnds.push_back(&Opnd0_1);
> +    if (Opnd1_ExpNum == 2)
> +      AllOpnds.push_back(&Opnd1_1);
> +
> +    // Compute instruction quota. We should save at least one instruction.
> +    unsigned InstQuota = 0;
> +
> +    Value *V0 = I->getOperand(0);
> +    Value *V1 = I->getOperand(1);
> +    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
> +                 (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
> +
> +    if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
> +      return R;
> +  }
> +
> +  if (OpndNum != 2) {
> +    // The input instruction is : "I=0.0 +/- V". If the "V" were able to be
> +    // split into two addends, say "V = X - Y", the instruction would have
> +    // been optimized into "I = Y - X" in the previous steps.
> +    //
> +    const FAddendCoef &CE = Opnd0.getCoef();
> +    return CE.isOne() ? Opnd0.getSymVal() : 0;
> +  }
> +
> +  // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
> +  if (Opnd1_ExpNum) {
> +    AddendVect AllOpnds;
> +    AllOpnds.push_back(&Opnd0);
> +    AllOpnds.push_back(&Opnd1_0);
> +    if (Opnd1_ExpNum == 2)
> +      AllOpnds.push_back(&Opnd1_1);
> +
> +    if (Value *R = simplifyFAdd(AllOpnds, 1))
> +      return R;
> +  }
> +
> +  // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
> +  if (Opnd0_ExpNum) {
> +    AddendVect AllOpnds;
> +    AllOpnds.push_back(&Opnd1);
> +    AllOpnds.push_back(&Opnd0_0);
> +    if (Opnd0_ExpNum == 2)
> +      AllOpnds.push_back(&Opnd0_1);
> +
> +    if (Value *R = simplifyFAdd(AllOpnds, 1))
> +      return R;
> +  }
> +
> +  return 0;
> +}
> +
> +Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
> +
> +  unsigned AddendNum = Addends.size();
> +  assert(AddendNum <= 4 && "Too many addends");
> +
> +  // For saving intermediate results;
> +  unsigned NextTmpIdx = 0;
> +  FAddend TmpResult[3];
> +
> +  // Points to the constant addend of the resulting simplified expression.
> +  // If the resulting expr has a constant addend, it is desirable for that
> +  // constant addend to reside at the top of the resulting expression tree.
> +  // Placing the constant close to super-expr(s) will potentially reveal some
> +  // optimization opportunities in the super-expr(s).
> +  //
> +  const FAddend *ConstAdd = 0;
> +
> +  // Simplified addends are placed in <SimpVect>.
> +  AddendVect SimpVect;
> +
> +  // The outer loop works on one symbolic-value at a time. Suppose the input
> +  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
> +  // The symbolic-values will be processed in this order: x, y, z.
> +  //
> +  for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
> +
> +    const FAddend *ThisAddend = Addends[SymIdx];
> +    if (!ThisAddend) {
> +      // This addend was processed before.
> +      continue;
> +    }
> +
> +    Value *Val = ThisAddend->getSymVal();
> +    unsigned StartIdx = SimpVect.size();
> +    SimpVect.push_back(ThisAddend);
> +
> +    // The inner loop collects addends sharing the same symbolic-value; these
> +    // addends will later be folded into a single addend. Following the above
> +    // example, if the symbolic value "y" is being processed, the inner loop
> +    // will collect two addends "<b1,y>" and "<b2,y>". These two addends will
> +    // later be folded into "<b1+b2, y>".
> +    //
> +    for (unsigned SameSymIdx = SymIdx + 1;
> +         SameSymIdx < AddendNum; SameSymIdx++) {
> +      const FAddend *T = Addends[SameSymIdx];
> +      if (T && T->getSymVal() == Val) {
> +        // Set null such that next iteration of the outer loop will not process
> +        // this addend again.
> +        Addends[SameSymIdx] = 0;
> +        SimpVect.push_back(T);
> +      }
> +    }
> +
> +    // If multiple addends share same symbolic value, fold them together.
> +    if (StartIdx + 1 != SimpVect.size()) {
> +      FAddend &R = TmpResult[NextTmpIdx ++];
> +      R = *SimpVect[StartIdx];
> +      for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
> +        R += *SimpVect[Idx];
> +
> +      // Pop all addends being folded and push the resulting folded addend.
> +      SimpVect.resize(StartIdx);
> +      if (Val != 0) {
> +        if (!R.isZero()) {
> +          SimpVect.push_back(&R);
> +        }
> +      } else {
> +        // Don't push constant addend at this time. It will be the last element
> +        // of <SimpVect>.
> +        ConstAdd = &R;
> +      }
> +    }
> +  }

This loop seems overly complicated for what it's actually doing... are
you sure you can't simplify it at all?

> +  assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
> +         "out-of-bound access");

Could this use llvm::array_lengthof(TmpResult) instead of the sizeof division?

-Eli