[llvm] r277151 - [Hexagon] Improve balancing of address calculation

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 29 08:15:36 PDT 2016


Author: kparzysz
Date: Fri Jul 29 10:15:35 2016
New Revision: 277151

URL: http://llvm.org/viewvc/llvm-project?rev=277151&view=rev
Log:
[Hexagon] Improve balancing of address calculation

Rebalances address calculation trees and applies Hexagon-specific
optimizations to the trees to improve instruction selection.

Patch by Tobias Edler von Koch.

Added:
    llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll
Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp?rev=277151&r1=277150&r2=277151&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp Fri Jul 29 10:15:35 2016
@@ -32,6 +32,24 @@ MaxNumOfUsesForConstExtenders("ga-max-nu
   cl::desc("Maximum number of uses of a global address such that we still us a"
            "constant extended instruction"));
 
+static
+cl::opt<bool>
+EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
+  cl::desc("Rebalance address calculation trees to improve "
+          "instruction selection"));
+
+// Rebalance only if this allows e.g. combining a GA with an offset or
+// factoring out a shift.
+static
+cl::opt<bool>
+RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
+  cl::desc("Rebalance address tree only if this allows optimizations"));
+
+static
+cl::opt<bool>
+RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
+  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
+
 //===----------------------------------------------------------------------===//
 // Instruction Selector Implementation
 //===----------------------------------------------------------------------===//
@@ -42,14 +60,13 @@ MaxNumOfUsesForConstExtenders("ga-max-nu
 ///
 namespace {
 class HexagonDAGToDAGISel : public SelectionDAGISel {
-  const HexagonTargetMachine &HTM;
   const HexagonSubtarget *HST;
   const HexagonInstrInfo *HII;
   const HexagonRegisterInfo *HRI;
 public:
   explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
                                CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+      : SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr),
         HRI(nullptr) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override {
@@ -92,7 +109,6 @@ public:
                                     std::vector<SDValue> &OutOps) override;
   bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
   void SelectLoad(SDNode *N);
-  void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
   void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl);
   void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
   void SelectStore(SDNode *N);
@@ -179,6 +195,17 @@ private:
   bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
   bool orIsAdd(const SDNode *N) const;
   bool isAlignedMemNode(const MemSDNode *N) const;
+
+  SmallDenseMap<SDNode *,int> RootWeights;
+  SmallDenseMap<SDNode *,int> RootHeights;
+  SmallDenseMap<const Value *,int> GAUsesInFunction;
+  int getWeight(SDNode *N);
+  int getHeight(SDNode *N);
+  SDValue getMultiplierForSHL(SDNode *N);
+  SDValue factorOutPowerOf2(SDValue V, unsigned Power);
+  unsigned getUsesInFunction(const Value *V);
+  SDValue balanceSubTree(SDNode *N, bool Factorize = false);
+  void rebalanceAddressTrees();
 }; // end HexagonDAGToDAGISel
 }  // end anonymous namespace
 
@@ -1373,6 +1400,16 @@ void HexagonDAGToDAGISel::PreprocessISel
     SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
     ReplaceNode(T0.getNode(), NewShl.getNode());
   }
+
+  if (EnableAddressRebalancing) {
+    rebalanceAddressTrees();
+
+    DEBUG(
+      dbgs() << "************* SelectionDAG after preprocessing: ***********\n";
+      CurDAG->dump();
+      dbgs() << "************* End SelectionDAG after preprocessing ********\n";
+    );
+  }
 }
 
 void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
@@ -1540,3 +1577,701 @@ bool HexagonDAGToDAGISel::orIsAdd(const
 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
   return N->getAlignment() >= N->getMemoryVT().getStoreSize();
 }
+
+////////////////////////////////////////////////////////////////////////////////
+// Rebalancing of address calculation trees
+
+static bool isOpcodeHandled(const SDNode *N) {
+  switch (N->getOpcode()) {
+    case ISD::ADD:
+    case ISD::MUL:
+      return true;
+    case ISD::SHL:
+      // We only handle constant shifts because these can be easily flattened
+      // into multiplications by 2^Op1.
+      return isa<ConstantSDNode>(N->getOperand(1).getNode());
+    default:
+      return false;
+  }
+}
+
+/// \brief Return the weight of an SDNode
+int HexagonDAGToDAGISel::getWeight(SDNode *N) {
+  if (!isOpcodeHandled(N))
+    return 1;
+  assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
+  assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
+  assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
+  return RootWeights[N];
+}
+
+int HexagonDAGToDAGISel::getHeight(SDNode *N) {
+  if (!isOpcodeHandled(N))
+    return 0;
+  assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
+      "Cannot query height of unvisited/RAUW'd node!");
+  return RootHeights[N];
+}
+
+struct WeightedLeaf {
+  SDValue Value;
+  int Weight;
+  int InsertionOrder;
+
+  WeightedLeaf() : Value(SDValue()) { }
+
+  WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
+    Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
+    assert(Weight >= 0 && "Weight must be >= 0");
+  }
+
+  static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
+    assert(A.Value.getNode() && B.Value.getNode());
+    return A.Weight == B.Weight ?
+            (A.InsertionOrder > B.InsertionOrder) :
+            (A.Weight > B.Weight);
+  }
+};
+
+/// A specialized priority queue for WeigthedLeaves. It automatically folds
+/// constants and allows removal of non-top elements while maintaining the
+/// priority order.
+class LeafPrioQueue {
+  SmallVector<WeightedLeaf, 8> Q;
+  bool HaveConst;
+  WeightedLeaf ConstElt;
+  unsigned Opcode;
+
+public:
+  bool empty() {
+    return (!HaveConst && Q.empty());
+  }
+
+  size_t size() {
+    return Q.size() + HaveConst;
+  }
+
+  bool hasConst() {
+    return HaveConst;
+  }
+
+  const WeightedLeaf &top() {
+    if (HaveConst)
+      return ConstElt;
+    return Q.front();
+  }
+
+  WeightedLeaf pop() {
+    if (HaveConst) {
+      HaveConst = false;
+      return ConstElt;
+    }
+    std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+    return Q.pop_back_val();
+  }
+
+  void push(WeightedLeaf L, bool SeparateConst=true) {
+    if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
+      if (Opcode == ISD::MUL &&
+          cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
+        return;
+      if (Opcode == ISD::ADD &&
+          cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
+        return;
+
+      HaveConst = true;
+      ConstElt = L;
+    } else {
+      Q.push_back(L);
+      std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+    }
+  }
+
+  /// Push L to the bottom of the queue regardless of its weight. If L is
+  /// constant, it will not be folded with other constants in the queue.
+  void pushToBottom(WeightedLeaf L) {
+    L.Weight = 1000;
+    push(L, false);
+  }
+
+  /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
+  /// lowest weight and remove it from the queue.
+  WeightedLeaf findSHL(uint64_t MaxAmount);
+
+  WeightedLeaf findMULbyConst();
+
+  LeafPrioQueue(unsigned Opcode) :
+    HaveConst(false), Opcode(Opcode) { }
+};
+
+WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
+  int ResultPos;
+  WeightedLeaf Result;
+
+  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
+    const WeightedLeaf &L = Q[Pos];
+    const SDValue &Val = L.Value;
+    if (Val.getOpcode() != ISD::SHL ||
+        !isa<ConstantSDNode>(Val.getOperand(1)) ||
+        Val.getConstantOperandVal(1) > MaxAmount)
+      continue;
+    if (!Result.Value.getNode() || Result.Weight > L.Weight ||
+        (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
+    {
+      Result = L;
+      ResultPos = Pos;
+    }
+  }
+
+  if (Result.Value.getNode()) {
+    Q.erase(&Q[ResultPos]);
+    std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+  }
+
+  return Result;
+}
+
+WeightedLeaf LeafPrioQueue::findMULbyConst() {
+  int ResultPos;
+  WeightedLeaf Result;
+
+  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
+    const WeightedLeaf &L = Q[Pos];
+    const SDValue &Val = L.Value;
+    if (Val.getOpcode() != ISD::MUL ||
+        !isa<ConstantSDNode>(Val.getOperand(1)) ||
+        Val.getConstantOperandVal(1) > 127)
+      continue;
+    if (!Result.Value.getNode() || Result.Weight > L.Weight ||
+        (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
+    {
+      Result = L;
+      ResultPos = Pos;
+    }
+  }
+
+  if (Result.Value.getNode()) {
+    Q.erase(&Q[ResultPos]);
+    std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+  }
+
+  return Result;
+}
+
+SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
+  uint64_t MulFactor = 1 << N->getConstantOperandVal(1);
+  return CurDAG->getConstant(MulFactor, SDLoc(N),
+                             N->getOperand(1).getValueType());
+}
+
+/// @returns the value x for which 2^x is a factor of Val
+static unsigned getPowerOf2Factor(SDValue Val) {
+  if (Val.getOpcode() == ISD::MUL) {
+    unsigned MaxFactor = 0;
+    for (int i=0; i < 2; ++i) {
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
+      if (!C)
+        continue;
+      const APInt &CInt = C->getAPIntValue();
+      if (CInt.getBoolValue())
+        MaxFactor = CInt.countTrailingZeros();
+    }
+    return MaxFactor;
+  }
+  if (Val.getOpcode() == ISD::SHL) {
+    if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
+      return 0;
+    return (unsigned) Val.getConstantOperandVal(1);
+  }
+
+  return 0;
+}
+
+/// @returns true if V>>Amount will eliminate V's operation on its child
+static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
+  if (V.getOpcode() == ISD::MUL) {
+    SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
+    for (int i=0; i < 2; ++i)
+      if (isa<ConstantSDNode>(Ops[i].getNode()) &&
+          V.getConstantOperandVal(i) % ((uint64_t)1 << Amount) == 0) {
+        uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
+        return (NewConst == 1);
+      }
+  } else if (V.getOpcode() == ISD::SHL) {
+    return (Amount == V.getConstantOperandVal(1));
+  }
+
+  return false;
+}
+
+SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
+  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
+  if (V.getOpcode() == ISD::MUL) {
+    for (int i=0; i < 2; ++i) {
+      if (isa<ConstantSDNode>(Ops[i].getNode()) &&
+          V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
+        uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
+        if (NewConst == 1)
+          return Ops[!i];
+        Ops[i] = CurDAG->getConstant(NewConst,
+                                     SDLoc(V), V.getValueType());
+        break;
+      }
+    }
+  } else if (V.getOpcode() == ISD::SHL) {
+    uint64_t ShiftAmount = V.getConstantOperandVal(1);
+    if (ShiftAmount == Power)
+      return Ops[0];
+    Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
+                                 SDLoc(V), V.getValueType());
+  }
+
+  return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
+}
+
+static bool isTargetConstant(const SDValue &V) {
+  return V.getOpcode() == HexagonISD::CONST32 ||
+         V.getOpcode() == HexagonISD::CONST32_GP;
+}
+
+unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
+  if (GAUsesInFunction.count(V))
+    return GAUsesInFunction[V];
+
+  unsigned Result = 0;
+  const Function *CurF = CurDAG->getMachineFunction().getFunction();
+  for (const User *U : V->users()) {
+    if (isa<Instruction>(U) &&
+        cast<Instruction>(U)->getParent()->getParent() == CurF)
+      ++Result;
+  }
+
+  GAUsesInFunction[V] = Result;
+
+  return Result;
+}
+
+/// Note - After calling this, N may be dead. It may have been replaced by a
+/// new node, so always use the returned value in place of N.
+///
+/// @returns The SDValue taking the place of N (which could be N if it is
+/// unchanged)
+SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
+  assert(RootWeights.count(N) && "Cannot balance non-root node.");
+  assert(RootWeights[N] != -2 && "This node was RAUW'd!");
+  assert(!TopLevel || N->getOpcode() == ISD::ADD);
+
+  // Return early if this node was already visited
+  if (RootWeights[N] != -1)
+    return SDValue(N, 0);
+
+  assert(isOpcodeHandled(N));
+
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+
+  // Return early if the operands will remain unchanged or are all roots
+  if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
+      (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
+    SDNode *Op0N = Op0.getNode();
+    int Weight;
+    if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
+      Weight = getWeight(balanceSubTree(Op0N).getNode());
+      // Weight = calculateWeight(Op0N);
+    } else
+      Weight = getWeight(Op0N);
+
+    SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
+    if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
+      Weight += getWeight(balanceSubTree(Op1N).getNode());
+      // Weight += calculateWeight(Op1N);
+    } else
+      Weight += getWeight(Op1N);
+
+    RootWeights[N] = Weight;
+    RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
+                              getHeight(N->getOperand(1).getNode())) + 1;
+
+    DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
+                 << " Height=" << RootHeights[N] << "): ");
+    DEBUG(N->dump());
+
+    return SDValue(N, 0);
+  }
+
+  DEBUG(dbgs() << "** Balancing root node: ");
+  DEBUG(N->dump());
+
+  unsigned NOpcode = N->getOpcode();
+
+  LeafPrioQueue Leaves(NOpcode);
+  SmallVector<SDValue, 4> Worklist;
+  Worklist.push_back(SDValue(N, 0));
+
+  // SHL nodes will be converted to MUL nodes
+  if (NOpcode == ISD::SHL)
+    NOpcode = ISD::MUL;
+
+  bool CanFactorize = false;
+  WeightedLeaf Mul1, Mul2;
+  unsigned MaxPowerOf2 = 0;
+  WeightedLeaf GA;
+
+  // Do not try to factor out a shift if there is already a shift at the tip of
+  // the tree.
+  bool HaveTopLevelShift = false;
+  if (TopLevel &&
+      ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
+                        Op0.getConstantOperandVal(1) < 4) ||
+       (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
+                        Op1.getConstantOperandVal(1) < 4)))
+    HaveTopLevelShift = true;
+
+  // Flatten the subtree into an ordered list of leaves; at the same time
+  // determine whether the tree is already balanced.
+  int InsertionOrder = 0;
+  SmallDenseMap<SDValue, int> NodeHeights;
+  bool Imbalanced = false;
+  int CurrentWeight = 0;
+  while (!Worklist.empty()) {
+    SDValue Child = Worklist.pop_back_val();
+
+    if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
+      // CASE 1: Child is a root note
+
+      int Weight = RootWeights[Child.getNode()];
+      if (Weight == -1) {
+        Child = balanceSubTree(Child.getNode());
+        // calculateWeight(Child.getNode());
+        Weight = getWeight(Child.getNode());
+      } else if (Weight == -2) {
+        // Whoops, this node was RAUWd by one of the balanceSubTree calls we
+        // made. Our worklist isn't up to date anymore.
+        // Restart the whole process.
+        DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+        return balanceSubTree(N, TopLevel);
+      }
+
+      NodeHeights[Child] = 1;
+      CurrentWeight += Weight;
+
+      unsigned PowerOf2;
+      if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
+          (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
+          Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
+        // Try to identify two factorizable MUL/SHL children greedily. Leave
+        // them out of the priority queue for now so we can deal with them
+        // after.
+        if (!Mul1.Value.getNode()) {
+          Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
+          MaxPowerOf2 = PowerOf2;
+        } else {
+          Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
+          MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
+
+          // Our addressing modes can only shift by a maximum of 3
+          if (MaxPowerOf2 > 3)
+            MaxPowerOf2 = 3;
+
+          CanFactorize = true;
+        }
+      } else
+        Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
+    } else if (!isOpcodeHandled(Child.getNode())) {
+      // CASE 2: Child is an unhandled kind of node (e.g. constant)
+      int Weight = getWeight(Child.getNode());
+
+      NodeHeights[Child] = getHeight(Child.getNode());
+      CurrentWeight += Weight;
+
+      if (isTargetConstant(Child) && !GA.Value.getNode())
+        GA = WeightedLeaf(Child, Weight, InsertionOrder++);
+      else
+        Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
+    } else {
+      // CASE 3: Child is a subtree of same opcode
+      // Visit children first, then flatten.
+      unsigned ChildOpcode = Child.getOpcode();
+      assert(ChildOpcode == NOpcode ||
+             (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
+
+      // Convert SHL to MUL
+      SDValue Op1;
+      if (ChildOpcode == ISD::SHL)
+        Op1 = getMultiplierForSHL(Child.getNode());
+      else
+        Op1 = Child->getOperand(1);
+
+      if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
+        assert(!NodeHeights.count(Child) && "Parent visited before children?");
+        // Visit children first, then re-visit this node
+        Worklist.push_back(Child);
+        Worklist.push_back(Op1);
+        Worklist.push_back(Child->getOperand(0));
+      } else {
+        // Back at this node after visiting the children
+        if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
+          Imbalanced = true;
+
+        NodeHeights[Child] = std::max(NodeHeights[Op1],
+                                      NodeHeights[Child->getOperand(0)]) + 1;
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
+               << " weight=" << CurrentWeight << " imbalanced="
+               << Imbalanced << "\n");
+
+  // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
+  //  This factors out a shift in order to match memw(a<<Y+b).
+  if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
+                       willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
+    DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
+    int Weight = Mul1.Weight + Mul2.Weight;
+    int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
+    SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
+    SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
+    SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
+                                  Mul1Factored, Mul2Factored);
+    SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
+                                        Mul1.Value.getValueType());
+    SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
+                                  Sum, Const);
+    NodeHeights[New] = Height;
+    Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
+  } else if (Mul1.Value.getNode()) {
+    // We failed to factorize two MULs, so now the Muls are left outside the
+    // queue... add them back.
+    Leaves.push(Mul1);
+    if (Mul2.Value.getNode())
+      Leaves.push(Mul2);
+    CanFactorize = false;
+  }
+
+  // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
+  // and the root node itself is not used more than twice. This reduces the
+  // amount of additional constant extenders introduced by this optimization.
+  bool CombinedGA = false;
+  if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
+      GA.Value.hasOneUse() && N->use_size() < 3) {
+    GlobalAddressSDNode *GANode =
+      cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
+    ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
+
+    if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
+        getTargetLowering()->isOffsetFoldingLegal(GANode)) {
+      DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
+          << "): ");
+      DEBUG(GANode->dump());
+
+      SDValue NewTGA =
+        CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
+            GANode->getValueType(0),
+            GANode->getOffset() + (uint64_t)Offset->getSExtValue());
+      GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
+          GA.Value.getValueType(), NewTGA);
+      GA.Weight += Leaves.top().Weight;
+
+      NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
+      CombinedGA = true;
+
+      Leaves.pop(); // Remove the offset constant from the queue
+    }
+  }
+
+  if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
+      (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
+    RootWeights[N] = CurrentWeight;
+    RootHeights[N] = NodeHeights[SDValue(N, 0)];
+
+    return SDValue(N, 0);
+  }
+
+  // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
+  if (NOpcode == ISD::ADD && GA.Value.getNode()) {
+    WeightedLeaf SHL = Leaves.findSHL(31);
+    if (SHL.Value.getNode()) {
+      int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
+      GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
+                                 GA.Value.getValueType(),
+                                 GA.Value, SHL.Value);
+      GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
+      NodeHeights[GA.Value] = Height;
+    }
+  }
+
+  if (GA.Value.getNode())
+    Leaves.push(GA);
+
+  // If this is the top level and we haven't factored out a shift, we should try
+  // to move a constant to the bottom to match addressing modes like memw(rX+C)
+  if (TopLevel && !CanFactorize && Leaves.hasConst()) {
+    DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
+    Leaves.pushToBottom(Leaves.pop());
+  }
+
+  // Rebuild the tree using Huffman's algorithm
+  while (Leaves.size() > 1) {
+    WeightedLeaf L0 = Leaves.pop();
+
+    // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
+    // otherwise just get the next leaf
+    WeightedLeaf L1 = Leaves.findMULbyConst();
+    if (!L1.Value.getNode())
+      L1 = Leaves.pop();
+
+    assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
+
+    SDValue V0 = L0.Value;
+    int V0Weight = L0.Weight;
+    SDValue V1 = L1.Value;
+    int V1Weight = L1.Weight;
+
+    // Make sure that none of these nodes have been RAUW'd
+    if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
+        (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
+      DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+      return balanceSubTree(N, TopLevel);
+    }
+
+    ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
+    ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
+    EVT VT = N->getValueType(0);
+    SDValue NewNode;
+
+    if (V0C && !V1C) {
+      std::swap(V0, V1);
+      std::swap(V0C, V1C);
+    }
+
+    // Calculate height of this node
+    assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
+           "Children must have been visited before re-combining them!");
+    int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
+
+    // Rebuild this node (and restore SHL from MUL if needed)
+    if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
+      NewNode = CurDAG->getNode(
+          ISD::SHL, SDLoc(V0), VT, V0,
+          CurDAG->getConstant(
+              V1C->getAPIntValue().logBase2(), SDLoc(N),
+              getTargetLowering()->getScalarShiftAmountTy(CurDAG->getDataLayout(), V0.getValueType())));
+    else
+      NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
+
+    NodeHeights[NewNode] = Height;
+
+    int Weight = V0Weight + V1Weight;
+    Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
+
+    DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
+                 << Height << "):\n");
+    DEBUG(NewNode.dump());
+  }
+
+  assert(Leaves.size() == 1);
+  SDValue NewRoot = Leaves.top().Value;
+
+  assert(NodeHeights.count(NewRoot));
+  int Height = NodeHeights[NewRoot];
+
+  // Restore SHL if we earlier converted it to a MUL
+  if (NewRoot.getOpcode() == ISD::MUL) {
+    ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
+    if (V1C && V1C->getAPIntValue().isPowerOf2()) {
+      EVT VT = NewRoot.getValueType();
+      SDValue V0 = NewRoot.getOperand(0);
+      NewRoot = CurDAG->getNode(
+          ISD::SHL, SDLoc(NewRoot), VT, V0,
+          CurDAG->getConstant(V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
+                              getTargetLowering()->getScalarShiftAmountTy(
+                                  CurDAG->getDataLayout(), V0.getValueType())));
+    }
+  }
+
+  if (N != NewRoot.getNode()) {
+    DEBUG(dbgs() << "--> Root is now: ");
+    DEBUG(NewRoot.dump());
+
+    // Replace all uses of old root by new root
+    CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
+    // Mark that we have RAUW'd N
+    RootWeights[N] = -2;
+  } else {
+    DEBUG(dbgs() << "--> Root unchanged.\n");
+  }
+
+  RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
+  RootHeights[NewRoot.getNode()] = Height;
+
+  return NewRoot;
+}
+
+void HexagonDAGToDAGISel::rebalanceAddressTrees() {
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E;) {
+    SDNode *N = &*I++;
+    if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
+      continue;
+
+    SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
+    if (BasePtr.getOpcode() != ISD::ADD)
+      continue;
+
+    // We've already processed this node
+    if (RootWeights.count(BasePtr.getNode()))
+      continue;
+
+    DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
+    DEBUG(N->dump());
+
+    // FindRoots
+    SmallVector<SDNode *, 4> Worklist;
+
+    Worklist.push_back(BasePtr.getOperand(0).getNode());
+    Worklist.push_back(BasePtr.getOperand(1).getNode());
+
+    while (!Worklist.empty()) {
+      SDNode *N = Worklist.pop_back_val();
+      unsigned Opcode = N->getOpcode();
+
+      if (!isOpcodeHandled(N))
+        continue;
+
+      Worklist.push_back(N->getOperand(0).getNode());
+      Worklist.push_back(N->getOperand(1).getNode());
+
+      // Not a root if it has only one use and same opcode as its parent
+      if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
+        continue;
+
+      // This root node has already been processed
+      if (RootWeights.count(N))
+        continue;
+
+      RootWeights[N] = -1;
+    }
+
+    // Balance node itself
+    RootWeights[BasePtr.getNode()] = -1;
+    SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
+
+    if (N->getOpcode() == ISD::LOAD)
+      N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+            NewBasePtr, N->getOperand(2));
+    else
+      N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+            NewBasePtr, N->getOperand(3));
+
+    DEBUG(dbgs() << "--> Final node: ");
+    DEBUG(N->dump());
+  }
+
+  CurDAG->RemoveDeadNodes();
+  GAUsesInFunction.clear();
+  RootHeights.clear();
+  RootWeights.clear();
+}
+
+

Added: llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll?rev=277151&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll Fri Jul 29 10:15:35 2016
@@ -0,0 +1,54 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Test whether we can produce minimal code for this complex address
+; calculation.
+;
+
+; CHECK: r0 = memub(r{{[0-9]+}}<<#3{{ *}}+{{ *}}##the_global+516)
+
+%0 = type { [3 x %1] }
+%1 = type { %2, i8, i8, i8, i8, i8, [4 x i8], i8, [10 x i8], [10 x i8], [10 x i8], i8, [3 x %4], i16, i16, i16, i16, i32, i8, [4 x i8], i8, i8, i8, i8, %5, i8, i8, i8, i8, i8, i16, i8, i8, i8, i16, i16, i8, i8, [2 x i8], [2 x i8], i8, i8, i8, i8, i8, i16, i16, i8, i8, i8, i8, i8, i8, %9, i8, [6 x [2 x i8]], i16, i32, %10, [28 x i8], [4 x %17] }
+%2 = type { %3 }
+%3 = type { i8, i8, i8, i8, i8, i16, i16, i16, i16, i16 }
+%4 = type { i16, i16 }
+%5 = type { [10 x %6] }
+%6 = type { [2 x %7] }
+%7 = type { i8, [2 x %8] }
+%8 = type { [4 x i8] }
+%9 = type { i8 }
+%10 = type { %11, %13 }
+%11 = type { [2 x [2 x i8]], [2 x [8 x %12]], [6 x i16], [6 x i16] }
+%12 = type { i8, i8 }
+%13 = type { [4 x %12], [4 x %12], [2 x [4 x %14]], [6 x i16] }
+%14 = type { %15, %16 }
+%15 = type { i8, i8 }
+%16 = type { i8, i8 }
+%17 = type { i8, i8, %1*, i16, i16, i16, i64, i32, i32, %18, i8, %21, i8, [2 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i16, i16, [2 x i16], i16, [2 x i32], [2 x i16], [2 x i16], i8, i8, [6 x %23], i8, i8, i8, %24, %25, %26, %28 }
+%18 = type { %19, [10 x %20] }
+%19 = type { i32 }
+%20 = type { [2 x i8], [2 x i8], i8, i8, i8, i8 }
+%21 = type { i8, i8, i8, [8 x %22] }
+%22 = type { i8, i8, i8, i32 }
+%23 = type { i32, i16, i16, [2 x i16], [2 x i16], [2 x i16], i32 }
+%24 = type { [2 x i32], [2 x i64*], [2 x i64*], [2 x i64*], [2 x i32], [2 x i32], i32 }
+%25 = type { [2 x i32], [2 x i32], [2 x i32] }
+%26 = type { i8, i8, i8, i16, i16, %27, i32, i32, i32, i16 }
+%27 = type { i64 }
+%28 = type { %29, %31, [24 x i8] }
+%29 = type { [2 x %30], [16 x i32] }
+%30 = type { [16 x i32], [8 x i32], [16 x i32], [64 x i32], [2 x i32], i64, i32, i32, i32, i32 }
+%31 = type { [2 x %32] }
+%32 = type { [4 x %33], i32 }
+%33 = type { i32, i32 }
+
+ at the_global = external global %0
+
+; Function Attrs: nounwind optsize readonly ssp
+define zeroext i8 @myFun(i8 zeroext, i8 zeroext) {
+  %3 = zext i8 %1 to i32
+  %4 = zext i8 %0 to i32
+  %5 = getelementptr inbounds %0, %0* @the_global, i32 0, i32 0, i32 %4, i32 60, i32 0, i32 9, i32 1, i32 %3, i32 0, i32 0
+  %6 = load i8, i8* %5, align 4
+  ret i8 %6
+}
+




More information about the llvm-commits mailing list