[llvm] r277151 - [Hexagon] Improve balancing of address calculation
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 29 08:15:36 PDT 2016
Author: kparzysz
Date: Fri Jul 29 10:15:35 2016
New Revision: 277151
URL: http://llvm.org/viewvc/llvm-project?rev=277151&view=rev
Log:
[Hexagon] Improve balancing of address calculation
Rebalances address calculation trees and applies Hexagon-specific
optimizations to the trees to improve instruction selection.
Patch by Tobias Edler von Koch.
Added:
llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp?rev=277151&r1=277150&r2=277151&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp Fri Jul 29 10:15:35 2016
@@ -32,6 +32,24 @@ MaxNumOfUsesForConstExtenders("ga-max-nu
cl::desc("Maximum number of uses of a global address such that we still us a"
"constant extended instruction"));
+static
+cl::opt<bool>
+EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
+ cl::desc("Rebalance address calculation trees to improve "
+ "instruction selection"));
+
+// Rebalance only if this allows e.g. combining a GA with an offset or
+// factoring out a shift.
+static
+cl::opt<bool>
+RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
+ cl::desc("Rebalance address tree only if this allows optimizations"));
+
+static
+cl::opt<bool>
+RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
+ cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
+
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
@@ -42,14 +60,13 @@ MaxNumOfUsesForConstExtenders("ga-max-nu
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
- const HexagonTargetMachine &HTM;
const HexagonSubtarget *HST;
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
public:
explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+ : SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr),
HRI(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -92,7 +109,6 @@ public:
std::vector<SDValue> &OutOps) override;
bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
void SelectLoad(SDNode *N);
- void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl);
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
void SelectStore(SDNode *N);
@@ -179,6 +195,17 @@ private:
bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
bool orIsAdd(const SDNode *N) const;
bool isAlignedMemNode(const MemSDNode *N) const;
+
+ SmallDenseMap<SDNode *,int> RootWeights;
+ SmallDenseMap<SDNode *,int> RootHeights;
+ SmallDenseMap<const Value *,int> GAUsesInFunction;
+ int getWeight(SDNode *N);
+ int getHeight(SDNode *N);
+ SDValue getMultiplierForSHL(SDNode *N);
+ SDValue factorOutPowerOf2(SDValue V, unsigned Power);
+ unsigned getUsesInFunction(const Value *V);
+ SDValue balanceSubTree(SDNode *N, bool Factorize = false);
+ void rebalanceAddressTrees();
}; // end HexagonDAGToDAGISel
} // end anonymous namespace
@@ -1373,6 +1400,16 @@ void HexagonDAGToDAGISel::PreprocessISel
SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
ReplaceNode(T0.getNode(), NewShl.getNode());
}
+
+ if (EnableAddressRebalancing) {
+ rebalanceAddressTrees();
+
+ DEBUG(
+ dbgs() << "************* SelectionDAG after preprocessing: ***********\n";
+ CurDAG->dump();
+ dbgs() << "************* End SelectionDAG after preprocessing ********\n";
+ );
+ }
}
void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
@@ -1540,3 +1577,701 @@ bool HexagonDAGToDAGISel::orIsAdd(const
bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
return N->getAlignment() >= N->getMemoryVT().getStoreSize();
}
+
+////////////////////////////////////////////////////////////////////////////////
+// Rebalancing of address calculation trees
+
+static bool isOpcodeHandled(const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::MUL:
+ return true;
+ case ISD::SHL:
+ // We only handle constant shifts because these can be easily flattened
+ // into multiplications by 2^Op1.
+ return isa<ConstantSDNode>(N->getOperand(1).getNode());
+ default:
+ return false;
+ }
+}
+
+/// \brief Return the weight of an SDNode
+int HexagonDAGToDAGISel::getWeight(SDNode *N) {
+ if (!isOpcodeHandled(N))
+ return 1;
+ assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
+ assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
+ assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
+ return RootWeights[N];
+}
+
+int HexagonDAGToDAGISel::getHeight(SDNode *N) {
+ if (!isOpcodeHandled(N))
+ return 0;
+ assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
+ "Cannot query height of unvisited/RAUW'd node!");
+ return RootHeights[N];
+}
+
+struct WeightedLeaf {
+ SDValue Value;
+ int Weight;
+ int InsertionOrder;
+
+ WeightedLeaf() : Value(SDValue()) { }
+
+ WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
+ Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
+ assert(Weight >= 0 && "Weight must be >= 0");
+ }
+
+ static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
+ assert(A.Value.getNode() && B.Value.getNode());
+ return A.Weight == B.Weight ?
+ (A.InsertionOrder > B.InsertionOrder) :
+ (A.Weight > B.Weight);
+ }
+};
+
+/// A specialized priority queue for WeigthedLeaves. It automatically folds
+/// constants and allows removal of non-top elements while maintaining the
+/// priority order.
+class LeafPrioQueue {
+ SmallVector<WeightedLeaf, 8> Q;
+ bool HaveConst;
+ WeightedLeaf ConstElt;
+ unsigned Opcode;
+
+public:
+ bool empty() {
+ return (!HaveConst && Q.empty());
+ }
+
+ size_t size() {
+ return Q.size() + HaveConst;
+ }
+
+ bool hasConst() {
+ return HaveConst;
+ }
+
+ const WeightedLeaf &top() {
+ if (HaveConst)
+ return ConstElt;
+ return Q.front();
+ }
+
+ WeightedLeaf pop() {
+ if (HaveConst) {
+ HaveConst = false;
+ return ConstElt;
+ }
+ std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ return Q.pop_back_val();
+ }
+
+ void push(WeightedLeaf L, bool SeparateConst=true) {
+ if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
+ if (Opcode == ISD::MUL &&
+ cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
+ return;
+ if (Opcode == ISD::ADD &&
+ cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
+ return;
+
+ HaveConst = true;
+ ConstElt = L;
+ } else {
+ Q.push_back(L);
+ std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ }
+ }
+
+ /// Push L to the bottom of the queue regardless of its weight. If L is
+ /// constant, it will not be folded with other constants in the queue.
+ void pushToBottom(WeightedLeaf L) {
+ L.Weight = 1000;
+ push(L, false);
+ }
+
+ /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
+ /// lowest weight and remove it from the queue.
+ WeightedLeaf findSHL(uint64_t MaxAmount);
+
+ WeightedLeaf findMULbyConst();
+
+ LeafPrioQueue(unsigned Opcode) :
+ HaveConst(false), Opcode(Opcode) { }
+};
+
+WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
+ int ResultPos;
+ WeightedLeaf Result;
+
+ for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
+ const WeightedLeaf &L = Q[Pos];
+ const SDValue &Val = L.Value;
+ if (Val.getOpcode() != ISD::SHL ||
+ !isa<ConstantSDNode>(Val.getOperand(1)) ||
+ Val.getConstantOperandVal(1) > MaxAmount)
+ continue;
+ if (!Result.Value.getNode() || Result.Weight > L.Weight ||
+ (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
+ {
+ Result = L;
+ ResultPos = Pos;
+ }
+ }
+
+ if (Result.Value.getNode()) {
+ Q.erase(&Q[ResultPos]);
+ std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ }
+
+ return Result;
+}
+
+WeightedLeaf LeafPrioQueue::findMULbyConst() {
+ int ResultPos;
+ WeightedLeaf Result;
+
+ for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
+ const WeightedLeaf &L = Q[Pos];
+ const SDValue &Val = L.Value;
+ if (Val.getOpcode() != ISD::MUL ||
+ !isa<ConstantSDNode>(Val.getOperand(1)) ||
+ Val.getConstantOperandVal(1) > 127)
+ continue;
+ if (!Result.Value.getNode() || Result.Weight > L.Weight ||
+ (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
+ {
+ Result = L;
+ ResultPos = Pos;
+ }
+ }
+
+ if (Result.Value.getNode()) {
+ Q.erase(&Q[ResultPos]);
+ std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ }
+
+ return Result;
+}
+
+SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
+ uint64_t MulFactor = 1 << N->getConstantOperandVal(1);
+ return CurDAG->getConstant(MulFactor, SDLoc(N),
+ N->getOperand(1).getValueType());
+}
+
+/// @returns the value x for which 2^x is a factor of Val
+static unsigned getPowerOf2Factor(SDValue Val) {
+ if (Val.getOpcode() == ISD::MUL) {
+ unsigned MaxFactor = 0;
+ for (int i=0; i < 2; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
+ if (!C)
+ continue;
+ const APInt &CInt = C->getAPIntValue();
+ if (CInt.getBoolValue())
+ MaxFactor = CInt.countTrailingZeros();
+ }
+ return MaxFactor;
+ }
+ if (Val.getOpcode() == ISD::SHL) {
+ if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
+ return 0;
+ return (unsigned) Val.getConstantOperandVal(1);
+ }
+
+ return 0;
+}
+
+/// @returns true if V>>Amount will eliminate V's operation on its child
+static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
+ if (V.getOpcode() == ISD::MUL) {
+ SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
+ for (int i=0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[i].getNode()) &&
+ V.getConstantOperandVal(i) % ((uint64_t)1 << Amount) == 0) {
+ uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
+ return (NewConst == 1);
+ }
+ } else if (V.getOpcode() == ISD::SHL) {
+ return (Amount == V.getConstantOperandVal(1));
+ }
+
+ return false;
+}
+
+SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
+ SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
+ if (V.getOpcode() == ISD::MUL) {
+ for (int i=0; i < 2; ++i) {
+ if (isa<ConstantSDNode>(Ops[i].getNode()) &&
+ V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
+ uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
+ if (NewConst == 1)
+ return Ops[!i];
+ Ops[i] = CurDAG->getConstant(NewConst,
+ SDLoc(V), V.getValueType());
+ break;
+ }
+ }
+ } else if (V.getOpcode() == ISD::SHL) {
+ uint64_t ShiftAmount = V.getConstantOperandVal(1);
+ if (ShiftAmount == Power)
+ return Ops[0];
+ Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
+ SDLoc(V), V.getValueType());
+ }
+
+ return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
+}
+
+static bool isTargetConstant(const SDValue &V) {
+ return V.getOpcode() == HexagonISD::CONST32 ||
+ V.getOpcode() == HexagonISD::CONST32_GP;
+}
+
+unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
+ if (GAUsesInFunction.count(V))
+ return GAUsesInFunction[V];
+
+ unsigned Result = 0;
+ const Function *CurF = CurDAG->getMachineFunction().getFunction();
+ for (const User *U : V->users()) {
+ if (isa<Instruction>(U) &&
+ cast<Instruction>(U)->getParent()->getParent() == CurF)
+ ++Result;
+ }
+
+ GAUsesInFunction[V] = Result;
+
+ return Result;
+}
+
+/// Note - After calling this, N may be dead. It may have been replaced by a
+/// new node, so always use the returned value in place of N.
+///
+/// @returns The SDValue taking the place of N (which could be N if it is
+/// unchanged)
+SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
+ assert(RootWeights.count(N) && "Cannot balance non-root node.");
+ assert(RootWeights[N] != -2 && "This node was RAUW'd!");
+ assert(!TopLevel || N->getOpcode() == ISD::ADD);
+
+ // Return early if this node was already visited
+ if (RootWeights[N] != -1)
+ return SDValue(N, 0);
+
+ assert(isOpcodeHandled(N));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Return early if the operands will remain unchanged or are all roots
+ if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
+ (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
+ SDNode *Op0N = Op0.getNode();
+ int Weight;
+ if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
+ Weight = getWeight(balanceSubTree(Op0N).getNode());
+ // Weight = calculateWeight(Op0N);
+ } else
+ Weight = getWeight(Op0N);
+
+ SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
+ if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
+ Weight += getWeight(balanceSubTree(Op1N).getNode());
+ // Weight += calculateWeight(Op1N);
+ } else
+ Weight += getWeight(Op1N);
+
+ RootWeights[N] = Weight;
+ RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
+ getHeight(N->getOperand(1).getNode())) + 1;
+
+ DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
+ << " Height=" << RootHeights[N] << "): ");
+ DEBUG(N->dump());
+
+ return SDValue(N, 0);
+ }
+
+ DEBUG(dbgs() << "** Balancing root node: ");
+ DEBUG(N->dump());
+
+ unsigned NOpcode = N->getOpcode();
+
+ LeafPrioQueue Leaves(NOpcode);
+ SmallVector<SDValue, 4> Worklist;
+ Worklist.push_back(SDValue(N, 0));
+
+ // SHL nodes will be converted to MUL nodes
+ if (NOpcode == ISD::SHL)
+ NOpcode = ISD::MUL;
+
+ bool CanFactorize = false;
+ WeightedLeaf Mul1, Mul2;
+ unsigned MaxPowerOf2 = 0;
+ WeightedLeaf GA;
+
+ // Do not try to factor out a shift if there is already a shift at the tip of
+ // the tree.
+ bool HaveTopLevelShift = false;
+ if (TopLevel &&
+ ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
+ Op0.getConstantOperandVal(1) < 4) ||
+ (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
+ Op1.getConstantOperandVal(1) < 4)))
+ HaveTopLevelShift = true;
+
+ // Flatten the subtree into an ordered list of leaves; at the same time
+ // determine whether the tree is already balanced.
+ int InsertionOrder = 0;
+ SmallDenseMap<SDValue, int> NodeHeights;
+ bool Imbalanced = false;
+ int CurrentWeight = 0;
+ while (!Worklist.empty()) {
+ SDValue Child = Worklist.pop_back_val();
+
+ if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
+ // CASE 1: Child is a root note
+
+ int Weight = RootWeights[Child.getNode()];
+ if (Weight == -1) {
+ Child = balanceSubTree(Child.getNode());
+ // calculateWeight(Child.getNode());
+ Weight = getWeight(Child.getNode());
+ } else if (Weight == -2) {
+ // Whoops, this node was RAUWd by one of the balanceSubTree calls we
+ // made. Our worklist isn't up to date anymore.
+ // Restart the whole process.
+ DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+ return balanceSubTree(N, TopLevel);
+ }
+
+ NodeHeights[Child] = 1;
+ CurrentWeight += Weight;
+
+ unsigned PowerOf2;
+ if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
+ (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
+ Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
+ // Try to identify two factorizable MUL/SHL children greedily. Leave
+ // them out of the priority queue for now so we can deal with them
+ // after.
+ if (!Mul1.Value.getNode()) {
+ Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
+ MaxPowerOf2 = PowerOf2;
+ } else {
+ Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
+ MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
+
+ // Our addressing modes can only shift by a maximum of 3
+ if (MaxPowerOf2 > 3)
+ MaxPowerOf2 = 3;
+
+ CanFactorize = true;
+ }
+ } else
+ Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
+ } else if (!isOpcodeHandled(Child.getNode())) {
+ // CASE 2: Child is an unhandled kind of node (e.g. constant)
+ int Weight = getWeight(Child.getNode());
+
+ NodeHeights[Child] = getHeight(Child.getNode());
+ CurrentWeight += Weight;
+
+ if (isTargetConstant(Child) && !GA.Value.getNode())
+ GA = WeightedLeaf(Child, Weight, InsertionOrder++);
+ else
+ Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
+ } else {
+ // CASE 3: Child is a subtree of same opcode
+ // Visit children first, then flatten.
+ unsigned ChildOpcode = Child.getOpcode();
+ assert(ChildOpcode == NOpcode ||
+ (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
+
+ // Convert SHL to MUL
+ SDValue Op1;
+ if (ChildOpcode == ISD::SHL)
+ Op1 = getMultiplierForSHL(Child.getNode());
+ else
+ Op1 = Child->getOperand(1);
+
+ if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
+ assert(!NodeHeights.count(Child) && "Parent visited before children?");
+ // Visit children first, then re-visit this node
+ Worklist.push_back(Child);
+ Worklist.push_back(Op1);
+ Worklist.push_back(Child->getOperand(0));
+ } else {
+ // Back at this node after visiting the children
+ if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
+ Imbalanced = true;
+
+ NodeHeights[Child] = std::max(NodeHeights[Op1],
+ NodeHeights[Child->getOperand(0)]) + 1;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
+ << " weight=" << CurrentWeight << " imbalanced="
+ << Imbalanced << "\n");
+
+ // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
+ // This factors out a shift in order to match memw(a<<Y+b).
+ if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
+ willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
+ DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
+ int Weight = Mul1.Weight + Mul2.Weight;
+ int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
+ SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
+ SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
+ SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
+ Mul1Factored, Mul2Factored);
+ SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
+ Mul1.Value.getValueType());
+ SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
+ Sum, Const);
+ NodeHeights[New] = Height;
+ Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
+ } else if (Mul1.Value.getNode()) {
+ // We failed to factorize two MULs, so now the Muls are left outside the
+ // queue... add them back.
+ Leaves.push(Mul1);
+ if (Mul2.Value.getNode())
+ Leaves.push(Mul2);
+ CanFactorize = false;
+ }
+
+ // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
+ // and the root node itself is not used more than twice. This reduces the
+ // amount of additional constant extenders introduced by this optimization.
+ bool CombinedGA = false;
+ if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
+ GA.Value.hasOneUse() && N->use_size() < 3) {
+ GlobalAddressSDNode *GANode =
+ cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
+ ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
+
+ if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
+ getTargetLowering()->isOffsetFoldingLegal(GANode)) {
+ DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
+ << "): ");
+ DEBUG(GANode->dump());
+
+ SDValue NewTGA =
+ CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
+ GANode->getValueType(0),
+ GANode->getOffset() + (uint64_t)Offset->getSExtValue());
+ GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
+ GA.Value.getValueType(), NewTGA);
+ GA.Weight += Leaves.top().Weight;
+
+ NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
+ CombinedGA = true;
+
+ Leaves.pop(); // Remove the offset constant from the queue
+ }
+ }
+
+ if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
+ (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
+ RootWeights[N] = CurrentWeight;
+ RootHeights[N] = NodeHeights[SDValue(N, 0)];
+
+ return SDValue(N, 0);
+ }
+
+ // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
+ if (NOpcode == ISD::ADD && GA.Value.getNode()) {
+ WeightedLeaf SHL = Leaves.findSHL(31);
+ if (SHL.Value.getNode()) {
+ int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
+ GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
+ GA.Value.getValueType(),
+ GA.Value, SHL.Value);
+ GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
+ NodeHeights[GA.Value] = Height;
+ }
+ }
+
+ if (GA.Value.getNode())
+ Leaves.push(GA);
+
+ // If this is the top level and we haven't factored out a shift, we should try
+ // to move a constant to the bottom to match addressing modes like memw(rX+C)
+ if (TopLevel && !CanFactorize && Leaves.hasConst()) {
+ DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
+ Leaves.pushToBottom(Leaves.pop());
+ }
+
+ // Rebuild the tree using Huffman's algorithm
+ while (Leaves.size() > 1) {
+ WeightedLeaf L0 = Leaves.pop();
+
+ // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
+ // otherwise just get the next leaf
+ WeightedLeaf L1 = Leaves.findMULbyConst();
+ if (!L1.Value.getNode())
+ L1 = Leaves.pop();
+
+ assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
+
+ SDValue V0 = L0.Value;
+ int V0Weight = L0.Weight;
+ SDValue V1 = L1.Value;
+ int V1Weight = L1.Weight;
+
+ // Make sure that none of these nodes have been RAUW'd
+ if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
+ (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
+ DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+ return balanceSubTree(N, TopLevel);
+ }
+
+ ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
+ ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
+ EVT VT = N->getValueType(0);
+ SDValue NewNode;
+
+ if (V0C && !V1C) {
+ std::swap(V0, V1);
+ std::swap(V0C, V1C);
+ }
+
+ // Calculate height of this node
+ assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
+ "Children must have been visited before re-combining them!");
+ int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
+
+ // Rebuild this node (and restore SHL from MUL if needed)
+ if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
+ NewNode = CurDAG->getNode(
+ ISD::SHL, SDLoc(V0), VT, V0,
+ CurDAG->getConstant(
+ V1C->getAPIntValue().logBase2(), SDLoc(N),
+ getTargetLowering()->getScalarShiftAmountTy(CurDAG->getDataLayout(), V0.getValueType())));
+ else
+ NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
+
+ NodeHeights[NewNode] = Height;
+
+ int Weight = V0Weight + V1Weight;
+ Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
+
+ DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
+ << Height << "):\n");
+ DEBUG(NewNode.dump());
+ }
+
+ assert(Leaves.size() == 1);
+ SDValue NewRoot = Leaves.top().Value;
+
+ assert(NodeHeights.count(NewRoot));
+ int Height = NodeHeights[NewRoot];
+
+ // Restore SHL if we earlier converted it to a MUL
+ if (NewRoot.getOpcode() == ISD::MUL) {
+ ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
+ if (V1C && V1C->getAPIntValue().isPowerOf2()) {
+ EVT VT = NewRoot.getValueType();
+ SDValue V0 = NewRoot.getOperand(0);
+ NewRoot = CurDAG->getNode(
+ ISD::SHL, SDLoc(NewRoot), VT, V0,
+ CurDAG->getConstant(V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
+ getTargetLowering()->getScalarShiftAmountTy(
+ CurDAG->getDataLayout(), V0.getValueType())));
+ }
+ }
+
+ if (N != NewRoot.getNode()) {
+ DEBUG(dbgs() << "--> Root is now: ");
+ DEBUG(NewRoot.dump());
+
+ // Replace all uses of old root by new root
+ CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
+ // Mark that we have RAUW'd N
+ RootWeights[N] = -2;
+ } else {
+ DEBUG(dbgs() << "--> Root unchanged.\n");
+ }
+
+ RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
+ RootHeights[NewRoot.getNode()] = Height;
+
+ return NewRoot;
+}
+
+void HexagonDAGToDAGISel::rebalanceAddressTrees() {
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E;) {
+ SDNode *N = &*I++;
+ if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
+ continue;
+
+ SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
+ if (BasePtr.getOpcode() != ISD::ADD)
+ continue;
+
+ // We've already processed this node
+ if (RootWeights.count(BasePtr.getNode()))
+ continue;
+
+ DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
+ DEBUG(N->dump());
+
+ // FindRoots
+ SmallVector<SDNode *, 4> Worklist;
+
+ Worklist.push_back(BasePtr.getOperand(0).getNode());
+ Worklist.push_back(BasePtr.getOperand(1).getNode());
+
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.pop_back_val();
+ unsigned Opcode = N->getOpcode();
+
+ if (!isOpcodeHandled(N))
+ continue;
+
+ Worklist.push_back(N->getOperand(0).getNode());
+ Worklist.push_back(N->getOperand(1).getNode());
+
+ // Not a root if it has only one use and same opcode as its parent
+ if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
+ continue;
+
+ // This root node has already been processed
+ if (RootWeights.count(N))
+ continue;
+
+ RootWeights[N] = -1;
+ }
+
+ // Balance node itself
+ RootWeights[BasePtr.getNode()] = -1;
+ SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
+
+ if (N->getOpcode() == ISD::LOAD)
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+ NewBasePtr, N->getOperand(2));
+ else
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ NewBasePtr, N->getOperand(3));
+
+ DEBUG(dbgs() << "--> Final node: ");
+ DEBUG(N->dump());
+ }
+
+ CurDAG->RemoveDeadNodes();
+ GAUsesInFunction.clear();
+ RootHeights.clear();
+ RootWeights.clear();
+}
+
+
Added: llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll?rev=277151&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/addr-calc-opt.ll Fri Jul 29 10:15:35 2016
@@ -0,0 +1,54 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Test whether we can produce minimal code for this complex address
+; calculation.
+;
+
+; CHECK: r0 = memub(r{{[0-9]+}}<<#3{{ *}}+{{ *}}##the_global+516)
+
+%0 = type { [3 x %1] }
+%1 = type { %2, i8, i8, i8, i8, i8, [4 x i8], i8, [10 x i8], [10 x i8], [10 x i8], i8, [3 x %4], i16, i16, i16, i16, i32, i8, [4 x i8], i8, i8, i8, i8, %5, i8, i8, i8, i8, i8, i16, i8, i8, i8, i16, i16, i8, i8, [2 x i8], [2 x i8], i8, i8, i8, i8, i8, i16, i16, i8, i8, i8, i8, i8, i8, %9, i8, [6 x [2 x i8]], i16, i32, %10, [28 x i8], [4 x %17] }
+%2 = type { %3 }
+%3 = type { i8, i8, i8, i8, i8, i16, i16, i16, i16, i16 }
+%4 = type { i16, i16 }
+%5 = type { [10 x %6] }
+%6 = type { [2 x %7] }
+%7 = type { i8, [2 x %8] }
+%8 = type { [4 x i8] }
+%9 = type { i8 }
+%10 = type { %11, %13 }
+%11 = type { [2 x [2 x i8]], [2 x [8 x %12]], [6 x i16], [6 x i16] }
+%12 = type { i8, i8 }
+%13 = type { [4 x %12], [4 x %12], [2 x [4 x %14]], [6 x i16] }
+%14 = type { %15, %16 }
+%15 = type { i8, i8 }
+%16 = type { i8, i8 }
+%17 = type { i8, i8, %1*, i16, i16, i16, i64, i32, i32, %18, i8, %21, i8, [2 x i16], i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i16, i16, [2 x i16], i16, [2 x i32], [2 x i16], [2 x i16], i8, i8, [6 x %23], i8, i8, i8, %24, %25, %26, %28 }
+%18 = type { %19, [10 x %20] }
+%19 = type { i32 }
+%20 = type { [2 x i8], [2 x i8], i8, i8, i8, i8 }
+%21 = type { i8, i8, i8, [8 x %22] }
+%22 = type { i8, i8, i8, i32 }
+%23 = type { i32, i16, i16, [2 x i16], [2 x i16], [2 x i16], i32 }
+%24 = type { [2 x i32], [2 x i64*], [2 x i64*], [2 x i64*], [2 x i32], [2 x i32], i32 }
+%25 = type { [2 x i32], [2 x i32], [2 x i32] }
+%26 = type { i8, i8, i8, i16, i16, %27, i32, i32, i32, i16 }
+%27 = type { i64 }
+%28 = type { %29, %31, [24 x i8] }
+%29 = type { [2 x %30], [16 x i32] }
+%30 = type { [16 x i32], [8 x i32], [16 x i32], [64 x i32], [2 x i32], i64, i32, i32, i32, i32 }
+%31 = type { [2 x %32] }
+%32 = type { [4 x %33], i32 }
+%33 = type { i32, i32 }
+
+ at the_global = external global %0
+
+; Function Attrs: nounwind optsize readonly ssp
+define zeroext i8 @myFun(i8 zeroext, i8 zeroext) {
+ %3 = zext i8 %1 to i32
+ %4 = zext i8 %0 to i32
+ %5 = getelementptr inbounds %0, %0* @the_global, i32 0, i32 0, i32 %4, i32 60, i32 0, i32 9, i32 1, i32 %3, i32 0, i32 0
+ %6 = load i8, i8* %5, align 4
+ ret i8 %6
+}
+
More information about the llvm-commits
mailing list