Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h (revision 149437) +++ include/llvm/Target/TargetInstrInfo.h (working copy) @@ -811,6 +811,12 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {} + /// regHasSizeImpact - return the number of extra bytes if any operand of MI + /// is assigned a physical register that has a costPerUse. + virtual unsigned regHasSizeImpact(MachineInstr *MI) const { + return 0; + } + private: int CallFrameSetupOpcode, CallFrameDestroyOpcode; }; Index: include/llvm/CodeGen/LiveInterval.h =================================================================== --- include/llvm/CodeGen/LiveInterval.h (revision 149437) +++ include/llvm/CodeGen/LiveInterval.h (working copy) @@ -208,6 +208,8 @@ const unsigned reg; // the register or stack slot of this interval. float weight; // weight of this interval + unsigned bytes; // extra bytes if reg gets a physreg with a costPerUse. + // NOTE(review): no initializer is visible in this patch - confirm. Ranges ranges; // the ranges in which this register is live VNInfoList valnos; // value#'s Index: lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseInstrInfo.cpp (revision 149437) +++ lib/Target/ARM/ARMBaseInstrInfo.cpp (working copy) @@ -3000,3 +3000,90 @@ // This will go before any implicit ops. AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); } + +unsigned ARMBaseInstrInfo::regHasSizeImpact(MachineInstr *MI) const { + unsigned DefOpc = MI->getOpcode(); + + switch(DefOpc) { + default: + return 0; + case ARM::t2MOVi: + return (MI->getOperand(1).getImm() >= 0 && + MI->getOperand(1).getImm() <= 255) ? 2 : 0; + // Negation + case ARM::t2RSBri: + return (MI->getOperand(2).getImm() == 0) ? 
2 : 0; + case ARM::t2CMPri: + // If comparing to 0, then the cmp can potentially be folded + // into the branch instruction that consumes its result. + if (MI->getOperand(1).getImm() == 0) + return 6; + return (MI->getOperand(1).getImm() >= 0 && + MI->getOperand(1).getImm() <= 255) ? 2 : 0; + case ARM::t2ADDri: + case ARM::t2SUBri: + // TODO: Can be more precise by excluding local symbols + // because their base address is SP. + if (!MI->getOperand(1).isReg() || MI->getOperand(1).isKill()) + return (MI->getOperand(2).getImm() >= 0 && + MI->getOperand(2).getImm() <= 255) ? 2 : 0; + return (MI->getOperand(2).getImm() >= 0 && + MI->getOperand(2).getImm() <= 7) ? 2 : 0; + case ARM::t2LSLri: + case ARM::t2LSRri: + case ARM::t2ASRri: + return 2; + // Commutative binary instructions. + case ARM::t2EORrr: + case ARM::t2ANDrr: + case ARM::t2ADCrr: + case ARM::t2ORRrr: + case ARM::t2MUL: + return MI->getOperand(1).isKill() || MI->getOperand(2).isKill() ? 2 : 0; + // When neither operand is killed, register picking matters for the Add instruction. + case ARM::t2ADDrr: + return !MI->getOperand(1).isKill() && !MI->getOperand(2).isKill() ? 2 : 0; + + // Non commutative binary instructions. + case ARM::t2BICrr: + case ARM::t2SBCrr: + case ARM::t2RORrr: + case ARM::t2ASRrr: + case ARM::t2LSRrr: + case ARM::t2LSLrr: + return MI->getOperand(1).isKill() ? 2 : 0; + + // Unary operations, plus TST, CMN and register SUB. + case ARM::t2SXTH: + case ARM::t2UXTH: + case ARM::t2SXTB: + case ARM::t2UXTB: + case ARM::t2MVNr: + case ARM::t2TSTrr: + case ARM::t2CMNzrr: + case ARM::t2SUBrr: + return 2; + + // Loads and stores using a positive offset. + case ARM::t2LDRBi12: + case ARM::t2STRBi12: + return (MI->getOperand(2).getImm() <=31) ? 2 : 0; + case ARM::t2LDRHi12: + case ARM::t2STRHi12: + return ((MI->getOperand(2).getImm() & 1) == 0 && + MI->getOperand(2).getImm() <= 62) ? 2 :0; + case ARM::t2LDRi12: + case ARM::t2STRi12: + return ((MI->getOperand(2).getImm() & 3) == 0 && + MI->getOperand(2).getImm() <= 124) ? 
2 :0; + // Register offset: operand 3 is the shift amount for loads and stores. + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRs: + case ARM::t2STRBs: + case ARM::t2STRs: + case ARM::t2STRHs: + return (MI->getOperand(3).getImm()) ? 0 : 2; + } + return 0; +} Index: lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- lib/Target/ARM/ARMBaseInstrInfo.h (revision 149437) +++ lib/Target/ARM/ARMBaseInstrInfo.h (working copy) @@ -219,6 +219,11 @@ getExecutionDomain(const MachineInstr *MI) const; void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + /// regHasSizeImpact - return the extra bytes MI needs when it is emitted in + /// ".w" form because one of its operands uses a high GPR (r8-r15). + virtual + unsigned regHasSizeImpact(MachineInstr *MI) const; + private: unsigned getInstBundleLength(const MachineInstr *MI) const; Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp (revision 149437) +++ lib/CodeGen/RegAllocGreedy.cpp (working copy) @@ -67,6 +67,11 @@ clEnumValEnd), cl::init(SplitEditor::SM_Partition)); +static cl::opt<bool> +DisableFavorSizeReg("disable-favor-r0-7", cl::Hidden, + cl::desc("Disable favoring R0-7 registers."), + cl::init(false)); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -164,13 +169,24 @@ /// Cost of evicting interference. struct EvictionCost { unsigned BrokenHints; ///< Total number of broken hints. + unsigned Bytes; ///< Total extra bytes. float MaxWeight; ///< Maximum spill weight evicted. 
- EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + EvictionCost(unsigned B = 0, unsigned N = 0) : BrokenHints(B), Bytes(N), MaxWeight(0) {} bool operator<(const EvictionCost &O) const { - if (BrokenHints != O.BrokenHints) - return BrokenHints < O.BrokenHints; + if (BrokenHints != O.BrokenHints) { + if (BrokenHints == ~0u) + return false; + else + // Balance register coalescing against size registers: each broken hint + // weighs 10 bytes on both sides. The weighted sums are only compared + // when BrokenHints > O.BrokenHints, so O is never the ~0u sentinel. + return BrokenHints < O.BrokenHints || + ((Bytes+10*BrokenHints) < (O.Bytes+10*O.BrokenHints)); + } + if (Bytes != O.Bytes) + return Bytes < O.Bytes; return MaxWeight < O.MaxWeight; } }; @@ -268,14 +284,14 @@ void splitAroundRegion(LiveRangeEdit&, ArrayRef); void calcGapWeights(unsigned, SmallVectorImpl&); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); - bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&, bool = false); void evictInterference(LiveInterval&, unsigned, SmallVectorImpl&); unsigned tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl&, unsigned = ~0u); + SmallVectorImpl&, unsigned = ~0u, bool = false); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, @@ -470,7 +486,7 @@ } } - // Try to evict interference from a cheaper alternative. + // Try to trade this register with interference from a cheaper "size" alternative. unsigned Cost = TRI->getCostPerUse(PhysReg); // Most registers have 0 additional cost. 
@@ -479,7 +495,13 @@ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost << '\n'); - unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, !DisableFavorSizeReg); + +#ifndef NDEBUG + if (!CheapReg) + DEBUG(dbgs () << "Missed Eviction for size: " << PrintReg(PhysReg, TRI) << "bytes " + << VirtReg.bytes << '\n'); +#endif return CheapReg ? CheapReg : PhysReg; } @@ -523,7 +545,8 @@ /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, - bool IsHint, EvictionCost &MaxCost) { + bool IsHint, EvictionCost &MaxCost, + bool FavorSize) { // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -567,12 +590,13 @@ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); // Update eviction cost. Cost.BrokenHints += BreaksHint; + Cost.Bytes += FavorSize ? Intf->bytes : 0; Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; // Finally, apply the eviction policy for non-urgent evictions. - if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + if (!FavorSize && !Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) return false; } } @@ -617,7 +641,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs, - unsigned CostPerUseLimit) { + unsigned CostPerUseLimit, bool FavorSize) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); // Keep track of the cheapest interference seen so far. @@ -628,6 +652,7 @@ // hints, and only evict smaller spill weights. if (CostPerUseLimit < ~0u) { BestCost.BrokenHints = 0; + BestCost.Bytes = FavorSize ? 
VirtReg.bytes : 0; BestCost.MaxWeight = VirtReg.weight; } @@ -645,7 +670,7 @@ continue; } - if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) + if (!canEvictInterference(VirtReg, PhysReg, false, BestCost, FavorSize)) continue; // Best so far. @@ -1572,8 +1597,22 @@ // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) - return PhysReg; + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) { + if (VirtReg.bytes == 0 || DisableFavorSizeReg) + return PhysReg; + + // Retry with PhysReg's cost per use as the limit. NOTE(review): the tryEvict above presumably already evicted for PhysReg - confirm a second eviction is intended. + unsigned Cost = TRI->getCostPerUse(PhysReg); + if (!Cost) + return PhysReg; + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, !DisableFavorSizeReg); +#ifndef NDEBUG + if (!CheapReg) + DEBUG(dbgs () << "Missed Late Eviction: " << PrintReg(PhysReg, TRI) + << "bytes " << VirtReg.bytes << '\n'); +#endif + return CheapReg ? CheapReg : PhysReg; + } assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); Index: lib/CodeGen/CalcSpillWeights.cpp =================================================================== --- lib/CodeGen/CalcSpillWeights.cpp (revision 149437) +++ lib/CodeGen/CalcSpillWeights.cpp (working copy) @@ -89,6 +89,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; unsigned loopDepth = 0; @@ -106,6 +107,10 @@ // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); + // Extra bytes incurred if li.reg is assigned a physical register + // that has a costPerUse, summed over the instructions visited below. 
+ unsigned bytes = 0; + for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); MachineInstr *mi = I.skipInstruction();) { if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) @@ -135,6 +140,11 @@ totalWeight += weight; } + // Add this instruction's extra bytes (per the target's regHasSizeImpact); copies are skipped. + if (!mi->isCopy()) { + bytes += TII.regHasSizeImpact(mi); + } + // Get allocation hints from copies. if (noHint || !mi->isCopy()) continue; @@ -153,6 +163,9 @@ Hint.clear(); + // Record the accumulated extra bytes on the interval. + li.bytes = bytes; + // Always prefer the physreg hint. if (unsigned hint = hintPhys ? hintPhys : hintVirt) { mri.setRegAllocationHint(li.reg, 0, hint);