[llvm-commits] [llvm] r99848 - in /llvm/trunk/lib/Target/X86: SSEDomainFix.cpp X86InstrInfo.cpp X86InstrInfo.h
Jakob Stoklund Olesen
stoklund at 2pi.dk
Mon Mar 29 16:24:21 PDT 2010
Author: stoklund
Date: Mon Mar 29 18:24:21 2010
New Revision: 99848
URL: http://llvm.org/viewvc/llvm-project?rev=99848&view=rev
Log:
Basic implementation of SSEDomainFix pass.
Cross-block inference is primitive and wrong, but the pass is working otherwise.
Modified:
llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.h
Modified: llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/SSEDomainFix.cpp?rev=99848&r1=99847&r2=99848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/SSEDomainFix.cpp (original)
+++ llvm/trunk/lib/Target/X86/SSEDomainFix.cpp Mon Mar 29 18:24:21 2010
@@ -29,12 +29,108 @@
using namespace llvm;
namespace {
+
+/// Allocate objects from a pool, allow objects to be recycled, and provide a
+/// way of deleting everything. T must be default-constructible and have clear().
+template<typename T, unsigned PageSize = 64>
+class PoolAllocator {
+ std::vector<T*> Pages, Avail;  // Pages: owned new[] arrays. Avail: free objects.
+public:
+ ~PoolAllocator() { Clear(); }
+
+ T* Alloc() {  // Return a free object, adding a page of PageSize objects if none.
+ if (Avail.empty()) {
+ T *p = new T[PageSize];
+ Pages.push_back(p);
+ Avail.reserve(PageSize);
+ for (unsigned n = 0; n != PageSize; ++n)
+ Avail.push_back(p+n);
+ }
+ T *p = Avail.back();
+ Avail.pop_back();
+ return p;
+ }
+
+ // Allow object to be reallocated. It won't be reconstructed, only clear()ed.
+ void Recycle(T *p) {
+ p->clear();
+ Avail.push_back(p);  // No duplicate check: recycling p twice hands it out twice.
+ }
+
+ // Destroy all objects, make sure there are no external pointers to them.
+ void Clear() {
+ Avail.clear();
+ while (!Pages.empty()) {
+ delete[] Pages.back();
+ Pages.pop_back();
+ }
+ }
+};
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+ // Available domains. For an open DomainValue, it is the still possible
+ // domains for collapsing. For a collapsed DomainValue it is the domains where
+ // the register is available for free. Bit N set means domain N is included.
+ unsigned Mask;
+
+ // Position of the last defining instruction.
+ unsigned Dist;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // Collapsed DomainValue have no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool collapsed() const { return Instrs.empty(); }
+
+ // Is any domain in mask available? mask is a bit set, not a domain number.
+ bool compat(unsigned mask) const {
+ return Mask & mask;
+ }
+
+ // Mark domain as available. domain is a bit index, not a bit mask.
+ void add(unsigned domain) {
+ Mask |= 1u << domain;
+ }
+
+ DomainValue() { clear(); }
+
+ void clear() {  // Reset to the freshly constructed state: no refs, domains or instrs.
+ Refs = Mask = Dist = 0;
+ Instrs.clear();
+ }
+};
+
class SSEDomainFixPass : public MachineFunctionPass {
static char ID;
- const X86InstrInfo *TII;
+ PoolAllocator<DomainValue> Pool;
MachineFunction *MF;
+ const X86InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
MachineBasicBlock *MBB;
+ bool hasLiveRegs;
+ DomainValue *LiveRegs[16];
+
public:
SSEDomainFixPass() : MachineFunctionPass(&ID) {}
@@ -50,47 +146,288 @@
}
private:
+ // Register mapping.
+ int RegIndex(unsigned Reg);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B);
+
void enterBasicBlock(MachineBasicBlock *MBB);
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+
};
}
char SSEDomainFixPass::ID = 0;
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers, else an index 0..15.
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+ // Registers are sorted lexicographically.
+ // We just need them to be consecutive, ordering doesn't matter.
+ assert(X86::XMM9 == X86::XMM0+15 && "Unexpected sort");
+ reg -= X86::XMM0;  // unsigned: numbers below XMM0 wrap to large values
+ return reg < 16 ? reg : -1;  // so one comparison rejects both sides of the range
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts. dv may be null.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+ if (LiveRegs[rx] == dv)  // already bound: leave the refcount untouched
+ return;
+ if (LiveRegs[rx]) {
+ assert(LiveRegs[rx]->Refs && "Bad refcount");
+ if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);  // last ref gone
+ }
+ LiveRegs[rx] = dv;
+ if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue. No-op if rx is unset.
+void SSEDomainFixPass::Kill(int rx) {
+ if (!LiveRegs[rx]) return;
+
+ // Before killing the last reference to an open DomainValue, collapse it to
+ // the first available domain (the lowest set bit of its Mask).
+ // NOTE(review): on this path LiveRegs[rx] is not reset here - confirm intended.
+ if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
+ Collapse(LiveRegs[rx], CountTrailingZeros_32(LiveRegs[rx]->Mask));
+ else
+ SetLiveReg(rx, 0);  // just drop this register's reference
+}
+
+/// Force register rx into domain. Afterwards rx has a collapsed DomainValue.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+ hasLiveRegs = true;  // runOnMachineFunction visits generic instrs once this is set
+ if (DomainValue *dv = LiveRegs[rx]) {
+ if (dv->collapsed())
+ dv->add(domain);  // already collapsed: record one more available domain
+ else
+ Collapse(dv, domain);  // open: rewrite its pending instructions to domain
+ } else {
+ // Set up basic collapsed DomainValue (no instructions, a single domain bit)
+ DomainValue *dv = Pool.Alloc();
+ dv->add(domain);
+ SetLiveReg(rx, dv);
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+///
+/// dv must allow the requested domain (asserted). Every instruction recorded
+/// in dv is switched to that domain through TII->SetSSEDomain and removed from
+/// dv, and dv's Mask is reduced to the single bit for domain.
+///
+/// When dv is shared, it has been recycled by the time this function returns
+/// and the caller must not touch it again. When dv has a single user, it
+/// stays bound to that register as its collapsed value.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+  assert(dv->compat(1u << domain) && "Cannot collapse");
+
+  // Collapse all the instructions.
+  while (!dv->Instrs.empty()) {
+    MachineInstr *mi = dv->Instrs.back();
+    TII->SetSSEDomain(mi, domain);
+    dv->Instrs.pop_back();
+  }
+  dv->Mask = 1u << domain;
+
+  // If there are multiple users, give them new, unique DomainValues.
+  // SetLiveReg() drops one reference to dv per rebound register and recycles
+  // dv itself when the last reference goes away. Do not recycle dv again
+  // here: a second Recycle() would put the same object on the free list
+  // twice, and two later Alloc() calls would return the same DomainValue.
+  if (dv->Refs > 1) {
+    for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
+      if (LiveRegs[rx] == dv) {
+        DomainValue *dv2 = Pool.Alloc();
+        dv2->add(domain);
+        SetLiveReg(rx, dv2);
+      }
+  }
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released. Returns false, changing nothing, if A and B share no domain.
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+ assert(!A->collapsed() && "Cannot merge into collapsed");
+ assert(!B->collapsed() && "Cannot merge from collapsed");
+ if (!A->compat(B->Mask))
+ return false;
+ A->Mask &= B->Mask;  // only domains allowed by both remain possible
+ A->Dist = std::max(A->Dist, B->Dist);
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+ for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
+ if (LiveRegs[rx] == B)
+ SetLiveReg(rx, A);  // drops a ref to B; SetLiveReg recycles B at zero refs
+ return true;
+}
+
void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
MBB = mbb;
- DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain. Only operands described by getDesc() are visited.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses (the operands that follow the getNumDefs() defs).
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;  // not one of the tracked registers
+ Force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;  // not one of the tracked registers
+ Kill(rx);
+ Force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Scan the explicit use operands for incoming domains.
+ unsigned collmask = mask;  // domains still possible for mi; narrowed below
+ SmallVector<int, 4> used;  // register indices of open, compatible use operands
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx]) {
+ // Is it possible to use this collapsed register for free?
+ if (dv->collapsed()) {
+ if (unsigned m = collmask & dv->Mask)
+ collmask = m;  // narrow only when the intersection is non-empty
+ } else if (dv->compat(collmask))
+ used.push_back(rx);
+ else
+ Kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(collmask)) {
+ unsigned domain = CountTrailingZeros_32(collmask);
+ TII->SetSSEDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match collmask, and build a list of
+ // incoming DomainValue that we want to merge.
+ SmallVector<DomainValue*,4> doms;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ DomainValue *dv = LiveRegs[rx];
+ // This useless DomainValue could have been missed above (collmask may have
+ // been narrowed after rx was recorded in used).
+ if (!dv->compat(collmask)) {
+ Kill(*i);
+ continue;
+ }
+ // sorted, uniqued insert (ascending Dist; this i/e pair shadows the outer one).
+ bool inserted = false;
+ for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+ i != e && !inserted; ++i) {
+ if (dv == *i)
+ inserted = true;  // already in the list, nothing to add
+ else if (dv->Dist < (*i)->Dist) {
+ inserted = true;
+ doms.insert(i, dv);  // NOTE(review): i/e are still stepped and compared after insert - confirm safe
+ }
+ }
+ if (!inserted)
+ doms.push_back(dv);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!doms.empty()) {
+ if (!dv)
+ dv = doms.back();  // the entry with the largest Dist becomes the merge target
+ else if (!Merge(dv, doms.back()))
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
+ if (LiveRegs[*i] == doms.back())
+ Kill(*i);  // could not merge: kill the registers still using that value
+ doms.pop_back();
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv)
+ dv = Pool.Alloc();
+ dv->Mask = collmask;  // NOTE(review): replaces any mask narrowed by Merge - confirm intended
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv.
+ for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+ Kill(rx);
+ SetLiveReg(rx, dv);
+ }
+ }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any XMM registers redefined. Uses are ignored.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;  // not one of the tracked registers
+ Kill(rx);
+ }
}
bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+
+ hasLiveRegs = false;
+ for (unsigned i=0, e = array_lengthof(LiveRegs); i != e; ++i)
+ LiveRegs[i] = 0;
// If no XMM registers are used in the function, we can skip it completely.
- bool XMMIsUsed = false;
+ bool anyregs = false;
for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
E = X86::VR128RegClass.end(); I != E; ++I)
if (MF->getRegInfo().isPhysRegUsed(*I)) {
- XMMIsUsed = true;
+ anyregs = true;
break;
}
- if (!XMMIsUsed) return false;
+ if (!anyregs) return false;
MachineBasicBlock *Entry = MF->begin();
SmallPtrSet<MachineBasicBlock*, 16> Visited;
- for (df_ext_iterator<MachineBasicBlock*,
- SmallPtrSet<MachineBasicBlock*, 16> >
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
- DFI != DFE; ++DFI) {
+ DFI != DFE; ++DFI) {
enterBasicBlock(*DFI);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I) {
- MachineInstr *MI = I;
- const unsigned *equiv = 0;
- X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
- (void) domain;
- DEBUG(dbgs() << "-isd"[domain] << (equiv ? "* " : " ") << *MI);
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+ if (domp.first)
+ if (domp.second)
+ visitSoftInstr(mi, domp.second);
+ else
+ visitHardInstr(mi, domp.first);
+ else if (hasLiveRegs)
+ visitGenericInstr(mi);
}
}
+
+ Pool.Clear();
+
return false;
}
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=99848&r1=99847&r2=99848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Mar 29 18:24:21 2010
@@ -3659,45 +3659,49 @@
return GlobalBaseReg;
}
-X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
- const unsigned *&equiv) const {
- // These are the replaceable SSE instructions. Some of these have Int variants
- // that we don't include here. We don't want to replace instructions selected
- // by intrinsics.
- static const unsigned ReplaceableInstrs[][3] = {
- //PackedInt PackedSingle PackedDouble
- { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
- { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
- { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
- { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
- { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
- { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
- { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
- { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
- { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
- { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
- { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
- { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
- { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
- { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
- { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
- { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
- { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
- { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
- };
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics. Each row lists the same operation in the three domains.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedInt PackedSingle PackedDouble (column index = domain - 1)
+ { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
+ { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
+ { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
+ { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
+ { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
+ { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
+ { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
+ { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
+ { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
+ { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
+ { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
+ { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
+ { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
+ { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
+};
- const SSEDomain domain =
- SSEDomain((MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3);
- if (domain == NotSSEDomain)
- return domain;
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
- // Linear search FTW!
- const unsigned opc = MI->getOpcode();
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
- if (ReplaceableInstrs[i][domain-1] == opc) {
- equiv = ReplaceableInstrs[i];
- return domain;
- }
- equiv = 0;
- return domain;
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;  // 2-bit field
+ return std::make_pair(domain, domain != NotSSEDomain &&  // guard: lookup uses domain-1
+ lookup(MI->getOpcode(), domain) ? 0xe : 0);  // 0xe = bits 1..3 set, 0 = not replaceable
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;  // current domain
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);  // table row holding MI's opcode
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));  // switch to the same row's opcode for Domain
}
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=99848&r1=99847&r2=99848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Mon Mar 29 18:24:21 2010
@@ -722,11 +722,12 @@
/// Some SSE instructions come in variants for three domains.
enum SSEDomain { NotSSEDomain, PackedInt, PackedSingle, PackedDouble };
- /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for
- /// unknown instructions. If the instruction has equivalents for other
- /// domains, equiv points to a list of opcodes for [PackedInt, PackedSingle,
- /// PackedDouble].
- SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const;
+ /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+ /// and a bitmask of possible arguments to SetSSEDomain as the second.
+ std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+ /// SetSSEDomain - Set the SSEDomain of MI.
+ void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
More information about the llvm-commits
mailing list