[llvm-commits] [llvm] r99848 - in /llvm/trunk/lib/Target/X86: SSEDomainFix.cpp X86InstrInfo.cpp X86InstrInfo.h

Jakob Stoklund Olesen stoklund at 2pi.dk
Mon Mar 29 16:24:21 PDT 2010


Author: stoklund
Date: Mon Mar 29 18:24:21 2010
New Revision: 99848

URL: http://llvm.org/viewvc/llvm-project?rev=99848&view=rev
Log:
Basic implementation of SSEDomainFix pass.

Cross-block inference is primitive and wrong, but the pass is working otherwise.

Modified:
    llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.h

Modified: llvm/trunk/lib/Target/X86/SSEDomainFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/SSEDomainFix.cpp?rev=99848&r1=99847&r2=99848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/SSEDomainFix.cpp (original)
+++ llvm/trunk/lib/Target/X86/SSEDomainFix.cpp Mon Mar 29 18:24:21 2010
@@ -29,12 +29,108 @@
 using namespace llvm;
 
 namespace {
+
+/// Allocate objects from a pool, allow objects to be recycled, and provide a
+/// way of deleting everything.
+template<typename T, unsigned PageSize = 64>
+class PoolAllocator {
+  std::vector<T*> Pages, Avail;
+public:
+  ~PoolAllocator() { Clear(); }
+
+  T* Alloc() {
+    if (Avail.empty()) {
+      T *p = new T[PageSize];
+      Pages.push_back(p);
+      Avail.reserve(PageSize);
+      for (unsigned n = 0; n != PageSize; ++n)
+        Avail.push_back(p+n);
+    }
+    T *p = Avail.back();
+    Avail.pop_back();
+    return p;
+  }
+
+  // Allow object to be reallocated. It won't be reconstructed.
+  void Recycle(T *p) {
+    p->clear();
+    Avail.push_back(p);
+  }
+
+  // Destroy all objects, make sure there are no external pointers to them.
+  void Clear() {
+    Avail.clear();
+    while (!Pages.empty()) {
+      delete[] Pages.back();
+      Pages.pop_back();
+    }
+  }
+};
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+struct DomainValue {
+  // Basic reference counting.
+  unsigned Refs;
+
+  // Available domains. For an open DomainValue, it is the still possible
+  // domains for collapsing. For a collapsed DomainValue it is the domains where
+  // the register is available for free.
+  unsigned Mask;
+
+  // Position of the last defining instruction.
+  unsigned Dist;
+
+  // Twiddleable instructions using or defining these registers.
+  SmallVector<MachineInstr*, 8> Instrs;
+
+  // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+  // track of the domains where the registers are already available.
+  bool collapsed() const { return Instrs.empty(); }
+
+  // Is any domain in mask available?
+  bool compat(unsigned mask) const {
+    return Mask & mask;
+  }
+
+  // Mark domain as available
+  void add(unsigned domain) {
+    Mask |= 1u << domain;
+  }
+
+  DomainValue() { clear(); }
+
+  void clear() {
+    Refs = Mask = Dist = 0;
+    Instrs.clear();
+  }
+};
+
 class SSEDomainFixPass : public MachineFunctionPass {
   static char ID;
-  const X86InstrInfo *TII;
+  PoolAllocator<DomainValue> Pool;
 
   MachineFunction *MF;
+  const X86InstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+
   MachineBasicBlock *MBB;
+  bool hasLiveRegs;
+  DomainValue *LiveRegs[16];
+
 public:
   SSEDomainFixPass() : MachineFunctionPass(&ID) {}
 
@@ -50,47 +146,288 @@
   }
 
 private:
+  // Register mapping.
+  int RegIndex(unsigned Reg);
+
+  // LiveRegs manipulations.
+  void SetLiveReg(int rx, DomainValue *DV);
+  void Kill(int rx);
+  void Force(int rx, unsigned domain);
+  void Collapse(DomainValue *dv, unsigned domain);
+  bool Merge(DomainValue *A, DomainValue *B);
+
   void enterBasicBlock(MachineBasicBlock *MBB);
+  void visitGenericInstr(MachineInstr*);
+  void visitSoftInstr(MachineInstr*, unsigned mask);
+  void visitHardInstr(MachineInstr*, unsigned domain);
+
 };
 }
 
 char SSEDomainFixPass::ID = 0;
 
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+  // Registers are sorted lexicographically.
+  // We just need them to be consecutive, ordering doesn't matter.
+  assert(X86::XMM9 == X86::XMM0+15 && "Unexpected sort");
+  reg -= X86::XMM0;
+  return reg < 16 ? reg : -1;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+  if (LiveRegs[rx] == dv)
+    return;
+  if (LiveRegs[rx]) {
+    assert(LiveRegs[rx]->Refs && "Bad refcount");
+    if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
+  }
+  LiveRegs[rx] = dv;
+  if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void SSEDomainFixPass::Kill(int rx) {
+  if (!LiveRegs[rx]) return;
+
+  // Before killing the last reference to an open DomainValue, collapse it to
+  // the first available domain.
+  if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
+    Collapse(LiveRegs[rx], CountTrailingZeros_32(LiveRegs[rx]->Mask));
+  else
+    SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+  hasLiveRegs = true;
+  if (DomainValue *dv = LiveRegs[rx]) {
+    if (dv->collapsed())
+      dv->add(domain);
+    else
+      Collapse(dv, domain);
+  } else {
+    // Set up basic collapsed DomainValue
+    DomainValue *dv = Pool.Alloc();
+    dv->add(domain);
+    SetLiveReg(rx, dv);
+  }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+  assert(dv->compat(1u << domain) && "Cannot collapse");
+
+  // Collapse all the instructions.
+  while (!dv->Instrs.empty()) {
+    MachineInstr *mi = dv->Instrs.back();
+    TII->SetSSEDomain(mi, domain);
+    dv->Instrs.pop_back();
+  }
+  dv->Mask = 1u << domain;
+
+  // If there are multiple users, give them new, unique DomainValues.
+  if (dv->Refs > 1) {
+    for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
+      if (LiveRegs[rx] == dv) {
+        DomainValue *dv2 = Pool.Alloc();
+        dv2->add(domain);
+        SetLiveReg(rx, dv2);
+      }
+    Pool.Recycle(dv);
+  }
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+  assert(!A->collapsed() && "Cannot merge into collapsed");
+  assert(!B->collapsed() && "Cannot merge from collapsed");
+  if (!A->compat(B->Mask))
+    return false;
+  A->Mask &= B->Mask;
+  A->Dist = std::max(A->Dist, B->Dist);
+  A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+  for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
+    if (LiveRegs[rx] == B)
+      SetLiveReg(rx, A);
+  return true;
+}
+
 void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
   MBB = mbb;
-  DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+  // Collapse all uses.
+  for (unsigned i = mi->getDesc().getNumDefs(),
+                e = mi->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    Force(rx, domain);
+  }
+
+  // Kill all defs and force them.
+  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    Kill(rx);
+    Force(rx, domain);
+  }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+  // Scan the explicit use operands for incoming domains.
+  unsigned collmask = mask;
+  SmallVector<int, 4> used;
+  for (unsigned i = mi->getDesc().getNumDefs(),
+                e = mi->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    if (DomainValue *dv = LiveRegs[rx]) {
+      // Is it possible to use this collapsed register for free?
+      if (dv->collapsed()) {
+        if (unsigned m = collmask & dv->Mask)
+          collmask = m;
+      } else if (dv->compat(collmask))
+        used.push_back(rx);
+      else
+        Kill(rx);
+    }
+  }
+
+  // If the collapsed operands force a single domain, propagate the collapse.
+  if (isPowerOf2_32(collmask)) {
+    unsigned domain = CountTrailingZeros_32(collmask);
+    TII->SetSSEDomain(mi, domain);
+    visitHardInstr(mi, domain);
+    return;
+  }
+
+  // Kill off any remaining uses that don't match collmask, and build a list of
+  // incoming DomainValue that we want to merge.
+  SmallVector<DomainValue*,4> doms;
+  for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+    int rx = *i;
+    DomainValue *dv = LiveRegs[rx];
+    // This useless DomainValue could have been missed above
+    if (!dv->compat(collmask)) {
+      Kill(*i);
+      continue;
+    }
+    // sorted, uniqued insert.
+    bool inserted = false;
+    for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+           i != e && !inserted; ++i) {
+      if (dv == *i)
+        inserted = true;
+      else if (dv->Dist < (*i)->Dist) {
+        inserted = true;
+        doms.insert(i, dv);
+      }
+    }
+    if (!inserted)
+      doms.push_back(dv);
+  }
+
+  //  doms are now sorted in order of appearance. Try to merge them all, giving
+  //  priority to the latest ones.
+  DomainValue *dv = 0;
+  while (!doms.empty()) {
+    if (!dv)
+      dv = doms.back();
+    else if (!Merge(dv, doms.back()))
+      for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
+        if (LiveRegs[*i] == doms.back())
+          Kill(*i);
+    doms.pop_back();
+  }
+
+  // dv is the DomainValue we are going to use for this instruction.
+  if (!dv)
+    dv = Pool.Alloc();
+  dv->Mask = collmask;
+  dv->Instrs.push_back(mi);
+
+  // Finally set all defs and non-collapsed uses to dv.
+  for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+      Kill(rx);
+      SetLiveReg(rx, dv);
+    }
+  }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+  // Process explicit defs, kill any XMM registers redefined
+  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+    MachineOperand &mo = mi->getOperand(i);
+    if (!mo.isReg()) continue;
+    int rx = RegIndex(mo.getReg());
+    if (rx < 0) continue;
+    Kill(rx);
+  }
 }
 
 bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
   TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+  TRI = MF->getTarget().getRegisterInfo();
+  MBB = 0;
+
+  hasLiveRegs = false;
+  for (unsigned i=0, e = array_lengthof(LiveRegs); i != e; ++i)
+    LiveRegs[i] = 0;
 
   // If no XMM registers are used in the function, we can skip it completely.
-  bool XMMIsUsed = false;
+  bool anyregs = false;
   for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
          E = X86::VR128RegClass.end(); I != E; ++I)
     if (MF->getRegInfo().isPhysRegUsed(*I)) {
-      XMMIsUsed = true;
+      anyregs = true;
       break;
     }
-  if (!XMMIsUsed) return false;
+  if (!anyregs) return false;
 
   MachineBasicBlock *Entry = MF->begin();
   SmallPtrSet<MachineBasicBlock*, 16> Visited;
-  for (df_ext_iterator<MachineBasicBlock*,
-         SmallPtrSet<MachineBasicBlock*, 16> >
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
          DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
-       DFI != DFE; ++DFI) {
+         DFI != DFE; ++DFI) {
     enterBasicBlock(*DFI);
     for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
         ++I) {
-      MachineInstr *MI = I;
-      const unsigned *equiv = 0;
-      X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
-      (void) domain;
-      DEBUG(dbgs() << "-isd"[domain] << (equiv ? "* " : "  ") << *MI);
+      MachineInstr *mi = I;
+      if (mi->isDebugValue()) continue;
+      std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+      if (domp.first)
+        if (domp.second)
+          visitSoftInstr(mi, domp.second);
+        else
+          visitHardInstr(mi, domp.first);
+      else if (hasLiveRegs)
+        visitGenericInstr(mi);
     }
   }
+
+  Pool.Clear();
+
   return false;
 }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=99848&r1=99847&r2=99848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Mar 29 18:24:21 2010
@@ -3659,45 +3659,49 @@
   return GlobalBaseReg;
 }
 
-X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
-                                                 const unsigned *&equiv) const {
-  // These are the replaceable SSE instructions. Some of these have Int variants
-  // that we don't include here. We don't want to replace instructions selected
-  // by intrinsics.
-  static const unsigned ReplaceableInstrs[][3] = {
-    //PackedInt          PackedSingle     PackedDouble
-    { X86::MOVDQAmr,     X86::MOVAPSmr,   X86::MOVAPDmr   },
-    { X86::MOVDQArm,     X86::MOVAPSrm,   X86::MOVAPDrm   },
-    { X86::MOVDQArr,     X86::MOVAPSrr,   X86::MOVAPDrr   },
-    { X86::MOVDQUmr,     X86::MOVUPSmr,   X86::MOVUPDmr   },
-    { X86::MOVDQUrm,     X86::MOVUPSrm,   X86::MOVUPDrm   },
-    { X86::MOVNTDQmr,    X86::MOVNTPSmr,  X86::MOVNTPDmr  },
-    { X86::PANDNrm,      X86::ANDNPSrm,   X86::ANDNPDrm   },
-    { X86::PANDNrr,      X86::ANDNPSrr,   X86::ANDNPDrr   },
-    { X86::PANDrm,       X86::ANDPSrm,    X86::ANDPDrm    },
-    { X86::PANDrr,       X86::ANDPSrr,    X86::ANDPDrr    },
-    { X86::PORrm,        X86::ORPSrm,     X86::ORPDrm     },
-    { X86::PORrr,        X86::ORPSrr,     X86::ORPDrr     },
-    { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
-    { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
-    { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
-    { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
-    { X86::PXORrm,       X86::XORPSrm,    X86::XORPDrm    },
-    { X86::PXORrr,       X86::XORPSrr,    X86::XORPDrr    },
-  };
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+  //PackedInt       PackedSingle     PackedDouble
+  { X86::MOVDQAmr,  X86::MOVAPSmr,   X86::MOVAPDmr  },
+  { X86::MOVDQArm,  X86::MOVAPSrm,   X86::MOVAPDrm  },
+  { X86::MOVDQArr,  X86::MOVAPSrr,   X86::MOVAPDrr  },
+  { X86::MOVDQUmr,  X86::MOVUPSmr,   X86::MOVUPDmr  },
+  { X86::MOVDQUrm,  X86::MOVUPSrm,   X86::MOVUPDrm  },
+  { X86::MOVNTDQmr, X86::MOVNTPSmr,  X86::MOVNTPDmr },
+  { X86::PANDNrm,   X86::ANDNPSrm,   X86::ANDNPDrm  },
+  { X86::PANDNrr,   X86::ANDNPSrr,   X86::ANDNPDrr  },
+  { X86::PANDrm,    X86::ANDPSrm,    X86::ANDPDrm   },
+  { X86::PANDrr,    X86::ANDPSrr,    X86::ANDPDrr   },
+  { X86::PORrm,     X86::ORPSrm,     X86::ORPDrm    },
+  { X86::PORrr,     X86::ORPSrr,     X86::ORPDrr    },
+  { X86::PXORrm,    X86::XORPSrm,    X86::XORPDrm   },
+  { X86::PXORrr,    X86::XORPSrr,    X86::XORPDrr   },
+};
 
-  const SSEDomain domain =
-    SSEDomain((MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3);
-  if (domain == NotSSEDomain)
-    return domain;
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
 
-  // Linear search FTW!
-  const unsigned opc = MI->getOpcode();
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
   for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
-    if (ReplaceableInstrs[i][domain-1] == opc) {
-      equiv = ReplaceableInstrs[i];
-      return domain;
-    }
-  equiv = 0;
-  return domain;
+    if (ReplaceableInstrs[i][domain-1] == opcode)
+      return ReplaceableInstrs[i];
+  return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+  uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  return std::make_pair(domain, domain != NotSSEDomain &&
+                                lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+  assert(Domain>0 && Domain<4 && "Invalid execution domain");
+  uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  assert(dom && "Not an SSE instruction");
+  const unsigned *table = lookup(MI->getOpcode(), dom);
+  assert(table && "Cannot change domain");
+  MI->setDesc(get(table[Domain-1]));
 }

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=99848&r1=99847&r2=99848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Mon Mar 29 18:24:21 2010
@@ -722,11 +722,12 @@
   /// Some SSE instructions come in variants for three domains.
   enum SSEDomain { NotSSEDomain, PackedInt, PackedSingle, PackedDouble };
 
-  /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for
-  /// unknown instructions. If the instruction has equivalents for other
-  /// domains, equiv points to a list of opcodes for [PackedInt, PackedSingle,
-  /// PackedDouble].
-  SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const;
+  /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+  /// and a bitmask of possible arguments to SetSSEDomain as the second.
+  std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+  /// SetSSEDomain - Set the SSEDomain of MI.
+  void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
 
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,





More information about the llvm-commits mailing list