[PATCH 2/2] R600: initial scheduler code
Vincent Lejeune
vljn at ovi.com
Mon Feb 11 10:39:33 PST 2013
From: Vadim Girlin <vadimgirlin at gmail.com>
This is a skeleton for a pre-RA MachineInstr scheduler strategy. Currently
it only tries to expose more parallelism for ALU instructions (this also
makes the distribution of GPR channels more uniform and increases the
chances of ALU instructions to be packed together in a single VLIW group).
It also tries to reduce clause switching by grouping instructions of the
same kind (ALU/FETCH/CF) together.
Vincent Lejeune:
- Support for VLIW4 slot assignment
- Recomputation of ScheduleDAG to get more parallelism opportunities
---
lib/Target/R600/AMDGPUTargetMachine.cpp | 17 +-
lib/Target/R600/R600MachineScheduler.cpp | 452 +++++++++++++++++++++++++++++++
lib/Target/R600/R600MachineScheduler.h | 119 ++++++++
3 files changed, 587 insertions(+), 1 deletion(-)
create mode 100644 lib/Target/R600/R600MachineScheduler.cpp
create mode 100644 lib/Target/R600/R600MachineScheduler.h
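Testing note: with this patch the MachineScheduler pass is enabled by default
for R600-family (pre-SI) devices. Assuming the generic MachineScheduler
command-line plumbing, the strategy can also be requested explicitly, e.g.:

    llc -march=r600 -mcpu=redwood -enable-misched -misched=r600 < input.ll

(The -enable-misched and -misched flags come from the common scheduler
infrastructure, not from this patch; the "r600" name matches the registry
entry added below.)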
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 821e864..e6070cd 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "AMDGPU.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -39,6 +40,14 @@ extern "C" void LLVMInitializeR600Target() {
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
}
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
TargetOptions Options,
@@ -70,7 +79,13 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
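+    // The R600 scheduling strategy targets the pre-SI VLIW devices, so it is
+    // only enabled up to the HD6XXX generation; SI keeps the default path.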
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createR600MachineScheduler);
+ }
+ }
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 0000000..229374c
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,452 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimized for the VLIW4 arch; modify it to support the TRANS slot
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "R600MachineScheduler.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include <set>
+using namespace llvm;
+
+/// \brief Recompute the Output and Anti dependencies of the incoming DAG.
+/// ScheduleDAGInstrs has a conservative policy about subregister dependencies:
+/// all subreg writes of the same superreg are chained by Output/Anti deps.
+/// These artificial deps delay the release of MIs and thus reduce parallelism
+/// opportunities. This function recomputes the ScheduleDAG to produce proper
+/// subreg-aware dependencies.
+// TODO: It should also recompute Data dependencies.
+static
+void RecomputeScheduleDAGMI(ScheduleDAGMI *dag) {
+
+  // Remove all Output deps; they are recomputed below in a subreg-aware way.
+  // Iterate over a copy of the Preds list because removePred() modifies it.
+  for (unsigned i = 0; i < dag->SUnits.size(); ++i) {
+    SUnit &SU = dag->SUnits[i];
+    SmallVector<SDep, 8> Deps(SU.Preds.begin(), SU.Preds.end());
+    for (unsigned j = 0, e = Deps.size(); j != e; ++j) {
+      if (Deps[j].getKind() == SDep::Output)
+        SU.removePred(Deps[j]);
+    }
+  }
+
+ // Now recompute output/anti dependencies
+ for (unsigned i = 0; i < dag->SUnits.size(); ++i) {
+ SUnit &SU = dag->SUnits[i];
+ MachineOperand &DestMO = SU.getInstr()->getOperand(0);
+ unsigned DestReg = SU.getInstr()->getOperand(0).getReg();
+ DEBUG(dbgs() << "Recomputing deps for "; SU.dump(dag); dbgs() << "\n";);
+    // Using LiveIntervals would make this a lot more efficient, but we can't
+    // access them inside a MachineSchedStrategy.
+    // Scheduling occurs on a per-MBB basis, so it is sufficient to collect
+    // deps inside a single MBB.
+ MachineBasicBlock *MBB = SU.getInstr()->getParent();
+ MachineBasicBlock::iterator SUPos = SU.getInstr();
+    // We walk the MIs from the MBB's start up to SU's instruction; deps on
+    // later instructions are caught when those SUs are processed, and
+    // addPred() takes care of both ends of an SDep.
+ for (MachineBasicBlock::iterator It = MBB->begin(), E = SUPos; It != E;
+ ++It) {
+ MachineInstr &MI = *It;
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() != DestReg)
+ continue;
+ if (MO.isUse() &&
+ (DestMO.getSubReg() == AMDGPU::NoSubRegister ||
+ MO.getSubReg() == DestMO.getSubReg())
+ ) {
+ SUnit *Predecessor = dag->getSUnit(&MI);
+ SU.addPred(SDep(Predecessor, SDep::Anti, DestReg));
+ }
+ if (MO.isDef() &&
+ (MO.getSubReg() == AMDGPU::NoSubRegister ||
+ MO.getSubReg() == DestMO.getSubReg())
+ ) {
+ SUnit *Predecessor = dag->getSUnit(&MI);
+ SU.addPred(SDep(Predecessor, SDep::Output, DestReg));
+ }
+ }
+ }
+ }
+
+ DEBUG(
+ dbgs() << "\n\n Recomputed DAG is :";
+ for (unsigned i = 0; i < dag->SUnits.size(); ++i) {
+ SUnit &SU = dag->SUnits[i];
+ dbgs() << "\n\n";
+ dag->SUnits[i].dump(dag);
+ dbgs() << "\nSuccs (" << SU.NumSuccsLeft << "):\n";
+ for (unsigned j = 0; j < SU.Succs.size(); j++) {
+ dbgs() << "- ";
+ SU.Succs[j].getSUnit()->dump(dag);
+ dbgs() << "\n";
+ }
+ dbgs() << " and Preds (" << SU.NumPredsLeft << ") :\n";
+ for (unsigned j = 0; j < SU.Preds.size(); j++) {
+ dbgs() << "- ";
+ SU.Preds[j].getSUnit()->dump(dag);
+ dbgs() << "\n";
+ }
+ }
+ );
+
+}
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+ RecomputeScheduleDAGMI(dag);
+
+ DAG = dag;
+ TII = static_cast<const R600InstrInfo*>(DAG->TII);
+ TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ MRI = &DAG->MRI;
+ Available[IDAlu]->clear();
+ Available[IDFetch]->clear();
+ Available[IDOther]->clear();
+ CurInstKind = IDOther;
+ CurEmitted = 0;
+ memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
+  InstKindLimit[IDAlu] = 123; // 128 minus 5 as a safety margin
+
+
+ const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
+    InstKindLimit[IDFetch] = 7; // 8 minus 1 as a safety margin
+  } else {
+    InstKindLimit[IDFetch] = 15; // 16 minus 1 as a safety margin
+ }
+}
+
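+/// \brief Move all SUnits from \p QSrc to \p QDst, clearing \p QSrc.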
+void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
+{
+ if (QSrc->empty())
+ return;
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ QDst->push(*I);
+ }
+ QSrc->clear();
+}
+
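+/// \brief Pick the next instruction to schedule (top-down only).
+///
+/// The strategy keeps emitting instructions of the current clause kind when
+/// possible: it switches to ALU only when the current clause is exhausted
+/// (its kind limit is reached or its queue is empty), and switches away from
+/// ALU only when the ALU limit is exceeded and FETCH/Other work is available.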
+SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
+ SUnit *SU = 0;
+ IsTopNode = true;
+ NextInstKind = IDOther;
+
+ // check if we might want to switch current clause type
+ bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
+ (CurEmitted > InstKindLimit[CurInstKind]) ||
+ (Available[CurInstKind]->empty());
+ bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
+ (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
+
+ if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+ // try to pick ALU
+ SU = pickAlu();
+ if (SU)
+ NextInstKind = IDAlu;
+ }
+
+ if (!SU) {
+ // try to pick FETCH
+ SU = pickOther(IDFetch);
+ if (SU)
+ NextInstKind = IDFetch;
+ }
+
+ // try to pick other
+ if (!SU) {
+ SU = pickOther(IDOther);
+ if (SU)
+ NextInstKind = IDOther;
+ }
+
+ DEBUG(
+ if (SU) {
+ dbgs() << "picked node: ";
+ SU->dump(DAG);
+ } else {
+ dbgs() << "NO NODE ";
+ for (int i = 0; i < IDLast; ++i) {
+ Available[i]->dump();
+ Pending[i]->dump();
+ }
+ }
+ );
+ return SU;
+}
+
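+/// \brief Update the strategy's state after \p SU has been scheduled.
+///
+/// Resets the emitted-instruction counter on a clause switch, counts a
+/// whole-group (XYZW) ALU instruction as four emitted slots, and releases
+/// pending FETCH/Other nodes so they become candidates for the next pick.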
+void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+
+ DEBUG(dbgs() << "scheduled: ");
+ DEBUG(SU->dump(DAG));
+
+ if (NextInstKind != CurInstKind) {
+ DEBUG(dbgs() << "Instruction Type Switch\n");
+ if (NextInstKind != IDAlu)
+ OccupedSlotsMask = 0;
+ CurEmitted = 0;
+ CurInstKind = NextInstKind;
+ }
+
+ if (CurInstKind == IDAlu) {
+ AluKind AK = getAluKind(SU);
+ if (AK == AluT_XYZW)
+ CurEmitted += 3;
+ }
+
+ ++CurEmitted;
+
+ if (CurInstKind != IDFetch) {
+ MoveUnits(Pending[IDFetch], Available[IDFetch]);
+ }
+ MoveUnits(Pending[IDOther], Available[IDOther]);
+}
+
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+ int IK = getInstKind(SU);
+
+ DEBUG(dbgs() << IK << " <= ");
+ DEBUG(SU->dump(DAG));
+
+ Pending[IK]->push(SU);
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
+}
+
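+/// \brief Classify an ALU instruction by the VLIW slot(s) it may occupy:
+/// a fixed X/Y/Z/W channel, a whole group (AluT_XYZW), any slot (AluAny),
+/// or AluDiscarded for copies that are expected to disappear before emission.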
+R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
+ MachineInstr *MI = SU->getInstr();
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ return AluT_XYZW;
+ case AMDGPU::COPY:
+    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+      // %vregX = COPY Tn_X is likely to be discarded in favor of an
+      // assignment of Tn_X to %vregX; don't consider it for scheduling.
+      return AluDiscarded;
+    } else if (MI->getOperand(1).isUndef()) {
+      // MI will become a KILL; don't consider it for scheduling.
+      return AluDiscarded;
+    }
+ default:
+ break;
+ }
+
+  // Does the instruction occupy a whole instruction group (IG)?
+  if (TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return AluT_XYZW;
+
+  // Is the result already assigned to a channel?
+ unsigned DestSubReg = MI->getOperand(0).getSubReg();
+ switch (DestSubReg) {
+ case AMDGPU::sub0:
+ return AluT_X;
+ case AMDGPU::sub1:
+ return AluT_Y;
+ case AMDGPU::sub2:
+ return AluT_Z;
+ case AMDGPU::sub3:
+ return AluT_W;
+ default:
+ break;
+ }
+
+  // Is the result already a member of an X/Y/Z/W register class?
+ unsigned DestReg = MI->getOperand(0).getReg();
+ if (MRI->getRegClass(DestReg) == &AMDGPU::R600_TReg32_XRegClass)
+ return AluT_X;
+ if (MRI->getRegClass(DestReg) == &AMDGPU::R600_TReg32_YRegClass)
+ return AluT_Y;
+ if (MRI->getRegClass(DestReg) == &AMDGPU::R600_TReg32_ZRegClass)
+ return AluT_Z;
+ if (MRI->getRegClass(DestReg) == &AMDGPU::R600_TReg32_WRegClass)
+ return AluT_W;
+ if (MRI->getRegClass(DestReg) == &AMDGPU::R600_Reg128RegClass)
+ return AluT_XYZW;
+
+ return AluAny;
+
+}
+
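+/// \brief Return the clause kind (IDAlu, IDFetch or IDOther) \p SU belongs to.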
+int R600SchedStrategy::getInstKind(SUnit* SU) {
+ int Opcode = SU->getInstr()->getOpcode();
+
+ if (TII->isALUInstr(Opcode)) {
+ return IDAlu;
+ }
+
+ switch (Opcode) {
+ case AMDGPU::COPY:
+ case AMDGPU::CONST_COPY:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return IDAlu;
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return IDFetch;
+ default:
+ DEBUG(
+ dbgs() << "other inst: ";
+ SU->dump(DAG);
+ );
+ return IDOther;
+ }
+}
+
+SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
+ if (Q.empty())
+ return NULL;
+ SUnit *Result = *Q.begin();
+ Q.erase(Q.begin());
+ return Result;
+}
+
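+/// \brief Move every pending ALU instruction into the per-AluKind buckets
+/// used to fill the slots of the current VLIW group.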
+void R600SchedStrategy::LoadAlu() {
+ ReadyQueue *QSrc = Pending[IDAlu];
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ AluKind AK = getAluKind(*I);
+ AvailableAlus[AK].push_back(*I);
+ }
+ QSrc->clear();
+}
+
+void R600SchedStrategy::PrepareNextSlot() {
+ DEBUG(dbgs() << "New Slot\n");
+ OccupedSlotsMask = 0;
+ memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
+ LoadAlu();
+}
+
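+/// \brief Try to find an ALU instruction for the given VLIW slot.
+///
+/// An instruction already bound to this channel is preferred; otherwise an
+/// AluAny instruction is taken and its destination register class is
+/// constrained to the slot's channel, unless the destination is also used as
+/// a source, in which case the register class is left untouched.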
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+ static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
+  SUnit *SU = 0;
+  if ((SU = PopInst(AvailableAlus[IndexToID[Slot]]))) {
+    // Fill the slot with an instruction already bound to this channel.
+    return SU;
+  } else if ((SU = PopInst(AvailableAlus[AluAny]))) {
+ unsigned DestReg = SU->getInstr()->getOperand(0).getReg();
+    // The register pressure tracker crashes if an operand is both defined and
+    // used in the same instruction and we try to constrain its regclass, so
+    // leave such instructions unconstrained.
+    for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
+        E = SU->getInstr()->operands_end(); It != E; ++It) {
+      MachineOperand &MO = *It;
+      if (MO.isReg() && !MO.isDef() && MO.getReg() == DestReg)
+        return SU;
+    }
+ }
+    // Constrain the regclass of DestReg so that it is allocated to this Slot.
+ switch (Slot) {
+ case 0:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+ break;
+ case 1:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+ break;
+ case 2:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+ break;
+ case 3:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+ break;
+ }
+ return SU;
+ } else {
+ return NULL;
+ }
+}
+
+bool R600SchedStrategy::isAvailablesAluEmpty() const {
+ return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
+ AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
+ AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
+ AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
+}
+
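+/// \brief Pick the next ALU instruction, filling the current VLIW group.
+///
+/// A fresh group starts by flushing discarded copies or issuing a whole-group
+/// (XYZW) instruction; otherwise the X, Y, Z and W slots are filled in turn.
+/// When no remaining instruction fits an empty slot, a new group is started.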
+SUnit* R600SchedStrategy::pickAlu() {
+ while (!isAvailablesAluEmpty()) {
+ if (!OccupedSlotsMask) {
+ // Flush physical reg copies (RA will discard them)
+ if (!AvailableAlus[AluDiscarded].empty()) {
+ return PopInst(AvailableAlus[AluDiscarded]);
+ }
+ // If there is a T_XYZW alu available, use it
+ if (!AvailableAlus[AluT_XYZW].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluT_XYZW]);
+ }
+ }
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ if (!isOccupied) {
+ SUnit *SU = AttemptFillSlot(Chan);
+ if (SU) {
+ OccupedSlotsMask |= (1 << Chan);
+ InstructionsGroupCandidate[Chan] = SU;
+ return SU;
+ }
+ }
+ }
+ PrepareNextSlot();
+ }
+ return NULL;
+}
+
+SUnit* R600SchedStrategy::pickOther(int QID) {
+ SUnit *SU = 0;
+ ReadyQueue *AQ = Available[QID];
+
+ if (AQ->empty()) {
+ MoveUnits(Pending[QID], AQ);
+ }
+ if (!AQ->empty()) {
+ SU = *AQ->begin();
+ AQ->remove(AQ->begin());
+ }
+ return SU;
+}
+
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
new file mode 100644
index 0000000..9b38130
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -0,0 +1,119 @@
+//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINESCHEDULER_H_
+#define R600MACHINESCHEDULER_H_
+
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+namespace llvm {
+
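+/// Comparator on SUnits based on their number of successors; presumably
+/// intended for a PriorityQueue of scheduling candidates (it is not used
+/// elsewhere in this patch).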
+class CompareSUnit {
+public:
+ bool operator()(const SUnit *S1, const SUnit *S2) {
+ return S1->NumSuccs > S2->NumSuccs;
+ }
+};
+
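+/// Custom MachineSchedStrategy for the R600 family: schedules top-down,
+/// groups instructions into ALU/FETCH/Other clauses, and assigns ALU
+/// instructions to the X/Y/Z/W slots of a VLIW4 group.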
+class R600SchedStrategy : public MachineSchedStrategy {
+
+ const ScheduleDAGMI *DAG;
+ const R600InstrInfo *TII;
+ const R600RegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ enum InstQueue {
+ QAlu = 1,
+ QFetch = 2,
+ QOther = 4
+ };
+
+ enum InstKind {
+ IDAlu,
+ IDFetch,
+ IDOther,
+ IDLast
+ };
+
+ enum AluKind {
+ AluAny,
+ AluT_X,
+ AluT_Y,
+ AluT_Z,
+ AluT_W,
+ AluT_XYZW,
+ AluDiscarded, // LLVM Instructions that are going to be eliminated
+ AluLast
+ };
+
+ ReadyQueue *Available[IDLast], *Pending[IDLast];
+ std::vector<SUnit *> AvailableAlus[AluLast];
+
+ InstKind CurInstKind;
+ int CurEmitted;
+ InstKind NextInstKind;
+
+  // Maximum number of instructions emitted in a clause of each kind before we
+  // consider switching to another clause kind.
+  int InstKindLimit[IDLast];
+
+  // Bitmask of the X/Y/Z/W slots already filled in the current VLIW group.
+  int OccupedSlotsMask;
+
+public:
+ R600SchedStrategy() :
+ DAG(0), TII(0), TRI(0), MRI(0) {
+ Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
+ Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
+ Available[IDOther] = new ReadyQueue(QOther, "AOther");
+ Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
+ Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
+ Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
+ }
+
+ virtual ~R600SchedStrategy() {
+ for (unsigned I = 0; I < IDLast; ++I) {
+ delete Available[I];
+ delete Pending[I];
+ }
+ }
+
+ virtual void initialize(ScheduleDAGMI *dag);
+ virtual SUnit *pickNode(bool &IsTopNode);
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+ virtual void releaseTopNode(SUnit *SU);
+ virtual void releaseBottomNode(SUnit *SU);
+
+private:
+ SUnit *InstructionsGroupCandidate[4];
+
+ int getInstKind(SUnit *SU);
+ AluKind getAluKind(SUnit *SU) const;
+ void LoadAlu();
+ bool isAvailablesAluEmpty() const;
+ SUnit *AttemptFillSlot (unsigned Slot);
+ void PrepareNextSlot();
+ SUnit *PopInst(std::vector<SUnit *> &Q);
+
+ SUnit* pickAlu();
+ SUnit* pickOther(int QID);
+
+ void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
+};
+
+} // namespace llvm
+
+#endif /* R600MACHINESCHEDULER_H_ */
--
1.8.1.2