<div dir="ltr"><div>I see the following error in a self-hosted -Werror build; could you please fix it?</div><div><br></div><div>llvm/lib/Target/R600/R600InstrInfo.cpp:462:69: error: variable 'TransBS' is uninitialized when used here [-Werror,-Wuninitialized]</div>
<div>    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);</div><div>                                                                    ^~~~~~~</div><div>llvm/lib/Target/R600/R600InstrInfo.cpp:452:3: note: variable 'TransBS' is declared here</div>
<div>  BankSwizzle TransBS;</div><div>  ^</div><div><br></div><div><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Sat, Jun 29, 2013 at 11:32 PM, Vincent Lejeune <span dir="ltr">&lt;<a href="mailto:vljn@ovi.com" target="_blank">vljn@ovi.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: vljn<br>
Date: Sat Jun 29 14:32:43 2013<br>
New Revision: 185268<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=185268&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=185268&view=rev</a><br>
Log:<br>
R600: Support schedule and packetization of trans-only inst<br>
<br>
Modified:<br>
    llvm/trunk/lib/Target/R600/R600InstrInfo.cpp<br>
    llvm/trunk/lib/Target/R600/R600InstrInfo.h<br>
    llvm/trunk/lib/Target/R600/R600Instructions.td<br>
    llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp<br>
    llvm/trunk/lib/Target/R600/R600MachineScheduler.h<br>
    llvm/trunk/lib/Target/R600/R600Packetizer.cpp<br>
    llvm/trunk/lib/Target/R600/R600RegisterInfo.td<br>
    llvm/trunk/test/CodeGen/R600/fdiv.ll<br>
    llvm/trunk/test/CodeGen/R600/fp_to_sint.ll<br>
    llvm/trunk/test/CodeGen/R600/fp_to_uint.ll<br>
    llvm/trunk/test/CodeGen/R600/llvm.cos.ll<br>
    llvm/trunk/test/CodeGen/R600/llvm.pow.ll<br>
    llvm/trunk/test/CodeGen/R600/llvm.sin.ll<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600InstrInfo.cpp (original)<br>
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.cpp Sat Jun 29 14:32:43 2013<br>
@@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI)<br>
<br>
 std::vector<std::pair<int, unsigned> ><br>
 R600InstrInfo::ExtractSrcs(MachineInstr *MI,<br>
-                           const DenseMap<unsigned, unsigned> &PV)<br>
-    const {<br>
+                           const DenseMap<unsigned, unsigned> &PV,<br>
+                           unsigned &ConstCount) const {<br>
+  ConstCount = 0;<br>
   const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);<br>
   const std::pair<int, unsigned> DummyPair(-1, 0);<br>
   std::vector<std::pair<int, unsigned> > Result;<br>
@@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr<br>
   for (unsigned n = Srcs.size(); i < n; ++i) {<br>
     unsigned Reg = Srcs[i].first->getReg();<br>
     unsigned Index = RI.getEncodingValue(Reg) & 0xff;<br>
-    unsigned Chan = RI.getHWRegChan(Reg);<br>
     if (Reg == AMDGPU::OQAP) {<br>
       Result.push_back(std::pair<int, unsigned>(Index, 0));<br>
     }<br>
-    if (Index > 127) {<br>
-      Result.push_back(DummyPair);<br>
+    if (PV.find(Reg) != PV.end()) {<br>
+      // 255 is used to tells its a PS/PV reg<br>
+      Result.push_back(std::pair<int, unsigned>(255, 0));<br>
       continue;<br>
     }<br>
-    if (PV.find(Reg) != PV.end()) {<br>
+    if (Index > 127) {<br>
+      ConstCount++;<br>
       Result.push_back(DummyPair);<br>
       continue;<br>
     }<br>
+    unsigned Chan = RI.getHWRegChan(Reg);<br>
     Result.push_back(std::pair<int, unsigned>(Index, Chan));<br>
   }<br>
   for (; i < 3; ++i)<br>
@@ -305,23 +308,51 @@ Swizzle(std::vector<std::pair<int, unsig<br>
   return Src;<br>
 }<br>
<br>
-bool<br>
-R600InstrInfo::isLegal(<br>
-             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
-             const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
-             unsigned CheckedSize) const {<br>
+static unsigned<br>
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {<br>
+  switch (Swz) {<br>
+  case R600InstrInfo::ALU_VEC_012_SCL_210: {<br>
+    unsigned Cycles[3] = { 2, 1, 0};<br>
+    return Cycles[Op];<br>
+  }<br>
+  case R600InstrInfo::ALU_VEC_021_SCL_122: {<br>
+    unsigned Cycles[3] = { 1, 2, 2};<br>
+    return Cycles[Op];<br>
+  }<br>
+  case R600InstrInfo::ALU_VEC_120_SCL_212: {<br>
+    unsigned Cycles[3] = { 2, 1, 2};<br>
+    return Cycles[Op];<br>
+  }<br>
+  case R600InstrInfo::ALU_VEC_102_SCL_221: {<br>
+    unsigned Cycles[3] = { 2, 2, 1};<br>
+    return Cycles[Op];<br>
+  }<br>
+  default:<br>
+    llvm_unreachable("Wrong Swizzle for Trans Slot");<br>
+    return 0;<br>
+  }<br>
+}<br>
+<br>
+/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed<br>
+/// in the same Instruction Group while meeting read port limitations given a<br>
+/// Swz swizzle sequence.<br>
+unsigned  R600InstrInfo::isLegalUpTo(<br>
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+    const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+    R600InstrInfo::BankSwizzle TransSwz) const {<br>
   int Vector[4][3];<br>
   memset(Vector, -1, sizeof(Vector));<br>
-  for (unsigned i = 0; i < CheckedSize; i++) {<br>
+  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {<br>
     const std::vector<std::pair<int, unsigned> > &Srcs =<br>
         Swizzle(IGSrcs[i], Swz[i]);<br>
     for (unsigned j = 0; j < 3; j++) {<br>
       const std::pair<int, unsigned> &Src = Srcs[j];<br>
-      if (Src.first < 0)<br>
+      if (Src.first < 0 || Src.first == 255)<br>
         continue;<br>
       if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {<br>
-        if (Swz[i] != R600InstrInfo::ALU_VEC_012 &&<br>
-            Swz[i] != R600InstrInfo::ALU_VEC_021) {<br>
+        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&<br>
+            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {<br>
             // The value from output queue A (denoted by register OQAP) can<br>
             // only be fetched during the first cycle.<br>
             return false;<br>
@@ -332,51 +363,126 @@ R600InstrInfo::isLegal(<br>
       if (Vector[Src.second][j] < 0)<br>
         Vector[Src.second][j] = Src.first;<br>
       if (Vector[Src.second][j] != Src.first)<br>
-        return false;<br>
+        return i;<br>
     }<br>
   }<br>
-  return true;<br>
+  // Now check Trans Alu<br>
+  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {<br>
+    const std::pair<int, unsigned> &Src = TransSrcs[i];<br>
+    unsigned Cycle = getTransSwizzle(TransSwz, i);<br>
+    if (Src.first < 0)<br>
+      continue;<br>
+    if (Src.first == 255)<br>
+      continue;<br>
+    if (Vector[Src.second][Cycle] < 0)<br>
+      Vector[Src.second][Cycle] = Src.first;<br>
+    if (Vector[Src.second][Cycle] != Src.first)<br>
+      return IGSrcs.size() - 1;<br>
+  }<br>
+  return IGSrcs.size();<br>
 }<br>
<br>
-bool<br>
-R600InstrInfo::recursiveFitsFPLimitation(<br>
-             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
-             std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
-             unsigned Depth) const {<br>
-  if (!isLegal(IGSrcs, SwzCandidate, Depth))<br>
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next<br>
+/// (in lexicographic term) swizzle sequence assuming that all swizzles after<br>
+/// Idx can be skipped<br>
+static bool<br>
+NextPossibleSolution(<br>
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
+    unsigned Idx) {<br>
+  assert(Idx < SwzCandidate.size());<br>
+  int ResetIdx = Idx;<br>
+  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)<br>
+    ResetIdx --;<br>
+  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {<br>
+    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;<br>
+  }<br>
+  if (ResetIdx == -1)<br>
     return false;<br>
-  if (IGSrcs.size() == Depth)<br>
-    return true;<br>
-  unsigned i = SwzCandidate[Depth];<br>
-  for (; i < 6; i++) {<br>
-    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;<br>
-    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))<br>
+  SwzCandidate[ResetIdx]++;<br>
+  return true;<br>
+}<br>
+<br>
+/// Enumerate all possible Swizzle sequence to find one that can meet all<br>
+/// read port requirements.<br>
+bool R600InstrInfo::FindSwizzleForVectorSlot(<br>
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+    R600InstrInfo::BankSwizzle TransSwz) const {<br>
+  unsigned ValidUpTo = 0;<br>
+  do {<br>
+    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);<br>
+    if (ValidUpTo == IGSrcs.size())<br>
       return true;<br>
-  }<br>
-  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;<br>
+  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));<br>
   return false;<br>
 }<br>
<br>
+/// Instructions in Trans slot can't read gpr at cycle 0 if they also read<br>
+/// a const, and can't read a gpr at cycle 1 if they read 2 const.<br>
+static bool<br>
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,<br>
+                  const std::vector<std::pair<int, unsigned> > &TransOps,<br>
+                  unsigned ConstCount) {<br>
+  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {<br>
+    const std::pair<int, unsigned> &Src = TransOps[i];<br>
+    unsigned Cycle = getTransSwizzle(TransSwz, i);<br>
+    if (Src.first < 0)<br>
+      continue;<br>
+    if (ConstCount > 0 && Cycle == 0)<br>
+      return false;<br>
+    if (ConstCount > 1 && Cycle == 1)<br>
+      return false;<br>
+  }<br>
+  return true;<br>
+}<br>
+<br>
 bool<br>
 R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,<br>
-                                      const DenseMap<unsigned, unsigned> &PV,<br>
-                                      std::vector<BankSwizzle> &ValidSwizzle)<br>
+                                       const DenseMap<unsigned, unsigned> &PV,<br>
+                                       std::vector<BankSwizzle> &ValidSwizzle,<br>
+                                       bool isLastAluTrans)<br>
     const {<br>
   //Todo : support shared src0 - src1 operand<br>
<br>
   std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;<br>
   ValidSwizzle.clear();<br>
+  unsigned ConstCount;<br>
+  BankSwizzle TransBS;<br>
   for (unsigned i = 0, e = IG.size(); i < e; ++i) {<br>
-    IGSrcs.push_back(ExtractSrcs(IG[i], PV));<br>
+    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));<br>
     unsigned Op = getOperandIdx(IG[i]->getOpcode(),<br>
         AMDGPU::OpName::bank_swizzle);<br>
     ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)<br>
         IG[i]->getOperand(Op).getImm());<br>
   }<br>
-  bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);<br>
-  if (!Result)<br>
-    return false;<br>
-  return true;<br>
+  std::vector<std::pair<int, unsigned> > TransOps;<br>
+  if (!isLastAluTrans)<br>
+    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);<br>
+<br>
+  TransOps = IGSrcs.back();<br>
+  IGSrcs.pop_back();<br>
+  ValidSwizzle.pop_back();<br>
+<br>
+  static const R600InstrInfo::BankSwizzle TransSwz[] = {<br>
+    ALU_VEC_012_SCL_210,<br>
+    ALU_VEC_021_SCL_122,<br>
+    ALU_VEC_120_SCL_212,<br>
+    ALU_VEC_102_SCL_221<br>
+  };<br>
+  for (unsigned i = 0; i < 4; i++) {<br>
+    TransBS = TransSwz[i];<br>
+    if (!isConstCompatible(TransBS, TransOps, ConstCount))<br>
+      continue;<br>
+    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,<br>
+        TransBS);<br>
+    if (Result) {<br>
+      ValidSwizzle.push_back(TransBS);<br>
+      return true;<br>
+    }<br>
+  }<br>
+<br>
+  return false;<br>
 }<br>
<br>
<br>
@@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(<br>
 }<br>
<br>
 bool<br>
-R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {<br>
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)<br>
+    const {<br>
   std::vector<unsigned> Consts;<br>
   for (unsigned i = 0, n = MIs.size(); i < n; i++) {<br>
     MachineInstr *MI = MIs[i];<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600InstrInfo.h (original)<br>
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.h Sat Jun 29 14:32:43 2013<br>
@@ -84,26 +84,38 @@ namespace llvm {<br>
   SmallVector<std::pair<MachineOperand *, int64_t>, 3><br>
       getSrcs(MachineInstr *MI) const;<br>
<br>
-  bool isLegal(<br>
-             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
-             const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
-             unsigned CheckedSize) const;<br>
-  bool recursiveFitsFPLimitation(<br>
-             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
-             std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
-             unsigned Depth = 0) const;<br>
+  unsigned  isLegalUpTo(<br>
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+    const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+    R600InstrInfo::BankSwizzle TransSwz) const;<br>
+<br>
+  bool FindSwizzleForVectorSlot(<br>
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+    R600InstrInfo::BankSwizzle TransSwz) const;<br>
<br>
   /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210<br>
   /// returns true and the first (in lexical order) BankSwizzle affectation<br>
   /// starting from the one already provided in the Instruction Group MIs that<br>
   /// fits Read Port limitations in BS if available. Otherwise returns false<br>
   /// and undefined content in BS.<br>
+  /// isLastAluTrans should be set if the last Alu of MIs will be executed on<br>
+  /// Trans ALU. In this case, ValidTSwizzle returns the BankSwizzle value to<br>
+  /// apply to the last instruction.<br>
   /// PV holds GPR to PV registers in the Instruction Group MIs.<br>
   bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,<br>
                                const DenseMap<unsigned, unsigned> &PV,<br>
-                               std::vector<BankSwizzle> &BS) const;<br>
+                               std::vector<BankSwizzle> &BS,<br>
+                               bool isLastAluTrans) const;<br>
+<br>
+  /// An instruction group can only access 2 channel pair (either [XY] or [ZW])<br>
+  /// from KCache bank on R700+. This function check if MI set in input meet<br>
+  /// this limitations<br>
+  bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;<br>
+  /// Same but using const index set instead of MI set.<br>
   bool fitsConstReadLimitations(const std::vector<unsigned>&) const;<br>
-  bool canBundle(const std::vector<MachineInstr *> &) const;<br>
<br>
   /// \breif Vector instructions are instructions that must fill all<br>
   /// instruction slots within an instruction group.<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600Instructions.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600Instructions.td (original)<br>
+++ llvm/trunk/lib/Target/R600/R600Instructions.td Sat Jun 29 14:32:43 2013<br>
@@ -1489,6 +1489,8 @@ let hasSideEffects = 1 in {<br>
<br>
   def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {<br>
     let Pattern = [];<br>
+    let TransOnly = 0;<br>
+    let Itinerary = AnyALU;<br>
   }<br>
<br>
   def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp (original)<br>
+++ llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp Sat Jun 29 14:32:43 2013<br>
@@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(Sched<br>
   MRI = &DAG->MRI;<br>
   CurInstKind = IDOther;<br>
   CurEmitted = 0;<br>
-  OccupedSlotsMask = 15;<br>
+  OccupedSlotsMask = 31;<br>
   InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();<br>
   InstKindLimit[IDOther] = 32;<br>
<br>
@@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit<br>
   if (NextInstKind != CurInstKind) {<br>
     DEBUG(dbgs() << "Instruction Type Switch\n");<br>
     if (NextInstKind != IDAlu)<br>
-      OccupedSlotsMask = 15;<br>
+      OccupedSlotsMask |= 31;<br>
     CurEmitted = 0;<br>
     CurInstKind = NextInstKind;<br>
   }<br>
@@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClas<br>
 R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {<br>
   MachineInstr *MI = SU->getInstr();<br>
<br>
+  if (TII->isTransOnly(MI))<br>
+    return AluTrans;<br>
+<br>
     switch (MI->getOpcode()) {<br>
     case AMDGPU::PRED_X:<br>
       return AluPredX;<br>
@@ -346,7 +349,7 @@ SUnit *R600SchedStrategy::PopInst(std::v<br>
       It != E; ++It) {<br>
     SUnit *SU = *It;<br>
     InstructionsGroupCandidate.push_back(SU->getInstr());<br>
-    if (TII->canBundle(InstructionsGroupCandidate)) {<br>
+    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {<br>
       InstructionsGroupCandidate.pop_back();<br>
       Q.erase((It + 1).base());<br>
       return SU;<br>
@@ -421,7 +424,8 @@ unsigned R600SchedStrategy::AvailablesAl<br>
   return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +<br>
       AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +<br>
       AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +<br>
-      AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();<br>
+      AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +<br>
+      AvailableAlus[AluPredX].size();<br>
 }<br>
<br>
 SUnit* R600SchedStrategy::pickAlu() {<br>
@@ -429,20 +433,27 @@ SUnit* R600SchedStrategy::pickAlu() {<br>
     if (!OccupedSlotsMask) {<br>
       // Bottom up scheduling : predX must comes first<br>
       if (!AvailableAlus[AluPredX].empty()) {<br>
-        OccupedSlotsMask = 15;<br>
+        OccupedSlotsMask |= 31;<br>
         return PopInst(AvailableAlus[AluPredX]);<br>
       }<br>
       // Flush physical reg copies (RA will discard them)<br>
       if (!AvailableAlus[AluDiscarded].empty()) {<br>
-        OccupedSlotsMask = 15;<br>
+        OccupedSlotsMask |= 31;<br>
         return PopInst(AvailableAlus[AluDiscarded]);<br>
       }<br>
       // If there is a T_XYZW alu available, use it<br>
       if (!AvailableAlus[AluT_XYZW].empty()) {<br>
-        OccupedSlotsMask = 15;<br>
+        OccupedSlotsMask |= 15;<br>
         return PopInst(AvailableAlus[AluT_XYZW]);<br>
       }<br>
     }<br>
+    bool TransSlotOccuped = OccupedSlotsMask & 16;<br>
+    if (!TransSlotOccuped) {<br>
+      if (!AvailableAlus[AluTrans].empty()) {<br>
+        OccupedSlotsMask |= 16;<br>
+        return PopInst(AvailableAlus[AluTrans]);<br>
+      }<br>
+    }<br>
     for (int Chan = 3; Chan > -1; --Chan) {<br>
       bool isOccupied = OccupedSlotsMask & (1 << Chan);<br>
       if (!isOccupied) {<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.h?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.h?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600MachineScheduler.h (original)<br>
+++ llvm/trunk/lib/Target/R600/R600MachineScheduler.h Sat Jun 29 14:32:43 2013<br>
@@ -46,6 +46,7 @@ class R600SchedStrategy : public Machine<br>
     AluT_W,<br>
     AluT_XYZW,<br>
     AluPredX,<br>
+    AluTrans,<br>
     AluDiscarded, // LLVM Instructions that are going to be eliminated<br>
     AluLast<br>
   };<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600Packetizer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Packetizer.cpp?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Packetizer.cpp?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600Packetizer.cpp (original)<br>
+++ llvm/trunk/lib/Target/R600/R600Packetizer.cpp Sat Jun 29 14:32:43 2013<br>
@@ -77,8 +77,6 @@ private:<br>
     do {<br>
       if (TII->isPredicated(BI))<br>
         continue;<br>
-      if (TII->isTransOnly(BI))<br>
-        continue;<br>
       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);<br>
       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)<br>
         continue;<br>
@@ -87,6 +85,10 @@ private:<br>
         continue;<br>
       }<br>
       unsigned Dst = BI->getOperand(DstIdx).getReg();<br>
+      if (TII->isTransOnly(BI)) {<br>
+        Result[Dst] = AMDGPU::PS;<br>
+        continue;<br>
+      }<br>
       if (BI->getOpcode() == AMDGPU::DOT4_r600 ||<br>
           BI->getOpcode() == AMDGPU::DOT4_eg) {<br>
         Result[Dst] = AMDGPU::PV_X;<br>
@@ -157,10 +159,6 @@ public:<br>
       return true;<br>
     if (!TII->isALUInstr(MI->getOpcode()))<br>
       return true;<br>
-    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)<br>
-      return true;<br>
-    if (TII->isTransOnly(MI))<br>
-      return true;<br>
     if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)<br>
       return true;<br>
     return false;<br>
@@ -170,7 +168,7 @@ public:<br>
   // together.<br>
   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {<br>
     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();<br>
-    if (getSlot(MII) <= getSlot(MIJ))<br>
+    if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))<br>
       return false;<br>
     // Does MII and MIJ share the same pred_sel ?<br>
     int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),<br>
@@ -204,11 +202,16 @@ public:<br>
     MI->getOperand(LastOp).setImm(Bit);<br>
   }<br>
<br>
-  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {<br>
+  bool isBundlableWithCurrentPMI(MachineInstr *MI,<br>
+                                 const DenseMap<unsigned, unsigned> &PV,<br>
+                                 std::vector<R600InstrInfo::BankSwizzle> &BS,<br>
+                                 bool &isTransSlot) {<br>
+    isTransSlot = TII->isTransOnly(MI);<br>
+<br>
+    // Are the Constants limitations met ?<br>
     CurrentPacketMIs.push_back(MI);<br>
-    bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);<br>
-    DEBUG(<br>
-      if (!FitsConstLimits) {<br>
+    if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {<br>
+      DEBUG(<br>
         dbgs() << "Couldn't pack :\n";<br>
         MI->dump();<br>
         dbgs() << "with the following packets :\n";<br>
@@ -217,14 +220,15 @@ public:<br>
           dbgs() << "\n";<br>
         }<br>
         dbgs() << "because of Consts read limitations\n";<br>
-      });<br>
-    const DenseMap<unsigned, unsigned> &PV =<br>
-        getPreviousVector(CurrentPacketMIs.front());<br>
-    std::vector<R600InstrInfo::BankSwizzle> BS;<br>
-    bool FitsReadPortLimits =<br>
-        TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);<br>
-    DEBUG(<br>
-      if (!FitsReadPortLimits) {<br>
+      );<br>
+      CurrentPacketMIs.pop_back();<br>
+      return false;<br>
+    }<br>
+<br>
+    // Is there a BankSwizzle set that meet Read Port limitations ?<br>
+    if (!TII->fitsReadPortLimitations(CurrentPacketMIs,<br>
+            PV, BS, isTransSlot)) {<br>
+      DEBUG(<br>
         dbgs() << "Couldn't pack :\n";<br>
         MI->dump();<br>
         dbgs() << "with the following packets :\n";<br>
@@ -233,25 +237,43 @@ public:<br>
           dbgs() << "\n";<br>
         }<br>
         dbgs() << "because of Read port limitations\n";<br>
-      });<br>
-    bool isBundlable = FitsConstLimits && FitsReadPortLimits;<br>
-    if (isBundlable) {<br>
+      );<br>
+      CurrentPacketMIs.pop_back();<br>
+      return false;<br>
+    }<br>
+<br>
+    CurrentPacketMIs.pop_back();<br>
+    return true;<br>
+  }<br>
+<br>
+  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {<br>
+    MachineBasicBlock::iterator FirstInBundle =<br>
+        CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();<br>
+    const DenseMap<unsigned, unsigned> &PV =<br>
+        getPreviousVector(FirstInBundle);<br>
+    std::vector<R600InstrInfo::BankSwizzle> BS;<br>
+    bool isTransSlot;<br>
+<br>
+    if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {<br>
       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {<br>
         MachineInstr *MI = CurrentPacketMIs[i];<br>
-            unsigned Op = TII->getOperandIdx(MI->getOpcode(),<br>
-                AMDGPU::OpName::bank_swizzle);<br>
-            MI->getOperand(Op).setImm(BS[i]);<br>
+        unsigned Op = TII->getOperandIdx(MI->getOpcode(),<br>
+            AMDGPU::OpName::bank_swizzle);<br>
+        MI->getOperand(Op).setImm(BS[i]);<br>
+      }<br>
+      unsigned Op = TII->getOperandIdx(MI->getOpcode(),<br>
+          AMDGPU::OpName::bank_swizzle);<br>
+      MI->getOperand(Op).setImm(BS.back());<br>
+      if (!CurrentPacketMIs.empty())<br>
+        setIsLastBit(CurrentPacketMIs.back(), 0);<br>
+      substitutePV(MI, PV);<br>
+      MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);<br>
+      if (isTransSlot) {<br>
+        endPacket(llvm::next(It)->getParent(), llvm::next(It));<br>
       }<br>
+      return It;<br>
     }<br>
-    CurrentPacketMIs.pop_back();<br>
-    if (!isBundlable) {<br>
-      endPacket(MI->getParent(), MI);<br>
-      substitutePV(MI, getPreviousVector(MI));<br>
-      return VLIWPacketizerList::addToPacket(MI);<br>
-    }<br>
-    if (!CurrentPacketMIs.empty())<br>
-      setIsLastBit(CurrentPacketMIs.back(), 0);<br>
-    substitutePV(MI, PV);<br>
+    endPacket(MI->getParent(), MI);<br>
     return VLIWPacketizerList::addToPacket(MI);<br>
   }<br>
 };<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600RegisterInfo.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600RegisterInfo.td?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600RegisterInfo.td?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600RegisterInfo.td (original)<br>
+++ llvm/trunk/lib/Target/R600/R600RegisterInfo.td Sat Jun 29 14:32:43 2013<br>
@@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254,<br>
 def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;<br>
 def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;<br>
 def PV_W : R600RegWithChan<"PV.W", 254, "W">;<br>
+def PS: R600Reg<"PS", 255>;<br>
 def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;<br>
 def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;<br>
 def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/fdiv.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fdiv.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fdiv.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/fdiv.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/fdiv.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,13 +1,13 @@<br>
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
<br>
 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {<br>
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/fp_to_sint.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_sint.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_sint.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/fp_to_sint.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/fp_to_sint.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,10 +1,10 @@<br>
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
 ; CHECK: @fp_to_sint_v4i32<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
 define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {<br>
   %value = load <4 x float> addrspace(1) * %in<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/fp_to_uint.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_uint.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_uint.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/fp_to_uint.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/fp_to_uint.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,10 +1,10 @@<br>
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
 ; CHECK: @fp_to_uint_v4i32<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
 define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {<br>
   %value = load <4 x float> addrspace(1) * %in<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/llvm.cos.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.cos.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.cos.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/llvm.cos.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/llvm.cos.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,6 +1,6 @@<br>
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
-;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
 define void @test() {<br>
    %r0 = call float @llvm.R600.load.input(i32 0)<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/llvm.pow.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.pow.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.pow.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/llvm.pow.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/llvm.pow.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,8 +1,8 @@<br>
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
 ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
+;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
 define void @test() {<br>
    %r0 = call float @llvm.R600.load.input(i32 0)<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/llvm.sin.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.sin.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.sin.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/llvm.sin.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/llvm.sin.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,6 +1,6 @@<br>
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
-;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
 define void @test() {<br>
    %r0 = call float @llvm.R600.load.input(i32 0)<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div>Alexey Samsonov, MSK</div>
</div>