[llvm] r185268 - R600: Support schedule and packetization of trans-only inst

Benjamin Kramer benny.kra at gmail.com
Sat Jun 29 13:06:17 PDT 2013


On 29.06.2013, at 22:00, Kai Nacke <kai.nacke at redstar.de> wrote:

> Hi Vincent!
> 
> This commit or the previous one breaks the build with gcc-4.4.7:
> 
> /home/u0012540/llvm/lib/Target/R600/R600InstrInfo.cpp: In function ‘bool NextPossibleSolution(std::vector<llvm::R600InstrInfo::BankSwizzle, std::allocator<llvm::R600InstrInfo::BankSwizzle> >&, unsigned int)’:
> /home/u0012540/llvm/lib/Target/R600/R600InstrInfo.cpp:401: error: no ‘operator++(int)’ declared for postfix ‘++’, trying prefix operator instead
> /home/u0012540/llvm/lib/Target/R600/R600InstrInfo.cpp:401: error: no match for ‘operator++’ in ‘++((std::vector<llvm::R600InstrInfo::BankSwizzle, std::allocator<llvm::R600InstrInfo::BankSwizzle> >*)SwzCandidate)->std::vector<_Tp, _Alloc>::operator[] [with _Tp = llvm::R600InstrInfo::BankSwizzle, _Alloc = std::allocator<llvm::R600InstrInfo::BankSwizzle>](((long unsigned int)ResetIdx))’
> make[2]: *** [lib/Target/R600/CMakeFiles/LLVMR600CodeGen.dir/R600InstrInfo.cpp.o] Error 1
> make[1]: *** [lib/Target/R600/CMakeFiles/LLVMR600CodeGen.dir/all] Error 2

I fixed it in r185269, so the bots stop harassing me.

- Ben

> 
> Regards
> Kai
> 
> 
> On 29.06.2013 21:32, Vincent Lejeune wrote:
>> Author: vljn
>> Date: Sat Jun 29 14:32:43 2013
>> New Revision: 185268
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=185268&view=rev
>> Log:
>> R600: Support schedule and packetization of trans-only inst
>> 
>> Modified:
>>     llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
>>     llvm/trunk/lib/Target/R600/R600InstrInfo.h
>>     llvm/trunk/lib/Target/R600/R600Instructions.td
>>     llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp
>>     llvm/trunk/lib/Target/R600/R600MachineScheduler.h
>>     llvm/trunk/lib/Target/R600/R600Packetizer.cpp
>>     llvm/trunk/lib/Target/R600/R600RegisterInfo.td
>>     llvm/trunk/test/CodeGen/R600/fdiv.ll
>>     llvm/trunk/test/CodeGen/R600/fp_to_sint.ll
>>     llvm/trunk/test/CodeGen/R600/fp_to_uint.ll
>>     llvm/trunk/test/CodeGen/R600/llvm.cos.ll
>>     llvm/trunk/test/CodeGen/R600/llvm.pow.ll
>>     llvm/trunk/test/CodeGen/R600/llvm.sin.ll
>> 
>> Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600InstrInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/R600/R600InstrInfo.cpp Sat Jun 29 14:32:43 2013
>> @@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI)
>> 
>>  std::vector<std::pair<int, unsigned> >
>>  R600InstrInfo::ExtractSrcs(MachineInstr *MI,
>> -                           const DenseMap<unsigned, unsigned> &PV)
>> -    const {
>> +                           const DenseMap<unsigned, unsigned> &PV,
>> +                           unsigned &ConstCount) const {
>> +  ConstCount = 0;
>>    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
>>    const std::pair<int, unsigned> DummyPair(-1, 0);
>>    std::vector<std::pair<int, unsigned> > Result;
>> @@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr
>>    for (unsigned n = Srcs.size(); i < n; ++i) {
>>      unsigned Reg = Srcs[i].first->getReg();
>>      unsigned Index = RI.getEncodingValue(Reg) & 0xff;
>> -    unsigned Chan = RI.getHWRegChan(Reg);
>>      if (Reg == AMDGPU::OQAP) {
>>        Result.push_back(std::pair<int, unsigned>(Index, 0));
>>      }
>> -    if (Index > 127) {
>> -      Result.push_back(DummyPair);
>> +    if (PV.find(Reg) != PV.end()) {
>> +      // 255 is used to tells its a PS/PV reg
>> +      Result.push_back(std::pair<int, unsigned>(255, 0));
>>        continue;
>>      }
>> -    if (PV.find(Reg) != PV.end()) {
>> +    if (Index > 127) {
>> +      ConstCount++;
>>        Result.push_back(DummyPair);
>>        continue;
>>      }
>> +    unsigned Chan = RI.getHWRegChan(Reg);
>>      Result.push_back(std::pair<int, unsigned>(Index, Chan));
>>    }
>>    for (; i < 3; ++i)
>> @@ -305,23 +308,51 @@ Swizzle(std::vector<std::pair<int, unsig
>>    return Src;
>>  }
>> 
>> -bool
>> -R600InstrInfo::isLegal(
>> -             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> -             const std::vector<R600InstrInfo::BankSwizzle> &Swz,
>> -             unsigned CheckedSize) const {
>> +static unsigned
>> +getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
>> +  switch (Swz) {
>> +  case R600InstrInfo::ALU_VEC_012_SCL_210: {
>> +    unsigned Cycles[3] = { 2, 1, 0};
>> +    return Cycles[Op];
>> +  }
>> +  case R600InstrInfo::ALU_VEC_021_SCL_122: {
>> +    unsigned Cycles[3] = { 1, 2, 2};
>> +    return Cycles[Op];
>> +  }
>> +  case R600InstrInfo::ALU_VEC_120_SCL_212: {
>> +    unsigned Cycles[3] = { 2, 1, 2};
>> +    return Cycles[Op];
>> +  }
>> +  case R600InstrInfo::ALU_VEC_102_SCL_221: {
>> +    unsigned Cycles[3] = { 2, 2, 1};
>> +    return Cycles[Op];
>> +  }
>> +  default:
>> +    llvm_unreachable("Wrong Swizzle for Trans Slot");
>> +    return 0;
>> +  }
>> +}
>> +
>> +/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
>> +/// in the same Instruction Group while meeting read port limitations given a
>> +/// Swz swizzle sequence.
>> +unsigned  R600InstrInfo::isLegalUpTo(
>> +    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> +    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
>> +    const std::vector<std::pair<int, unsigned> > &TransSrcs,
>> +    R600InstrInfo::BankSwizzle TransSwz) const {
>>    int Vector[4][3];
>>    memset(Vector, -1, sizeof(Vector));
>> -  for (unsigned i = 0; i < CheckedSize; i++) {
>> +  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
>>      const std::vector<std::pair<int, unsigned> > &Srcs =
>>          Swizzle(IGSrcs[i], Swz[i]);
>>      for (unsigned j = 0; j < 3; j++) {
>>        const std::pair<int, unsigned> &Src = Srcs[j];
>> -      if (Src.first < 0)
>> +      if (Src.first < 0 || Src.first == 255)
>>          continue;
>>        if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
>> -        if (Swz[i] != R600InstrInfo::ALU_VEC_012 &&
>> -            Swz[i] != R600InstrInfo::ALU_VEC_021) {
>> +        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
>> +            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
>>              // The value from output queue A (denoted by register OQAP) can
>>              // only be fetched during the first cycle.
>>              return false;
>> @@ -332,51 +363,126 @@ R600InstrInfo::isLegal(
>>        if (Vector[Src.second][j] < 0)
>>          Vector[Src.second][j] = Src.first;
>>        if (Vector[Src.second][j] != Src.first)
>> -        return false;
>> +        return i;
>>      }
>>    }
>> -  return true;
>> +  // Now check Trans Alu
>> +  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
>> +    const std::pair<int, unsigned> &Src = TransSrcs[i];
>> +    unsigned Cycle = getTransSwizzle(TransSwz, i);
>> +    if (Src.first < 0)
>> +      continue;
>> +    if (Src.first == 255)
>> +      continue;
>> +    if (Vector[Src.second][Cycle] < 0)
>> +      Vector[Src.second][Cycle] = Src.first;
>> +    if (Vector[Src.second][Cycle] != Src.first)
>> +      return IGSrcs.size() - 1;
>> +  }
>> +  return IGSrcs.size();
>>  }
>> 
>> -bool
>> -R600InstrInfo::recursiveFitsFPLimitation(
>> -             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> -             std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
>> -             unsigned Depth) const {
>> -  if (!isLegal(IGSrcs, SwzCandidate, Depth))
>> +/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
>> +/// (in lexicographic term) swizzle sequence assuming that all swizzles after
>> +/// Idx can be skipped
>> +static bool
>> +NextPossibleSolution(
>> +    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
>> +    unsigned Idx) {
>> +  assert(Idx < SwzCandidate.size());
>> +  int ResetIdx = Idx;
>> +  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
>> +    ResetIdx --;
>> +  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
>> +    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
>> +  }
>> +  if (ResetIdx == -1)
>>      return false;
>> -  if (IGSrcs.size() == Depth)
>> -    return true;
>> -  unsigned i = SwzCandidate[Depth];
>> -  for (; i < 6; i++) {
>> -    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
>> -    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
>> +  SwzCandidate[ResetIdx]++;
>> +  return true;
>> +}
>> +
>> +/// Enumerate all possible Swizzle sequence to find one that can meet all
>> +/// read port requirements.
>> +bool R600InstrInfo::FindSwizzleForVectorSlot(
>> +    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> +    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
>> +    const std::vector<std::pair<int, unsigned> > &TransSrcs,
>> +    R600InstrInfo::BankSwizzle TransSwz) const {
>> +  unsigned ValidUpTo = 0;
>> +  do {
>> +    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
>> +    if (ValidUpTo == IGSrcs.size())
>>        return true;
>> -  }
>> -  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
>> +  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
>>    return false;
>>  }
>> 
>> +/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
>> +/// a const, and can't read a gpr at cycle 1 if they read 2 const.
>> +static bool
>> +isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
>> +                  const std::vector<std::pair<int, unsigned> > &TransOps,
>> +                  unsigned ConstCount) {
>> +  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
>> +    const std::pair<int, unsigned> &Src = TransOps[i];
>> +    unsigned Cycle = getTransSwizzle(TransSwz, i);
>> +    if (Src.first < 0)
>> +      continue;
>> +    if (ConstCount > 0 && Cycle == 0)
>> +      return false;
>> +    if (ConstCount > 1 && Cycle == 1)
>> +      return false;
>> +  }
>> +  return true;
>> +}
>> +
>>  bool
>>  R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
>> -                                      const DenseMap<unsigned, unsigned> &PV,
>> -                                      std::vector<BankSwizzle> &ValidSwizzle)
>> +                                       const DenseMap<unsigned, unsigned> &PV,
>> +                                       std::vector<BankSwizzle> &ValidSwizzle,
>> +                                       bool isLastAluTrans)
>>      const {
>>    //Todo : support shared src0 - src1 operand
>> 
>>    std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
>>    ValidSwizzle.clear();
>> +  unsigned ConstCount;
>> +  BankSwizzle TransBS;
>>    for (unsigned i = 0, e = IG.size(); i < e; ++i) {
>> -    IGSrcs.push_back(ExtractSrcs(IG[i], PV));
>> +    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
>>      unsigned Op = getOperandIdx(IG[i]->getOpcode(),
>>          AMDGPU::OpName::bank_swizzle);
>>      ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
>>          IG[i]->getOperand(Op).getImm());
>>    }
>> -  bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
>> -  if (!Result)
>> -    return false;
>> -  return true;
>> +  std::vector<std::pair<int, unsigned> > TransOps;
>> +  if (!isLastAluTrans)
>> +    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
>> +
>> +  TransOps = IGSrcs.back();
>> +  IGSrcs.pop_back();
>> +  ValidSwizzle.pop_back();
>> +
>> +  static const R600InstrInfo::BankSwizzle TransSwz[] = {
>> +    ALU_VEC_012_SCL_210,
>> +    ALU_VEC_021_SCL_122,
>> +    ALU_VEC_120_SCL_212,
>> +    ALU_VEC_102_SCL_221
>> +  };
>> +  for (unsigned i = 0; i < 4; i++) {
>> +    TransBS = TransSwz[i];
>> +    if (!isConstCompatible(TransBS, TransOps, ConstCount))
>> +      continue;
>> +    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
>> +        TransBS);
>> +    if (Result) {
>> +      ValidSwizzle.push_back(TransBS);
>> +      return true;
>> +    }
>> +  }
>> +
>> +  return false;
>>  }
>> 
>> 
>> @@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(
>>  }
>> 
>>  bool
>> -R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
>> +R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
>> +    const {
>>    std::vector<unsigned> Consts;
>>    for (unsigned i = 0, n = MIs.size(); i < n; i++) {
>>      MachineInstr *MI = MIs[i];
>> 
>> Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600InstrInfo.h (original)
>> +++ llvm/trunk/lib/Target/R600/R600InstrInfo.h Sat Jun 29 14:32:43 2013
>> @@ -84,26 +84,38 @@ namespace llvm {
>>    SmallVector<std::pair<MachineOperand *, int64_t>, 3>
>>        getSrcs(MachineInstr *MI) const;
>> 
>> -  bool isLegal(
>> -             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> -             const std::vector<R600InstrInfo::BankSwizzle> &Swz,
>> -             unsigned CheckedSize) const;
>> -  bool recursiveFitsFPLimitation(
>> -             const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> -             std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
>> -             unsigned Depth = 0) const;
>> +  unsigned  isLegalUpTo(
>> +    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> +    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
>> +    const std::vector<std::pair<int, unsigned> > &TransSrcs,
>> +    R600InstrInfo::BankSwizzle TransSwz) const;
>> +
>> +  bool FindSwizzleForVectorSlot(
>> +    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>> +    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
>> +    const std::vector<std::pair<int, unsigned> > &TransSrcs,
>> +    R600InstrInfo::BankSwizzle TransSwz) const;
>> 
>>    /// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210
>>    /// returns true and the first (in lexical order) BankSwizzle affectation
>>    /// starting from the one already provided in the Instruction Group MIs that
>>    /// fits Read Port limitations in BS if available. Otherwise returns false
>>    /// and undefined content in BS.
>> +  /// isLastAluTrans should be set if the last Alu of MIs will be executed on
>> +  /// Trans ALU. In this case, ValidTSwizzle returns the BankSwizzle value to
>> +  /// apply to the last instruction.
>>    /// PV holds GPR to PV registers in the Instruction Group MIs.
>>    bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
>>                                 const DenseMap<unsigned, unsigned> &PV,
>> -                               std::vector<BankSwizzle> &BS) const;
>> +                               std::vector<BankSwizzle> &BS,
>> +                               bool isLastAluTrans) const;
>> +
>> +  /// An instruction group can only access 2 channel pair (either [XY] or [ZW])
>> +  /// from KCache bank on R700+. This function check if MI set in input meet
>> +  /// this limitations
>> +  bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;
>> +  /// Same but using const index set instead of MI set.
>>    bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
>> -  bool canBundle(const std::vector<MachineInstr *> &) const;
>> 
>>    /// \breif Vector instructions are instructions that must fill all
>>    /// instruction slots within an instruction group.
>> 
>> Modified: llvm/trunk/lib/Target/R600/R600Instructions.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600Instructions.td (original)
>> +++ llvm/trunk/lib/Target/R600/R600Instructions.td Sat Jun 29 14:32:43 2013
>> @@ -1489,6 +1489,8 @@ let hasSideEffects = 1 in {
>> 
>>    def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
>>      let Pattern = [];
>> +    let TransOnly = 0;
>> +    let Itinerary = AnyALU;
>>    }
>> 
>>    def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
>> 
>> Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp (original)
>> +++ llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp Sat Jun 29 14:32:43 2013
>> @@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(Sched
>>    MRI = &DAG->MRI;
>>    CurInstKind = IDOther;
>>    CurEmitted = 0;
>> -  OccupedSlotsMask = 15;
>> +  OccupedSlotsMask = 31;
>>    InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
>>    InstKindLimit[IDOther] = 32;
>> 
>> @@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit
>>    if (NextInstKind != CurInstKind) {
>>      DEBUG(dbgs() << "Instruction Type Switch\n");
>>      if (NextInstKind != IDAlu)
>> -      OccupedSlotsMask = 15;
>> +      OccupedSlotsMask |= 31;
>>      CurEmitted = 0;
>>      CurInstKind = NextInstKind;
>>    }
>> @@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClas
>>  R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
>>    MachineInstr *MI = SU->getInstr();
>> 
>> +  if (TII->isTransOnly(MI))
>> +    return AluTrans;
>> +
>>      switch (MI->getOpcode()) {
>>      case AMDGPU::PRED_X:
>>        return AluPredX;
>> @@ -346,7 +349,7 @@ SUnit *R600SchedStrategy::PopInst(std::v
>>        It != E; ++It) {
>>      SUnit *SU = *It;
>>      InstructionsGroupCandidate.push_back(SU->getInstr());
>> -    if (TII->canBundle(InstructionsGroupCandidate)) {
>> +    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
>>        InstructionsGroupCandidate.pop_back();
>>        Q.erase((It + 1).base());
>>        return SU;
>> @@ -421,7 +424,8 @@ unsigned R600SchedStrategy::AvailablesAl
>>    return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
>>        AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
>>        AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
>> -      AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();
>> +      AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
>> +      AvailableAlus[AluPredX].size();
>>  }
>> 
>>  SUnit* R600SchedStrategy::pickAlu() {
>> @@ -429,20 +433,27 @@ SUnit* R600SchedStrategy::pickAlu() {
>>      if (!OccupedSlotsMask) {
>>        // Bottom up scheduling : predX must comes first
>>        if (!AvailableAlus[AluPredX].empty()) {
>> -        OccupedSlotsMask = 15;
>> +        OccupedSlotsMask |= 31;
>>          return PopInst(AvailableAlus[AluPredX]);
>>        }
>>        // Flush physical reg copies (RA will discard them)
>>        if (!AvailableAlus[AluDiscarded].empty()) {
>> -        OccupedSlotsMask = 15;
>> +        OccupedSlotsMask |= 31;
>>          return PopInst(AvailableAlus[AluDiscarded]);
>>        }
>>        // If there is a T_XYZW alu available, use it
>>        if (!AvailableAlus[AluT_XYZW].empty()) {
>> -        OccupedSlotsMask = 15;
>> +        OccupedSlotsMask |= 15;
>>          return PopInst(AvailableAlus[AluT_XYZW]);
>>        }
>>      }
>> +    bool TransSlotOccuped = OccupedSlotsMask & 16;
>> +    if (!TransSlotOccuped) {
>> +      if (!AvailableAlus[AluTrans].empty()) {
>> +        OccupedSlotsMask |= 16;
>> +        return PopInst(AvailableAlus[AluTrans]);
>> +      }
>> +    }
>>      for (int Chan = 3; Chan > -1; --Chan) {
>>        bool isOccupied = OccupedSlotsMask & (1 << Chan);
>>        if (!isOccupied) {
>> 
>> Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.h?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600MachineScheduler.h (original)
>> +++ llvm/trunk/lib/Target/R600/R600MachineScheduler.h Sat Jun 29 14:32:43 2013
>> @@ -46,6 +46,7 @@ class R600SchedStrategy : public Machine
>>      AluT_W,
>>      AluT_XYZW,
>>      AluPredX,
>> +    AluTrans,
>>      AluDiscarded, // LLVM Instructions that are going to be eliminated
>>      AluLast
>>    };
>> 
>> Modified: llvm/trunk/lib/Target/R600/R600Packetizer.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Packetizer.cpp?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600Packetizer.cpp (original)
>> +++ llvm/trunk/lib/Target/R600/R600Packetizer.cpp Sat Jun 29 14:32:43 2013
>> @@ -77,8 +77,6 @@ private:
>>      do {
>>        if (TII->isPredicated(BI))
>>          continue;
>> -      if (TII->isTransOnly(BI))
>> -        continue;
>>        int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
>>        if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
>>          continue;
>> @@ -87,6 +85,10 @@ private:
>>          continue;
>>        }
>>        unsigned Dst = BI->getOperand(DstIdx).getReg();
>> +      if (TII->isTransOnly(BI)) {
>> +        Result[Dst] = AMDGPU::PS;
>> +        continue;
>> +      }
>>        if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
>>            BI->getOpcode() == AMDGPU::DOT4_eg) {
>>          Result[Dst] = AMDGPU::PV_X;
>> @@ -157,10 +159,6 @@ public:
>>        return true;
>>      if (!TII->isALUInstr(MI->getOpcode()))
>>        return true;
>> -    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
>> -      return true;
>> -    if (TII->isTransOnly(MI))
>> -      return true;
>>      if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
>>        return true;
>>      return false;
>> @@ -170,7 +168,7 @@ public:
>>    // together.
>>    bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
>>      MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
>> -    if (getSlot(MII) <= getSlot(MIJ))
>> +    if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
>>        return false;
>>      // Does MII and MIJ share the same pred_sel ?
>>      int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
>> @@ -204,11 +202,16 @@ public:
>>      MI->getOperand(LastOp).setImm(Bit);
>>    }
>> 
>> -  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
>> +  bool isBundlableWithCurrentPMI(MachineInstr *MI,
>> +                                 const DenseMap<unsigned, unsigned> &PV,
>> +                                 std::vector<R600InstrInfo::BankSwizzle> &BS,
>> +                                 bool &isTransSlot) {
>> +    isTransSlot = TII->isTransOnly(MI);
>> +
>> +    // Are the Constants limitations met ?
>>      CurrentPacketMIs.push_back(MI);
>> -    bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
>> -    DEBUG(
>> -      if (!FitsConstLimits) {
>> +    if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
>> +      DEBUG(
>>          dbgs() << "Couldn't pack :\n";
>>          MI->dump();
>>          dbgs() << "with the following packets :\n";
>> @@ -217,14 +220,15 @@ public:
>>            dbgs() << "\n";
>>          }
>>          dbgs() << "because of Consts read limitations\n";
>> -      });
>> -    const DenseMap<unsigned, unsigned> &PV =
>> -        getPreviousVector(CurrentPacketMIs.front());
>> -    std::vector<R600InstrInfo::BankSwizzle> BS;
>> -    bool FitsReadPortLimits =
>> -        TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);
>> -    DEBUG(
>> -      if (!FitsReadPortLimits) {
>> +      );
>> +      CurrentPacketMIs.pop_back();
>> +      return false;
>> +    }
>> +
>> +    // Is there a BankSwizzle set that meet Read Port limitations ?
>> +    if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
>> +            PV, BS, isTransSlot)) {
>> +      DEBUG(
>>          dbgs() << "Couldn't pack :\n";
>>          MI->dump();
>>          dbgs() << "with the following packets :\n";
>> @@ -233,25 +237,43 @@ public:
>>            dbgs() << "\n";
>>          }
>>          dbgs() << "because of Read port limitations\n";
>> -      });
>> -    bool isBundlable = FitsConstLimits && FitsReadPortLimits;
>> -    if (isBundlable) {
>> +      );
>> +      CurrentPacketMIs.pop_back();
>> +      return false;
>> +    }
>> +
>> +    CurrentPacketMIs.pop_back();
>> +    return true;
>> +  }
>> +
>> +  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
>> +    MachineBasicBlock::iterator FirstInBundle =
>> +        CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
>> +    const DenseMap<unsigned, unsigned> &PV =
>> +        getPreviousVector(FirstInBundle);
>> +    std::vector<R600InstrInfo::BankSwizzle> BS;
>> +    bool isTransSlot;
>> +
>> +    if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
>>        for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
>>          MachineInstr *MI = CurrentPacketMIs[i];
>> -            unsigned Op = TII->getOperandIdx(MI->getOpcode(),
>> -                AMDGPU::OpName::bank_swizzle);
>> -            MI->getOperand(Op).setImm(BS[i]);
>> +        unsigned Op = TII->getOperandIdx(MI->getOpcode(),
>> +            AMDGPU::OpName::bank_swizzle);
>> +        MI->getOperand(Op).setImm(BS[i]);
>> +      }
>> +      unsigned Op = TII->getOperandIdx(MI->getOpcode(),
>> +          AMDGPU::OpName::bank_swizzle);
>> +      MI->getOperand(Op).setImm(BS.back());
>> +      if (!CurrentPacketMIs.empty())
>> +        setIsLastBit(CurrentPacketMIs.back(), 0);
>> +      substitutePV(MI, PV);
>> +      MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
>> +      if (isTransSlot) {
>> +        endPacket(llvm::next(It)->getParent(), llvm::next(It));
>>        }
>> +      return It;
>>      }
>> -    CurrentPacketMIs.pop_back();
>> -    if (!isBundlable) {
>> -      endPacket(MI->getParent(), MI);
>> -      substitutePV(MI, getPreviousVector(MI));
>> -      return VLIWPacketizerList::addToPacket(MI);
>> -    }
>> -    if (!CurrentPacketMIs.empty())
>> -      setIsLastBit(CurrentPacketMIs.back(), 0);
>> -    substitutePV(MI, PV);
>> +    endPacket(MI->getParent(), MI);
>>      return VLIWPacketizerList::addToPacket(MI);
>>    }
>>  };
>> 
>> Modified: llvm/trunk/lib/Target/R600/R600RegisterInfo.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600RegisterInfo.td?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/R600/R600RegisterInfo.td (original)
>> +++ llvm/trunk/lib/Target/R600/R600RegisterInfo.td Sat Jun 29 14:32:43 2013
>> @@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254,
>>  def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
>>  def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
>>  def PV_W : R600RegWithChan<"PV.W", 254, "W">;
>> +def PS: R600Reg<"PS", 255>;
>>  def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
>>  def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
>>  def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
>> 
>> Modified: llvm/trunk/test/CodeGen/R600/fdiv.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fdiv.ll?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/fdiv.ll (original)
>> +++ llvm/trunk/test/CodeGen/R600/fdiv.ll Sat Jun 29 14:32:43 2013
>> @@ -1,13 +1,13 @@
>>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> 
>>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>> 
>>  define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
>>    %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
>> 
>> Modified: llvm/trunk/test/CodeGen/R600/fp_to_sint.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_sint.ll?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/fp_to_sint.ll (original)
>> +++ llvm/trunk/test/CodeGen/R600/fp_to_sint.ll Sat Jun 29 14:32:43 2013
>> @@ -1,10 +1,10 @@
>>  ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> 
>>  ; CHECK: @fp_to_sint_v4i32
>> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> 
>>  define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
>>    %value = load <4 x float> addrspace(1) * %in
>> 
>> Modified: llvm/trunk/test/CodeGen/R600/fp_to_uint.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_uint.ll?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/fp_to_uint.ll (original)
>> +++ llvm/trunk/test/CodeGen/R600/fp_to_uint.ll Sat Jun 29 14:32:43 2013
>> @@ -1,10 +1,10 @@
>>  ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> 
>>  ; CHECK: @fp_to_uint_v4i32
>> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> 
>>  define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
>>    %value = load <4 x float> addrspace(1) * %in
>> 
>> Modified: llvm/trunk/test/CodeGen/R600/llvm.cos.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.cos.ll?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/llvm.cos.ll (original)
>> +++ llvm/trunk/test/CodeGen/R600/llvm.cos.ll Sat Jun 29 14:32:43 2013
>> @@ -1,6 +1,6 @@
>>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> 
>> -;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> 
>>  define void @test() {
>>     %r0 = call float @llvm.R600.load.input(i32 0)
>> 
>> Modified: llvm/trunk/test/CodeGen/R600/llvm.pow.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.pow.ll?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/llvm.pow.ll (original)
>> +++ llvm/trunk/test/CodeGen/R600/llvm.pow.ll Sat Jun 29 14:32:43 2013
>> @@ -1,8 +1,8 @@
>>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> 
>>  ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>> +;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> 
>>  define void @test() {
>>     %r0 = call float @llvm.R600.load.input(i32 0)
>> 
>> Modified: llvm/trunk/test/CodeGen/R600/llvm.sin.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.sin.ll?rev=185268&r1=185267&r2=185268&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/R600/llvm.sin.ll (original)
>> +++ llvm/trunk/test/CodeGen/R600/llvm.sin.ll Sat Jun 29 14:32:43 2013
>> @@ -1,6 +1,6 @@
>>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> 
>> -;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> 
>>  define void @test() {
>>     %r0 = call float @llvm.R600.load.input(i32 0)
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list