R600: Initial support for vliw5 scheduling

Tom Stellard tom at stellard.net
Thu Jun 27 14:59:09 PDT 2013


On Thu, Jun 27, 2013 at 01:56:58PM -0700, Vincent Lejeune wrote:
> Hi,
> 
> These 2 patches allow trans-only instructions to be grouped with vector instructions to form 5-instruction bundles on VLIW5 processors.
> I had to remove the isTransOnly attribute from the FLT_TO_INT_eg instruction because the docs appear to be wrong in saying it's a trans-only instruction:
> a single FLT_TO_INT_eg instruction does not write to the PS register (trans-only instructions always write to the PS register), and no regression is introduced when
> making it non-trans-only. (The SB backend seems to mark it as not trans-only, too.)
>

Patch 1 is:

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

See my inline comments for patch #2.

-Tom

> I have another set of patches to generalize VLIW5 support to all non-vector-only instructions, but it's still WIP at the moment.
> 
> 
> Vincent

> From 619dd4ef5223050e81a5f80eae2582f588efe3c7 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 26 Jun 2013 18:09:58 +0200
> Subject: [PATCH 2/2] R600: Support schedule and packetization of trans-only
>  inst
> 
> ---
>  lib/Target/R600/R600InstrInfo.cpp        | 160 +++++++++++++++++++++++++------
>  lib/Target/R600/R600InstrInfo.h          |   6 +-
>  lib/Target/R600/R600Instructions.td      |   2 +
>  lib/Target/R600/R600MachineScheduler.cpp |  23 +++--
>  lib/Target/R600/R600MachineScheduler.h   |   1 +
>  lib/Target/R600/R600Packetizer.cpp       |  88 ++++++++++-------
>  lib/Target/R600/R600RegisterInfo.td      |   1 +
>  test/CodeGen/R600/fdiv.ll                |   8 +-
>  test/CodeGen/R600/llvm.cos.ll            |   2 +-
>  test/CodeGen/R600/llvm.pow.ll            |   4 +-
>  test/CodeGen/R600/llvm.sin.ll            |   2 +-
>  11 files changed, 218 insertions(+), 79 deletions(-)
> 
> diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
> index 8f65cc2..f972bcf 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -225,24 +225,27 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
>  
>  std::vector<std::pair<int, unsigned> >
>  R600InstrInfo::ExtractSrcs(MachineInstr *MI,
> -                           const DenseMap<unsigned, unsigned> &PV)
> -    const {
> +                           const DenseMap<unsigned, unsigned> &PV,
> +                           unsigned &ConstCount) const {
> +  ConstCount = 0;
>    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
>    const std::pair<int, unsigned> DummyPair(-1, 0);
>    std::vector<std::pair<int, unsigned> > Result;
>    unsigned i = 0;
>    for (unsigned n = Srcs.size(); i < n; ++i) {
>      unsigned Reg = Srcs[i].first->getReg();
> -    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
> -    unsigned Chan = RI.getHWRegChan(Reg);
> -    if (Index > 127) {
> -      Result.push_back(DummyPair);
> +    if (PV.find(Reg) != PV.end()) {
> +      // 255 is used to tells its a PS/PV reg
> +      Result.push_back(std::pair<int, unsigned>(255, 0));
>        continue;
>      }
> -    if (PV.find(Reg) != PV.end()) {
> +    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
> +    if (Index > 127) {
> +      ConstCount++;
>        Result.push_back(DummyPair);
>        continue;
>      }
> +    unsigned Chan = RI.getHWRegChan(Reg);
>      Result.push_back(std::pair<int, unsigned>(Index, Chan));
>    }
>    for (; i < 3; ++i)
> @@ -277,66 +280,161 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src,
>    return Src;
>  }
>  
> -static bool
> -isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
> +static unsigned
> +getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
> +  switch (Swz) {
> +  case R600InstrInfo::ALU_VEC_012_SCL_210: {
> +    unsigned Cycles[3] = { 2, 1, 0};
> +    return Cycles[Op];
> +  }
> +  case R600InstrInfo::ALU_VEC_021_SCL_122: {
> +    unsigned Cycles[3] = { 1, 2, 2};
> +    return Cycles[Op];
> +  }
> +  case R600InstrInfo::ALU_VEC_120_SCL_212: {
> +    unsigned Cycles[3] = { 2, 1, 2};
> +    return Cycles[Op];
> +  }
> +  case R600InstrInfo::ALU_VEC_102_SCL_221: {
> +    unsigned Cycles[3] = { 2, 2, 1};
> +    return Cycles[Op];
> +  }
> +  default:
> +    llvm_unreachable("Wrong Swizzle for Trans Slot");
> +    return 0;
> +  }
> +}
> +
> +static unsigned
> +isLegalUpTo(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>      const std::vector<R600InstrInfo::BankSwizzle> &Swz,
> -    unsigned CheckedSize) {
> +    const std::vector<std::pair<int, unsigned> > &TransSrcs,
> +    R600InstrInfo::BankSwizzle TransSwz) {
>    int Vector[4][3];
>    memset(Vector, -1, sizeof(Vector));
> -  for (unsigned i = 0; i < CheckedSize; i++) {
> +  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
>      const std::vector<std::pair<int, unsigned> > &Srcs =
>          Swizzle(IGSrcs[i], Swz[i]);
>      for (unsigned j = 0; j < 3; j++) {
>        const std::pair<int, unsigned> &Src = Srcs[j];
> -      if (Src.first < 0)
> +      if (Src.first < 0 || Src.first == 255)
>          continue;
>        if (Vector[Src.second][j] < 0)
>          Vector[Src.second][j] = Src.first;
>        if (Vector[Src.second][j] != Src.first)
> -        return false;
> +        return i;
>      }
>    }
> +  // Now check Trans Alu
> +  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
> +    const std::pair<int, unsigned> &Src = TransSrcs[i];
> +    unsigned Cycle = getTransSwizzle(TransSwz, i);
> +    if (Src.first < 0)
> +      continue;
> +    if (Src.first == 255)
> +      continue;
> +    if (Vector[Src.second][Cycle] < 0)
> +      Vector[Src.second][Cycle] = Src.first;
> +    if (Vector[Src.second][Cycle] != Src.first)
> +      return IGSrcs.size() - 1;
> +  }
> +  return IGSrcs.size();
> +}
> +
> +static bool
> +NextPossibleSolution(
> +    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
> +    unsigned From) {
> +  assert(From < SwzCandidate.size());
> +  int ResetFrom = From;
> +  while (ResetFrom > -1 && SwzCandidate[ResetFrom] == R600InstrInfo::ALU_VEC_210)
> +    ResetFrom --;
> +  for (unsigned i = ResetFrom + 1, e = SwzCandidate.size(); i < e; i++) {
> +    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
> +  }
> +  if (ResetFrom == -1)
> +    return false;
> +  SwzCandidate[ResetFrom]++;
>    return true;
>  }
>  
>  static bool recursiveFitsFPLimitation(
>  const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
>  std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
> -unsigned Depth = 0) {
> -  if (!isLegal(IGSrcs, SwzCandidate, Depth))
> -    return false;
> -  if (IGSrcs.size() == Depth)
> -    return true;
> -  unsigned i = SwzCandidate[Depth];
> -  for (; i < 6; i++) {
> -    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
> -    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
> +const std::vector<std::pair<int, unsigned> > &TransSrcs,
> +R600InstrInfo::BankSwizzle TransSwz) {
> +  unsigned ValidUpTo = 0;
> +  do {
> +    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
> +    if (ValidUpTo == IGSrcs.size())
>        return true;
> -  }
> -  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
> +  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
>    return false;
>  }
>  
> +static bool
> +isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
> +                  const std::vector<std::pair<int, unsigned> > &TransOps,
> +                  unsigned ConstCount) {
> +  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
> +    const std::pair<int, unsigned> &Src = TransOps[i];
> +    unsigned Cycle = getTransSwizzle(TransSwz, i);
> +    if (Src.first < 0)
> +      continue;
> +    if (ConstCount > 0 && Cycle == 0)
> +      return false;
> +    if (ConstCount > 1 && Cycle == 1)
> +      return false;
> +  }
> +  return true;
> +}
> +
>  bool
>  R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
> -                                      const DenseMap<unsigned, unsigned> &PV,
> -                                      std::vector<BankSwizzle> &ValidSwizzle)
> +                                       const DenseMap<unsigned, unsigned> &PV,
> +                                       std::vector<BankSwizzle> &ValidSwizzle,
> +                                       bool isLastAluTrans)
>      const {
>    //Todo : support shared src0 - src1 operand
>  
>    std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
>    ValidSwizzle.clear();
> +  unsigned ConstCount;
> +  BankSwizzle TransBS;
>    for (unsigned i = 0, e = IG.size(); i < e; ++i) {
> -    IGSrcs.push_back(ExtractSrcs(IG[i], PV));
> +    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
>      unsigned Op = getOperandIdx(IG[i]->getOpcode(),
>          R600Operands::BANK_SWIZZLE);
>      ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
>          IG[i]->getOperand(Op).getImm());
>    }
> -  bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
> -  if (!Result)
> -    return false;
> -  return true;
> +  std::vector<std::pair<int, unsigned> > TransOps;
> +  if (!isLastAluTrans)
> +    return recursiveFitsFPLimitation(IGSrcs, ValidSwizzle, TransOps, TransBS);
> +
> +  TransOps = IGSrcs.back();
> +  IGSrcs.pop_back();
> +  ValidSwizzle.pop_back();
> +
> +  static const R600InstrInfo::BankSwizzle TransSwz[] = {
> +    ALU_VEC_012_SCL_210,
> +    ALU_VEC_021_SCL_122,
> +    ALU_VEC_120_SCL_212,
> +    ALU_VEC_102_SCL_221
> +  };
> +  for (unsigned i = 0; i < 4; i++) {
> +    TransBS = TransSwz[i];
> +    if (!isConstCompatible(TransBS, TransOps, ConstCount))
> +      continue;
> +    bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle, TransOps,
> +        TransBS);
> +    if (Result) {
> +      ValidSwizzle.push_back(TransBS);
> +      return true;
> +    }
> +  }
> +
> +  return false;
>  }
>  
>  
> diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
> index 79c7cdc..28fcbfd 100644
> --- a/lib/Target/R600/R600InstrInfo.h
> +++ b/lib/Target/R600/R600InstrInfo.h
> @@ -85,10 +85,14 @@ namespace llvm {
>    /// starting from the one already provided in the Instruction Group MIs that
>    /// fits Read Port limitations in BS if available. Otherwise returns false
>    /// and undefined content in BS.
> +  /// isLastAluTrans should be set if the last Alu of MIs will be executed on
> +  /// Trans ALU. In this case, ValidTSwizzle returns the BankSwizzle value to
> +  /// apply to the last instruction.
>    /// PV holds GPR to PV registers in the Instruction Group MIs.
>    bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
>                                 const DenseMap<unsigned, unsigned> &PV,
> -                               std::vector<BankSwizzle> &BS) const;
> +                               std::vector<BankSwizzle> &BS,
> +                               bool isLastAluTrans) const;
>    bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
>    bool canBundle(const std::vector<MachineInstr *> &) const;
>  
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 83d735f..f324146 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -1478,12 +1478,14 @@ let hasSideEffects = 1 in {
>  
>    def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
>      let Pattern = [];
> +    let TransOnly = 0;
>    }

I've confirmed that this is correct. FLT_TO_INT is trans only for
r6xx/r7xx and has no restrictions on Evergreen.  However, it looks like
you forgot to change the itinerary, so the packetizer still thinks it's
trans only.

>  
>    def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
>  
>    def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
>      let Pattern = [];
> +    let TransOnly = 0;
>    }
> 

FLT_TO_UINT is trans only for all GPU families.  My guess is you didn't
see any regressions because the itinerary is still TransALU.
 
>    def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
> diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
> index a330d88..050a12f 100644
> --- a/lib/Target/R600/R600MachineScheduler.cpp
> +++ b/lib/Target/R600/R600MachineScheduler.cpp
> @@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
>    MRI = &DAG->MRI;
>    CurInstKind = IDOther;
>    CurEmitted = 0;
> -  OccupedSlotsMask = 15;
> +  OccupedSlotsMask = 31;
>    InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
>    InstKindLimit[IDOther] = 32;
>  
> @@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
>    if (NextInstKind != CurInstKind) {
>      DEBUG(dbgs() << "Instruction Type Switch\n");
>      if (NextInstKind != IDAlu)
> -      OccupedSlotsMask = 15;
> +      OccupedSlotsMask |= 31;
>      CurEmitted = 0;
>      CurInstKind = NextInstKind;
>    }
> @@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
>  R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
>    MachineInstr *MI = SU->getInstr();
>  
> +  if (TII->isTransOnly(MI))
> +    return AluTrans;
> +
>      switch (MI->getOpcode()) {
>      case AMDGPU::PRED_X:
>        return AluPredX;
> @@ -409,7 +412,8 @@ unsigned R600SchedStrategy::AvailablesAluCount() const {
>    return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
>        AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
>        AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
> -      AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();
> +      AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
> +      AvailableAlus[AluPredX].size();
>  }
>  
>  SUnit* R600SchedStrategy::pickAlu() {
> @@ -417,20 +421,27 @@ SUnit* R600SchedStrategy::pickAlu() {
>      if (!OccupedSlotsMask) {
>        // Bottom up scheduling : predX must comes first
>        if (!AvailableAlus[AluPredX].empty()) {
> -        OccupedSlotsMask = 15;
> +        OccupedSlotsMask |= 31;
>          return PopInst(AvailableAlus[AluPredX]);
>        }
>        // Flush physical reg copies (RA will discard them)
>        if (!AvailableAlus[AluDiscarded].empty()) {
> -        OccupedSlotsMask = 15;
> +        OccupedSlotsMask |= 31;
>          return PopInst(AvailableAlus[AluDiscarded]);
>        }
>        // If there is a T_XYZW alu available, use it
>        if (!AvailableAlus[AluT_XYZW].empty()) {
> -        OccupedSlotsMask = 15;
> +        OccupedSlotsMask |= 15;
>          return PopInst(AvailableAlus[AluT_XYZW]);
>        }
>      }
> +    bool TransSlotOccuped = OccupedSlotsMask & 16;
> +    if (!TransSlotOccuped) {
> +      if (!AvailableAlus[AluTrans].empty()) {
> +        OccupedSlotsMask |= 16;
> +        return PopInst(AvailableAlus[AluTrans]);
> +      }
> +    }
>      for (int Chan = 3; Chan > -1; --Chan) {
>        bool isOccupied = OccupedSlotsMask & (1 << Chan);
>        if (!isOccupied) {
> diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
> index aae8b3f..f8965d8 100644
> --- a/lib/Target/R600/R600MachineScheduler.h
> +++ b/lib/Target/R600/R600MachineScheduler.h
> @@ -46,6 +46,7 @@ class R600SchedStrategy : public MachineSchedStrategy {
>      AluT_W,
>      AluT_XYZW,
>      AluPredX,
> +    AluTrans,
>      AluDiscarded, // LLVM Instructions that are going to be eliminated
>      AluLast
>    };
> diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
> index da614c7..7d6eef1 100644
> --- a/lib/Target/R600/R600Packetizer.cpp
> +++ b/lib/Target/R600/R600Packetizer.cpp
> @@ -77,12 +77,14 @@ private:
>      do {
>        if (TII->isPredicated(BI))
>          continue;
> -      if (TII->isTransOnly(BI))
> -        continue;
>        int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE);
>        if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
>          continue;
>        unsigned Dst = BI->getOperand(0).getReg();
> +      if (TII->isTransOnly(BI)) {
> +        Result[Dst] = AMDGPU::PS;
> +        continue;
> +      }
>        if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
>            BI->getOpcode() == AMDGPU::DOT4_eg) {
>          Result[Dst] = AMDGPU::PV_X;
> @@ -150,10 +152,6 @@ public:
>        return true;
>      if (!TII->isALUInstr(MI->getOpcode()))
>        return true;
> -    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
> -      return true;
> -    if (TII->isTransOnly(MI))
> -      return true;
>      return false;
>    }
>  
> @@ -195,11 +193,16 @@ public:
>      MI->getOperand(LastOp).setImm(Bit);
>    }
>  
> -  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
> +  bool isBundlableWithCurrentPMI(MachineInstr *MI,
> +                                 const DenseMap<unsigned, unsigned> &PV,
> +                                 std::vector<R600InstrInfo::BankSwizzle> &BS,
> +                                 bool &isTransSlot) {
> +    isTransSlot = TII->isTransOnly(MI);
> +
> +    // Are the Constants limitations met ?
>      CurrentPacketMIs.push_back(MI);
> -    bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
> -    DEBUG(
> -      if (!FitsConstLimits) {
> +    if (!TII->canBundle(CurrentPacketMIs)) {
> +      DEBUG(
>          dbgs() << "Couldn't pack :\n";
>          MI->dump();
>          dbgs() << "with the following packets :\n";
> @@ -208,14 +211,15 @@ public:
>            dbgs() << "\n";
>          }
>          dbgs() << "because of Consts read limitations\n";
> -      });
> -    const DenseMap<unsigned, unsigned> &PV =
> -        getPreviousVector(CurrentPacketMIs.front());
> -    std::vector<R600InstrInfo::BankSwizzle> BS;
> -    bool FitsReadPortLimits =
> -        TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);
> -    DEBUG(
> -      if (!FitsReadPortLimits) {
> +      );
> +      CurrentPacketMIs.pop_back();
> +      return false;
> +    }
> +
> +    // Is there a BankSwizzle set that meet Read Port limitations ?
> +    if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
> +            PV, BS, isTransSlot)) {
> +      DEBUG(
>          dbgs() << "Couldn't pack :\n";
>          MI->dump();
>          dbgs() << "with the following packets :\n";
> @@ -224,25 +228,43 @@ public:
>            dbgs() << "\n";
>          }
>          dbgs() << "because of Read port limitations\n";
> -      });
> -    bool isBundlable = FitsConstLimits && FitsReadPortLimits;
> -    if (isBundlable) {
> +      );
> +      CurrentPacketMIs.pop_back();
> +      return false;
> +    }
> +
> +    CurrentPacketMIs.pop_back();
> +    return true;
> +  }
> +
> +  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
> +    MachineBasicBlock::iterator FirstInBundle =
> +        CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
> +    const DenseMap<unsigned, unsigned> &PV =
> +        getPreviousVector(FirstInBundle);
> +    std::vector<R600InstrInfo::BankSwizzle> BS;
> +    bool isTransSlot;
> +
> +    if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
>        for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
>          MachineInstr *MI = CurrentPacketMIs[i];
> -            unsigned Op = TII->getOperandIdx(MI->getOpcode(),
> -                R600Operands::BANK_SWIZZLE);
> -            MI->getOperand(Op).setImm(BS[i]);
> +        unsigned Op = TII->getOperandIdx(MI->getOpcode(),
> +            R600Operands::BANK_SWIZZLE);
> +        MI->getOperand(Op).setImm(BS[i]);
>        }
> +      unsigned Op = TII->getOperandIdx(MI->getOpcode(),
> +          R600Operands::BANK_SWIZZLE);
> +      MI->getOperand(Op).setImm(BS.back());
> +      if (!CurrentPacketMIs.empty())
> +        setIsLastBit(CurrentPacketMIs.back(), 0);
> +      substitutePV(MI, PV);
> +      MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
> +      if (isTransSlot) {
> +        endPacket(llvm::next(It)->getParent(), llvm::next(It));
> +      }
> +      return It;
>      }
> -    CurrentPacketMIs.pop_back();
> -    if (!isBundlable) {
> -      endPacket(MI->getParent(), MI);
> -      substitutePV(MI, getPreviousVector(MI));
> -      return VLIWPacketizerList::addToPacket(MI);
> -    }
> -    if (!CurrentPacketMIs.empty())
> -      setIsLastBit(CurrentPacketMIs.back(), 0);
> -    substitutePV(MI, PV);
> +    endPacket(MI->getParent(), MI);
>      return VLIWPacketizerList::addToPacket(MI);
>    }
>  };
> diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
> index a8b9b70..323bf9f 100644
> --- a/lib/Target/R600/R600RegisterInfo.td
> +++ b/lib/Target/R600/R600RegisterInfo.td
> @@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254, "X">;
>  def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
>  def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
>  def PV_W : R600RegWithChan<"PV.W", 254, "W">;
> +def PS: R600Reg<"PS", 255>;
>  def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
>  def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
>  def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
> diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
> index 003590b..21ed486 100644
> --- a/test/CodeGen/R600/fdiv.ll
> +++ b/test/CodeGen/R600/fdiv.ll
> @@ -1,13 +1,13 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>  
>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>  ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>  
>  define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
> diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
> index 9b28167..b444fa7 100644
> --- a/test/CodeGen/R600/llvm.cos.ll
> +++ b/test/CodeGen/R600/llvm.cos.ll
> @@ -1,6 +1,6 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>  
> -;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>  
>  define void @test() {
>     %r0 = call float @llvm.R600.load.input(i32 0)
> diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
> index 1422083..0f51cf4 100644
> --- a/test/CodeGen/R600/llvm.pow.ll
> +++ b/test/CodeGen/R600/llvm.pow.ll
> @@ -1,8 +1,8 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>  
>  ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
> +;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>  
>  define void @test() {
>     %r0 = call float @llvm.R600.load.input(i32 0)
> diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
> index 803dc2d..09cc3d2 100644
> --- a/test/CodeGen/R600/llvm.sin.ll
> +++ b/test/CodeGen/R600/llvm.sin.ll
> @@ -1,6 +1,6 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>  
> -;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>  
>  define void @test() {
>     %r0 = call float @llvm.R600.load.input(i32 0)
> -- 
> 1.8.3.1
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list