R600: Initial support for vliw5 scheduling
Tom Stellard
tom at stellard.net
Fri Jun 28 15:37:29 PDT 2013
Hi Vincent,
I went ahead and rebased these patches on top of master, because I
wanted to verify that they worked with my local memory changes. You can
find the rebased patches here:
http://cgit.freedesktop.org/~tstellar/llvm/log/?h=vliw5-rebase
I've tested these with compute, but you may want to retest with graphics
to make sure I didn't make any mistakes.
It looks like you forgot to update the Itinerary for FLT_TO_INT_eg after
clearing the TransOnly bit, but once that is fixed, the patches are:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
On Fri, Jun 28, 2013 at 02:25:14PM -0700, Vincent Lejeune wrote:
> From 7e714c3541374336aba58d4f0389781ff1fe7f18 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 26 Jun 2013 18:09:58 +0200
> Subject: [PATCH] R600: Support schedule and packetization of trans-only inst
>
> ---
> lib/Target/R600/R600InstrInfo.cpp | 179 +++++++++++++++++++++++++------
> lib/Target/R600/R600InstrInfo.h | 13 ++-
> lib/Target/R600/R600Instructions.td | 1 +
> lib/Target/R600/R600MachineScheduler.cpp | 25 +++--
> lib/Target/R600/R600MachineScheduler.h | 1 +
> lib/Target/R600/R600Packetizer.cpp | 90 ++++++++++------
> lib/Target/R600/R600RegisterInfo.td | 1 +
> test/CodeGen/R600/fdiv.ll | 8 +-
> test/CodeGen/R600/fp_to_sint.ll | 8 +-
> test/CodeGen/R600/fp_to_uint.ll | 8 +-
> test/CodeGen/R600/llvm.cos.ll | 2 +-
> test/CodeGen/R600/llvm.pow.ll | 4 +-
> test/CodeGen/R600/llvm.sin.ll | 2 +-
> 13 files changed, 248 insertions(+), 94 deletions(-)
>
> diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
> index 8f65cc2..a3b3957 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -225,24 +225,27 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
>
> std::vector<std::pair<int, unsigned> >
> R600InstrInfo::ExtractSrcs(MachineInstr *MI,
> - const DenseMap<unsigned, unsigned> &PV)
> - const {
> + const DenseMap<unsigned, unsigned> &PV,
> + unsigned &ConstCount) const {
> + ConstCount = 0;
> const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
> const std::pair<int, unsigned> DummyPair(-1, 0);
> std::vector<std::pair<int, unsigned> > Result;
> unsigned i = 0;
> for (unsigned n = Srcs.size(); i < n; ++i) {
> unsigned Reg = Srcs[i].first->getReg();
> - unsigned Index = RI.getEncodingValue(Reg) & 0xff;
> - unsigned Chan = RI.getHWRegChan(Reg);
> - if (Index > 127) {
> - Result.push_back(DummyPair);
> + if (PV.find(Reg) != PV.end()) {
> + // 255 is used to tell that it's a PS/PV reg
> + Result.push_back(std::pair<int, unsigned>(255, 0));
> continue;
> }
> - if (PV.find(Reg) != PV.end()) {
> + unsigned Index = RI.getEncodingValue(Reg) & 0xff;
> + if (Index > 127) {
> + ConstCount++;
> Result.push_back(DummyPair);
> continue;
> }
> + unsigned Chan = RI.getHWRegChan(Reg);
> Result.push_back(std::pair<int, unsigned>(Index, Chan));
> }
> for (; i < 3; ++i)
> @@ -277,66 +280,171 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src,
> return Src;
> }
>
> -static bool
> -isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
> +static unsigned
> +getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
> + switch (Swz) {
> + case R600InstrInfo::ALU_VEC_012_SCL_210: {
> + unsigned Cycles[3] = { 2, 1, 0};
> + return Cycles[Op];
> + }
> + case R600InstrInfo::ALU_VEC_021_SCL_122: {
> + unsigned Cycles[3] = { 1, 2, 2};
> + return Cycles[Op];
> + }
> + case R600InstrInfo::ALU_VEC_120_SCL_212: {
> + unsigned Cycles[3] = { 2, 1, 2};
> + return Cycles[Op];
> + }
> + case R600InstrInfo::ALU_VEC_102_SCL_221: {
> + unsigned Cycles[3] = { 2, 2, 1};
> + return Cycles[Op];
> + }
> + default:
> + llvm_unreachable("Wrong Swizzle for Trans Slot");
> + return 0;
> + }
> +}
> +
> +/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
> +/// in the same Instruction Group while meeting read port limitations given a
> +/// Swz swizzle sequence.
> +static unsigned
> +isLegalUpTo(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
> const std::vector<R600InstrInfo::BankSwizzle> &Swz,
> - unsigned CheckedSize) {
> + const std::vector<std::pair<int, unsigned> > &TransSrcs,
> + R600InstrInfo::BankSwizzle TransSwz) {
> int Vector[4][3];
> memset(Vector, -1, sizeof(Vector));
> - for (unsigned i = 0; i < CheckedSize; i++) {
> + for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
> const std::vector<std::pair<int, unsigned> > &Srcs =
> Swizzle(IGSrcs[i], Swz[i]);
> for (unsigned j = 0; j < 3; j++) {
> const std::pair<int, unsigned> &Src = Srcs[j];
> - if (Src.first < 0)
> + if (Src.first < 0 || Src.first == 255)
> continue;
> if (Vector[Src.second][j] < 0)
> Vector[Src.second][j] = Src.first;
> if (Vector[Src.second][j] != Src.first)
> - return false;
> + return i;
> }
> }
> - return true;
> + // Now check Trans Alu
> + for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
> + const std::pair<int, unsigned> &Src = TransSrcs[i];
> + unsigned Cycle = getTransSwizzle(TransSwz, i);
> + if (Src.first < 0)
> + continue;
> + if (Src.first == 255)
> + continue;
> + if (Vector[Src.second][Cycle] < 0)
> + Vector[Src.second][Cycle] = Src.first;
> + if (Vector[Src.second][Cycle] != Src.first)
> + return IGSrcs.size() - 1;
> + }
> + return IGSrcs.size();
> }
>
> -static bool recursiveFitsFPLimitation(
> -const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
> -std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
> -unsigned Depth = 0) {
> - if (!isLegal(IGSrcs, SwzCandidate, Depth))
> +/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
> +/// (in lexicographic term) swizzle sequence assuming that all swizzles after
> +/// Idx can be skipped
> +static bool
> +NextPossibleSolution(
> + std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
> + unsigned Idx) {
> + assert(Idx < SwzCandidate.size());
> + int ResetIdx = Idx;
> + while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
> + ResetIdx --;
> + for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
> + SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
> + }
> + if (ResetIdx == -1)
> return false;
> - if (IGSrcs.size() == Depth)
> - return true;
> - unsigned i = SwzCandidate[Depth];
> - for (; i < 6; i++) {
> - SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
> - if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
> + SwzCandidate[ResetIdx]++;
> + return true;
> +}
> +
> +/// Enumerate all possible Swizzle sequence to find one that can meet all
> +/// read port requirements.
> +static bool FindSwizzleForVectorSlot(
> + const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
> + std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
> + const std::vector<std::pair<int, unsigned> > &TransSrcs,
> + R600InstrInfo::BankSwizzle TransSwz) {
> + unsigned ValidUpTo = 0;
> + do {
> + ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
> + if (ValidUpTo == IGSrcs.size())
> return true;
> - }
> - SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
> + } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
> return false;
> }
>
> +/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
> +/// a const, and can't read a gpr at cycle 1 if they read 2 const.
> +static bool
> +isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
> + const std::vector<std::pair<int, unsigned> > &TransOps,
> + unsigned ConstCount) {
> + for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
> + const std::pair<int, unsigned> &Src = TransOps[i];
> + unsigned Cycle = getTransSwizzle(TransSwz, i);
> + if (Src.first < 0)
> + continue;
> + if (ConstCount > 0 && Cycle == 0)
> + return false;
> + if (ConstCount > 1 && Cycle == 1)
> + return false;
> + }
> + return true;
> +}
> +
> bool
> R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
> - const DenseMap<unsigned, unsigned> &PV,
> - std::vector<BankSwizzle> &ValidSwizzle)
> + const DenseMap<unsigned, unsigned> &PV,
> + std::vector<BankSwizzle> &ValidSwizzle,
> + bool isLastAluTrans)
> const {
> //Todo : support shared src0 - src1 operand
>
> std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
> ValidSwizzle.clear();
> + unsigned ConstCount;
> + BankSwizzle TransBS;
> for (unsigned i = 0, e = IG.size(); i < e; ++i) {
> - IGSrcs.push_back(ExtractSrcs(IG[i], PV));
> + IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
> unsigned Op = getOperandIdx(IG[i]->getOpcode(),
> R600Operands::BANK_SWIZZLE);
> ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
> IG[i]->getOperand(Op).getImm());
> }
> - bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
> - if (!Result)
> - return false;
> - return true;
> + std::vector<std::pair<int, unsigned> > TransOps;
> + if (!isLastAluTrans)
> + return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
> +
> + TransOps = IGSrcs.back();
> + IGSrcs.pop_back();
> + ValidSwizzle.pop_back();
> +
> + static const R600InstrInfo::BankSwizzle TransSwz[] = {
> + ALU_VEC_012_SCL_210,
> + ALU_VEC_021_SCL_122,
> + ALU_VEC_120_SCL_212,
> + ALU_VEC_102_SCL_221
> + };
> + for (unsigned i = 0; i < 4; i++) {
> + TransBS = TransSwz[i];
> + if (!isConstCompatible(TransBS, TransOps, ConstCount))
> + continue;
> + bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
> + TransBS);
> + if (Result) {
> + ValidSwizzle.push_back(TransBS);
> + return true;
> + }
> + }
> +
> + return false;
> }
>
>
> @@ -366,7 +474,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
> }
>
> bool
> -R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
> +R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
> + const {
> std::vector<unsigned> Consts;
> for (unsigned i = 0, n = MIs.size(); i < n; i++) {
> MachineInstr *MI = MIs[i];
> diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
> index 79c7cdc..1bc138c 100644
> --- a/lib/Target/R600/R600InstrInfo.h
> +++ b/lib/Target/R600/R600InstrInfo.h
> @@ -85,12 +85,21 @@ namespace llvm {
> /// starting from the one already provided in the Instruction Group MIs that
> /// fits Read Port limitations in BS if available. Otherwise returns false
> /// and undefined content in BS.
> + /// isLastAluTrans should be set if the last Alu of MIs will be executed on
> + /// Trans ALU. In this case, the last element of BS is the BankSwizzle value
> + /// to apply to that last instruction.
> /// PV holds GPR to PV registers in the Instruction Group MIs.
> bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
> const DenseMap<unsigned, unsigned> &PV,
> - std::vector<BankSwizzle> &BS) const;
> + std::vector<BankSwizzle> &BS,
> + bool isLastAluTrans) const;
> +
> + /// An instruction group can only access 2 channel pairs (either [XY] or
> + /// [ZW]) from KCache bank on R700+. This function checks whether the set of
> + /// MIs given as input meets this limitation.
> + bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;
> + /// Same but using const index set instead of MI set.
> bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
> - bool canBundle(const std::vector<MachineInstr *> &) const;
>
> /// \breif Vector instructions are instructions that must fill all
> /// instruction slots within an instruction group.
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 83d735f..53bc613 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -1478,6 +1478,7 @@ let hasSideEffects = 1 in {
>
> def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
> let Pattern = [];
> + let TransOnly = 0;
> }
>
> def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
> diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
> index a330d88..ec73db1 100644
> --- a/lib/Target/R600/R600MachineScheduler.cpp
> +++ b/lib/Target/R600/R600MachineScheduler.cpp
> @@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
> MRI = &DAG->MRI;
> CurInstKind = IDOther;
> CurEmitted = 0;
> - OccupedSlotsMask = 15;
> + OccupedSlotsMask = 31;
> InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
> InstKindLimit[IDOther] = 32;
>
> @@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
> if (NextInstKind != CurInstKind) {
> DEBUG(dbgs() << "Instruction Type Switch\n");
> if (NextInstKind != IDAlu)
> - OccupedSlotsMask = 15;
> + OccupedSlotsMask |= 31;
> CurEmitted = 0;
> CurInstKind = NextInstKind;
> }
> @@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
> R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
> MachineInstr *MI = SU->getInstr();
>
> + if (TII->isTransOnly(MI))
> + return AluTrans;
> +
> switch (MI->getOpcode()) {
> case AMDGPU::PRED_X:
> return AluPredX;
> @@ -338,7 +341,7 @@ SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
> It != E; ++It) {
> SUnit *SU = *It;
> InstructionsGroupCandidate.push_back(SU->getInstr());
> - if (TII->canBundle(InstructionsGroupCandidate)) {
> + if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
> InstructionsGroupCandidate.pop_back();
> Q.erase((It + 1).base());
> return SU;
> @@ -409,7 +412,8 @@ unsigned R600SchedStrategy::AvailablesAluCount() const {
> return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
> AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
> AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
> - AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();
> + AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
> + AvailableAlus[AluPredX].size();
> }
>
> SUnit* R600SchedStrategy::pickAlu() {
> @@ -417,20 +421,27 @@ SUnit* R600SchedStrategy::pickAlu() {
> if (!OccupedSlotsMask) {
> // Bottom up scheduling : predX must comes first
> if (!AvailableAlus[AluPredX].empty()) {
> - OccupedSlotsMask = 15;
> + OccupedSlotsMask |= 31;
> return PopInst(AvailableAlus[AluPredX]);
> }
> // Flush physical reg copies (RA will discard them)
> if (!AvailableAlus[AluDiscarded].empty()) {
> - OccupedSlotsMask = 15;
> + OccupedSlotsMask |= 31;
> return PopInst(AvailableAlus[AluDiscarded]);
> }
> // If there is a T_XYZW alu available, use it
> if (!AvailableAlus[AluT_XYZW].empty()) {
> - OccupedSlotsMask = 15;
> + OccupedSlotsMask |= 15;
> return PopInst(AvailableAlus[AluT_XYZW]);
> }
> }
> + bool TransSlotOccuped = OccupedSlotsMask & 16;
> + if (!TransSlotOccuped) {
> + if (!AvailableAlus[AluTrans].empty()) {
> + OccupedSlotsMask |= 16;
> + return PopInst(AvailableAlus[AluTrans]);
> + }
> + }
> for (int Chan = 3; Chan > -1; --Chan) {
> bool isOccupied = OccupedSlotsMask & (1 << Chan);
> if (!isOccupied) {
> diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
> index aae8b3f..f8965d8 100644
> --- a/lib/Target/R600/R600MachineScheduler.h
> +++ b/lib/Target/R600/R600MachineScheduler.h
> @@ -46,6 +46,7 @@ class R600SchedStrategy : public MachineSchedStrategy {
> AluT_W,
> AluT_XYZW,
> AluPredX,
> + AluTrans,
> AluDiscarded, // LLVM Instructions that are going to be eliminated
> AluLast
> };
> diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
> index da614c7..78403de 100644
> --- a/lib/Target/R600/R600Packetizer.cpp
> +++ b/lib/Target/R600/R600Packetizer.cpp
> @@ -77,12 +77,14 @@ private:
> do {
> if (TII->isPredicated(BI))
> continue;
> - if (TII->isTransOnly(BI))
> - continue;
> int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE);
> if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
> continue;
> unsigned Dst = BI->getOperand(0).getReg();
> + if (TII->isTransOnly(BI)) {
> + Result[Dst] = AMDGPU::PS;
> + continue;
> + }
> if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
> BI->getOpcode() == AMDGPU::DOT4_eg) {
> Result[Dst] = AMDGPU::PV_X;
> @@ -150,10 +152,6 @@ public:
> return true;
> if (!TII->isALUInstr(MI->getOpcode()))
> return true;
> - if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
> - return true;
> - if (TII->isTransOnly(MI))
> - return true;
> return false;
> }
>
> @@ -161,7 +159,7 @@ public:
> // together.
> bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
> MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
> - if (getSlot(MII) <= getSlot(MIJ))
> + if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
> return false;
> // Does MII and MIJ share the same pred_sel ?
> int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL),
> @@ -195,11 +193,16 @@ public:
> MI->getOperand(LastOp).setImm(Bit);
> }
>
> - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
> + bool isBundlableWithCurrentPMI(MachineInstr *MI,
> + const DenseMap<unsigned, unsigned> &PV,
> + std::vector<R600InstrInfo::BankSwizzle> &BS,
> + bool &isTransSlot) {
> + isTransSlot = TII->isTransOnly(MI);
> +
> + // Are the Constants limitations met ?
> CurrentPacketMIs.push_back(MI);
> - bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
> - DEBUG(
> - if (!FitsConstLimits) {
> + if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
> + DEBUG(
> dbgs() << "Couldn't pack :\n";
> MI->dump();
> dbgs() << "with the following packets :\n";
> @@ -208,14 +211,15 @@ public:
> dbgs() << "\n";
> }
> dbgs() << "because of Consts read limitations\n";
> - });
> - const DenseMap<unsigned, unsigned> &PV =
> - getPreviousVector(CurrentPacketMIs.front());
> - std::vector<R600InstrInfo::BankSwizzle> BS;
> - bool FitsReadPortLimits =
> - TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);
> - DEBUG(
> - if (!FitsReadPortLimits) {
> + );
> + CurrentPacketMIs.pop_back();
> + return false;
> + }
> +
> + // Is there a BankSwizzle set that meet Read Port limitations ?
> + if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
> + PV, BS, isTransSlot)) {
> + DEBUG(
> dbgs() << "Couldn't pack :\n";
> MI->dump();
> dbgs() << "with the following packets :\n";
> @@ -224,25 +228,43 @@ public:
> dbgs() << "\n";
> }
> dbgs() << "because of Read port limitations\n";
> - });
> - bool isBundlable = FitsConstLimits && FitsReadPortLimits;
> - if (isBundlable) {
> + );
> + CurrentPacketMIs.pop_back();
> + return false;
> + }
> +
> + CurrentPacketMIs.pop_back();
> + return true;
> + }
> +
> + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
> + MachineBasicBlock::iterator FirstInBundle =
> + CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
> + const DenseMap<unsigned, unsigned> &PV =
> + getPreviousVector(FirstInBundle);
> + std::vector<R600InstrInfo::BankSwizzle> BS;
> + bool isTransSlot;
> +
> + if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
> for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
> MachineInstr *MI = CurrentPacketMIs[i];
> - unsigned Op = TII->getOperandIdx(MI->getOpcode(),
> - R600Operands::BANK_SWIZZLE);
> - MI->getOperand(Op).setImm(BS[i]);
> + unsigned Op = TII->getOperandIdx(MI->getOpcode(),
> + R600Operands::BANK_SWIZZLE);
> + MI->getOperand(Op).setImm(BS[i]);
> }
> + unsigned Op = TII->getOperandIdx(MI->getOpcode(),
> + R600Operands::BANK_SWIZZLE);
> + MI->getOperand(Op).setImm(BS.back());
> + if (!CurrentPacketMIs.empty())
> + setIsLastBit(CurrentPacketMIs.back(), 0);
> + substitutePV(MI, PV);
> + MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
> + if (isTransSlot) {
> + endPacket(llvm::next(It)->getParent(), llvm::next(It));
> + }
> + return It;
> }
> - CurrentPacketMIs.pop_back();
> - if (!isBundlable) {
> - endPacket(MI->getParent(), MI);
> - substitutePV(MI, getPreviousVector(MI));
> - return VLIWPacketizerList::addToPacket(MI);
> - }
> - if (!CurrentPacketMIs.empty())
> - setIsLastBit(CurrentPacketMIs.back(), 0);
> - substitutePV(MI, PV);
> + endPacket(MI->getParent(), MI);
> return VLIWPacketizerList::addToPacket(MI);
> }
> };
> diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
> index a8b9b70..323bf9f 100644
> --- a/lib/Target/R600/R600RegisterInfo.td
> +++ b/lib/Target/R600/R600RegisterInfo.td
> @@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254, "X">;
> def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
> def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
> def PV_W : R600RegWithChan<"PV.W", 254, "W">;
> +def PS: R600Reg<"PS", 255>;
> def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
> def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
> def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
> diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
> index 003590b..f6eb6a6 100644
> --- a/test/CodeGen/R600/fdiv.ll
> +++ b/test/CodeGen/R600/fdiv.ll
> @@ -1,13 +1,13 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>
> define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
> %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
> diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
> index f5716e1..5a608fd 100644
> --- a/test/CodeGen/R600/fp_to_sint.ll
> +++ b/test/CodeGen/R600/fp_to_sint.ll
> @@ -1,10 +1,10 @@
> ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> ; CHECK: @fp_to_sint_v4i32
> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>
> define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
> %value = load <4 x float> addrspace(1) * %in
> diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
> index 1c3c0c6..b07e286 100644
> --- a/test/CodeGen/R600/fp_to_uint.ll
> +++ b/test/CodeGen/R600/fp_to_uint.ll
> @@ -1,10 +1,10 @@
> ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> ; CHECK: @fp_to_uint_v4i32
> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
> +; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>
> define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
> %value = load <4 x float> addrspace(1) * %in
> diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
> index 9b28167..b444fa7 100644
> --- a/test/CodeGen/R600/llvm.cos.ll
> +++ b/test/CodeGen/R600/llvm.cos.ll
> @@ -1,6 +1,6 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> -;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>
> define void @test() {
> %r0 = call float @llvm.R600.load.input(i32 0)
> diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
> index 1422083..0f51cf4 100644
> --- a/test/CodeGen/R600/llvm.pow.ll
> +++ b/test/CodeGen/R600/llvm.pow.ll
> @@ -1,8 +1,8 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> -;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
> +;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>
> define void @test() {
> %r0 = call float @llvm.R600.load.input(i32 0)
> diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
> index 803dc2d..09cc3d2 100644
> --- a/test/CodeGen/R600/llvm.sin.ll
> +++ b/test/CodeGen/R600/llvm.sin.ll
> @@ -1,6 +1,6 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> -;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> +;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>
> define void @test() {
> %r0 = call float @llvm.R600.load.input(i32 0)
> --
> 1.8.3.1
>
More information about the llvm-commits
mailing list