R600: Initial support for vliw5 scheduling
Vincent Lejeune
vljn at ovi.com
Fri Jun 28 13:21:27 PDT 2013
----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : Vincent Lejeune <vljn at ovi.com>
> Cc : "llvm-commits at cs.uiuc.edu" <llvm-commits at cs.uiuc.edu>
> Envoyé le : Jeudi 27 juin 2013 23h59
> Objet : Re: R600: Initial support for vliw5 scheduling
>
> On Thu, Jun 27, 2013 at 01:56:58PM -0700, Vincent Lejeune wrote:
>> Hi,
>>
>> These 2 patches allow trans-only instructions to be grouped with vector
>> instructions to form 5-instruction bundles on vliw5 processors.
>> I had to remove the isTransOnly attribute of FLT_TO_INT_eg instructions
>> because it looks like the docs are wrong in saying it's a trans-only
>> instruction:
>> a single FLT_TO_INT_eg instruction does not write to the PS register
>> (trans-only instructions always write to the PS register) and there is no
>> regression introduced when making it non-trans-only. (The SB backend seems
>> to mark it as not trans-only, too.)
>>
>
> Patch 1 is:
>
> Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
>
> See my inline comments for patch #2.
>
> -Tom
>
>> I have another set of patches to generalize vliw5 support to all
>> non-vector-only instructions, but it's still WIP at the moment.
>>
>>
>> Vincent
>
>> From 619dd4ef5223050e81a5f80eae2582f588efe3c7 Mon Sep 17 00:00:00 2001
>> From: Vincent Lejeune <vljn at ovi.com>
>> Date: Wed, 26 Jun 2013 18:09:58 +0200
>> Subject: [PATCH 2/2] R600: Support schedule and packetization of trans-only
>> inst
>>
>> ---
>> lib/Target/R600/R600InstrInfo.cpp | 160
> +++++++++++++++++++++++++------
>> lib/Target/R600/R600InstrInfo.h | 6 +-
>> lib/Target/R600/R600Instructions.td | 2 +
>> lib/Target/R600/R600MachineScheduler.cpp | 23 +++--
>> lib/Target/R600/R600MachineScheduler.h | 1 +
>> lib/Target/R600/R600Packetizer.cpp | 88 ++++++++++-------
>> lib/Target/R600/R600RegisterInfo.td | 1 +
>> test/CodeGen/R600/fdiv.ll | 8 +-
>> test/CodeGen/R600/llvm.cos.ll | 2 +-
>> test/CodeGen/R600/llvm.pow.ll | 4 +-
>> test/CodeGen/R600/llvm.sin.ll | 2 +-
>> 11 files changed, 218 insertions(+), 79 deletions(-)
>>
>> diff --git a/lib/Target/R600/R600InstrInfo.cpp
> b/lib/Target/R600/R600InstrInfo.cpp
>> index 8f65cc2..f972bcf 100644
>> --- a/lib/Target/R600/R600InstrInfo.cpp
>> +++ b/lib/Target/R600/R600InstrInfo.cpp
>> @@ -225,24 +225,27 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
>>
>> std::vector<std::pair<int, unsigned> >
>> R600InstrInfo::ExtractSrcs(MachineInstr *MI,
>> - const DenseMap<unsigned, unsigned>
> &PV)
>> - const {
>> + const DenseMap<unsigned, unsigned>
> &PV,
>> + unsigned &ConstCount) const {
>> + ConstCount = 0;
>> const SmallVector<std::pair<MachineOperand *, int64_t>, 3>
> Srcs = getSrcs(MI);
>> const std::pair<int, unsigned> DummyPair(-1, 0);
>> std::vector<std::pair<int, unsigned> > Result;
>> unsigned i = 0;
>> for (unsigned n = Srcs.size(); i < n; ++i) {
>> unsigned Reg = Srcs[i].first->getReg();
>> - unsigned Index = RI.getEncodingValue(Reg) & 0xff;
>> - unsigned Chan = RI.getHWRegChan(Reg);
>> - if (Index > 127) {
>> - Result.push_back(DummyPair);
>> + if (PV.find(Reg) != PV.end()) {
>> + // 255 is used to tells its a PS/PV reg
>> + Result.push_back(std::pair<int, unsigned>(255, 0));
>> continue;
>> }
>> - if (PV.find(Reg) != PV.end()) {
>> + unsigned Index = RI.getEncodingValue(Reg) & 0xff;
>> + if (Index > 127) {
>> + ConstCount++;
>> Result.push_back(DummyPair);
>> continue;
>> }
>> + unsigned Chan = RI.getHWRegChan(Reg);
>> Result.push_back(std::pair<int, unsigned>(Index, Chan));
>> }
>> for (; i < 3; ++i)
>> @@ -277,66 +280,161 @@ Swizzle(std::vector<std::pair<int,
> unsigned> > Src,
>> return Src;
>> }
>>
>> -static bool
>> -isLegal(const std::vector<std::vector<std::pair<int, unsigned>
>> > &IGSrcs,
>> +static unsigned
>> +getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
>> + switch (Swz) {
>> + case R600InstrInfo::ALU_VEC_012_SCL_210: {
>> + unsigned Cycles[3] = { 2, 1, 0};
>> + return Cycles[Op];
>> + }
>> + case R600InstrInfo::ALU_VEC_021_SCL_122: {
>> + unsigned Cycles[3] = { 1, 2, 2};
>> + return Cycles[Op];
>> + }
>> + case R600InstrInfo::ALU_VEC_120_SCL_212: {
>> + unsigned Cycles[3] = { 2, 1, 2};
>> + return Cycles[Op];
>> + }
>> + case R600InstrInfo::ALU_VEC_102_SCL_221: {
>> + unsigned Cycles[3] = { 2, 2, 1};
>> + return Cycles[Op];
>> + }
>> + default:
>> + llvm_unreachable("Wrong Swizzle for Trans Slot");
>> + return 0;
>> + }
>> +}
>> +
>> +static unsigned
>> +isLegalUpTo(const std::vector<std::vector<std::pair<int,
> unsigned> > > &IGSrcs,
>> const std::vector<R600InstrInfo::BankSwizzle> &Swz,
>> - unsigned CheckedSize) {
>> + const std::vector<std::pair<int, unsigned> >
> &TransSrcs,
>> + R600InstrInfo::BankSwizzle TransSwz) {
>> int Vector[4][3];
>> memset(Vector, -1, sizeof(Vector));
>> - for (unsigned i = 0; i < CheckedSize; i++) {
>> + for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
>> const std::vector<std::pair<int, unsigned> > &Srcs =
>> Swizzle(IGSrcs[i], Swz[i]);
>> for (unsigned j = 0; j < 3; j++) {
>> const std::pair<int, unsigned> &Src = Srcs[j];
>> - if (Src.first < 0)
>> + if (Src.first < 0 || Src.first == 255)
>> continue;
>> if (Vector[Src.second][j] < 0)
>> Vector[Src.second][j] = Src.first;
>> if (Vector[Src.second][j] != Src.first)
>> - return false;
>> + return i;
>> }
>> }
>> + // Now check Trans Alu
>> + for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
>> + const std::pair<int, unsigned> &Src = TransSrcs[i];
>> + unsigned Cycle = getTransSwizzle(TransSwz, i);
>> + if (Src.first < 0)
>> + continue;
>> + if (Src.first == 255)
>> + continue;
>> + if (Vector[Src.second][Cycle] < 0)
>> + Vector[Src.second][Cycle] = Src.first;
>> + if (Vector[Src.second][Cycle] != Src.first)
>> + return IGSrcs.size() - 1;
>> + }
>> + return IGSrcs.size();
>> +}
>> +
>> +static bool
>> +NextPossibleSolution(
>> + std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
>> + unsigned From) {
>> + assert(From < SwzCandidate.size());
>> + int ResetFrom = From;
>> + while (ResetFrom > -1 && SwzCandidate[ResetFrom] ==
> R600InstrInfo::ALU_VEC_210)
>> + ResetFrom --;
>> + for (unsigned i = ResetFrom + 1, e = SwzCandidate.size(); i < e; i++)
> {
>> + SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
>> + }
>> + if (ResetFrom == -1)
>> + return false;
>> + SwzCandidate[ResetFrom]++;
>> return true;
>> }
>>
>> static bool recursiveFitsFPLimitation(
>> const std::vector<std::vector<std::pair<int, unsigned> >
>> &IGSrcs,
>> std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
>> -unsigned Depth = 0) {
>> - if (!isLegal(IGSrcs, SwzCandidate, Depth))
>> - return false;
>> - if (IGSrcs.size() == Depth)
>> - return true;
>> - unsigned i = SwzCandidate[Depth];
>> - for (; i < 6; i++) {
>> - SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
>> - if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
>> +const std::vector<std::pair<int, unsigned> > &TransSrcs,
>> +R600InstrInfo::BankSwizzle TransSwz) {
>> + unsigned ValidUpTo = 0;
>> + do {
>> + ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
>> + if (ValidUpTo == IGSrcs.size())
>> return true;
>> - }
>> - SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
>> + } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
>> return false;
>> }
>>
>> +static bool
>> +isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
>> + const std::vector<std::pair<int, unsigned> >
> &TransOps,
>> + unsigned ConstCount) {
>> + for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
>> + const std::pair<int, unsigned> &Src = TransOps[i];
>> + unsigned Cycle = getTransSwizzle(TransSwz, i);
>> + if (Src.first < 0)
>> + continue;
>> + if (ConstCount > 0 && Cycle == 0)
>> + return false;
>> + if (ConstCount > 1 && Cycle == 1)
>> + return false;
>> + }
>> + return true;
>> +}
>> +
>> bool
>> R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr
> *> &IG,
>> - const DenseMap<unsigned,
> unsigned> &PV,
>> - std::vector<BankSwizzle>
> &ValidSwizzle)
>> + const DenseMap<unsigned,
> unsigned> &PV,
>> + std::vector<BankSwizzle>
> &ValidSwizzle,
>> + bool isLastAluTrans)
>> const {
>> //Todo : support shared src0 - src1 operand
>>
>> std::vector<std::vector<std::pair<int, unsigned> > >
> IGSrcs;
>> ValidSwizzle.clear();
>> + unsigned ConstCount;
>> + BankSwizzle TransBS;
>> for (unsigned i = 0, e = IG.size(); i < e; ++i) {
>> - IGSrcs.push_back(ExtractSrcs(IG[i], PV));
>> + IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
>> unsigned Op = getOperandIdx(IG[i]->getOpcode(),
>> R600Operands::BANK_SWIZZLE);
>> ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
>> IG[i]->getOperand(Op).getImm());
>> }
>> - bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
>> - if (!Result)
>> - return false;
>> - return true;
>> + std::vector<std::pair<int, unsigned> > TransOps;
>> + if (!isLastAluTrans)
>> + return recursiveFitsFPLimitation(IGSrcs, ValidSwizzle, TransOps,
> TransBS);
>> +
>> + TransOps = IGSrcs.back();
>> + IGSrcs.pop_back();
>> + ValidSwizzle.pop_back();
>> +
>> + static const R600InstrInfo::BankSwizzle TransSwz[] = {
>> + ALU_VEC_012_SCL_210,
>> + ALU_VEC_021_SCL_122,
>> + ALU_VEC_120_SCL_212,
>> + ALU_VEC_102_SCL_221
>> + };
>> + for (unsigned i = 0; i < 4; i++) {
>> + TransBS = TransSwz[i];
>> + if (!isConstCompatible(TransBS, TransOps, ConstCount))
>> + continue;
>> + bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle,
> TransOps,
>> + TransBS);
>> + if (Result) {
>> + ValidSwizzle.push_back(TransBS);
>> + return true;
>> + }
>> + }
>> +
>> + return false;
>> }
>>
>>
>> diff --git a/lib/Target/R600/R600InstrInfo.h
> b/lib/Target/R600/R600InstrInfo.h
>> index 79c7cdc..28fcbfd 100644
>> --- a/lib/Target/R600/R600InstrInfo.h
>> +++ b/lib/Target/R600/R600InstrInfo.h
>> @@ -85,10 +85,14 @@ namespace llvm {
>> /// starting from the one already provided in the Instruction Group MIs
> that
>> /// fits Read Port limitations in BS if available. Otherwise returns
> false
>> /// and undefined content in BS.
>> + /// isLastAluTrans should be set if the last Alu of MIs will be executed
> on
>> + /// Trans ALU. In this case, ValidTSwizzle returns the BankSwizzle value
> to
>> + /// apply to the last instruction.
>> /// PV holds GPR to PV registers in the Instruction Group MIs.
>> bool fitsReadPortLimitations(const std::vector<MachineInstr *>
> &MIs,
>> const DenseMap<unsigned, unsigned>
> &PV,
>> - std::vector<BankSwizzle> &BS)
> const;
>> + std::vector<BankSwizzle> &BS,
>> + bool isLastAluTrans) const;
>> bool fitsConstReadLimitations(const std::vector<unsigned>&)
> const;
>> bool canBundle(const std::vector<MachineInstr *> &) const;
>>
>> diff --git a/lib/Target/R600/R600Instructions.td
> b/lib/Target/R600/R600Instructions.td
>> index 83d735f..f324146 100644
>> --- a/lib/Target/R600/R600Instructions.td
>> +++ b/lib/Target/R600/R600Instructions.td
>> @@ -1478,12 +1478,14 @@ let hasSideEffects = 1 in {
>>
>> def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
>> let Pattern = [];
>> + let TransOnly = 0;
>> }
>
> I've confirmed that this is correct. FLT_TO_INT is trans only for
> r6xx/r7xx and has no restrictions on Evergreen. However, it looks like
> you forgot to change the itinerary, so the packetizer still thinks it's
> trans only.
>
>>
>> def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
>>
>> def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
>> let Pattern = [];
>> + let TransOnly = 0;
>> }
>>
>
> FLT_TO_UINT is trans only for all GPU families. My guess is you didn't
> see any regressions because the itinerary is still TransALU.
>
>> def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
>> diff --git a/lib/Target/R600/R600MachineScheduler.cpp
> b/lib/Target/R600/R600MachineScheduler.cpp
>> index a330d88..050a12f 100644
>> --- a/lib/Target/R600/R600MachineScheduler.cpp
>> +++ b/lib/Target/R600/R600MachineScheduler.cpp
>> @@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
>> MRI = &DAG->MRI;
>> CurInstKind = IDOther;
>> CurEmitted = 0;
>> - OccupedSlotsMask = 15;
>> + OccupedSlotsMask = 31;
>> InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
>> InstKindLimit[IDOther] = 32;
>>
>> @@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool
> IsTopNode) {
>> if (NextInstKind != CurInstKind) {
>> DEBUG(dbgs() << "Instruction Type Switch\n");
>> if (NextInstKind != IDAlu)
>> - OccupedSlotsMask = 15;
>> + OccupedSlotsMask |= 31;
>> CurEmitted = 0;
>> CurInstKind = NextInstKind;
>> }
>> @@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
>> R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const
> {
>> MachineInstr *MI = SU->getInstr();
>>
>> + if (TII->isTransOnly(MI))
>> + return AluTrans;
>> +
>> switch (MI->getOpcode()) {
>> case AMDGPU::PRED_X:
>> return AluPredX;
>> @@ -409,7 +412,8 @@ unsigned R600SchedStrategy::AvailablesAluCount() const
> {
>> return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
>> AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
>> AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
>> - AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();
>> + AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size()
> +
>> + AvailableAlus[AluPredX].size();
>> }
>>
>> SUnit* R600SchedStrategy::pickAlu() {
>> @@ -417,20 +421,27 @@ SUnit* R600SchedStrategy::pickAlu() {
>> if (!OccupedSlotsMask) {
>> // Bottom up scheduling : predX must comes first
>> if (!AvailableAlus[AluPredX].empty()) {
>> - OccupedSlotsMask = 15;
>> + OccupedSlotsMask |= 31;
>> return PopInst(AvailableAlus[AluPredX]);
>> }
>> // Flush physical reg copies (RA will discard them)
>> if (!AvailableAlus[AluDiscarded].empty()) {
>> - OccupedSlotsMask = 15;
>> + OccupedSlotsMask |= 31;
>> return PopInst(AvailableAlus[AluDiscarded]);
>> }
>> // If there is a T_XYZW alu available, use it
>> if (!AvailableAlus[AluT_XYZW].empty()) {
>> - OccupedSlotsMask = 15;
>> + OccupedSlotsMask |= 15;
>> return PopInst(AvailableAlus[AluT_XYZW]);
>> }
>> }
>> + bool TransSlotOccuped = OccupedSlotsMask & 16;
>> + if (!TransSlotOccuped) {
>> + if (!AvailableAlus[AluTrans].empty()) {
>> + OccupedSlotsMask |= 16;
>> + return PopInst(AvailableAlus[AluTrans]);
>> + }
>> + }
>> for (int Chan = 3; Chan > -1; --Chan) {
>> bool isOccupied = OccupedSlotsMask & (1 << Chan);
>> if (!isOccupied) {
>> diff --git a/lib/Target/R600/R600MachineScheduler.h
> b/lib/Target/R600/R600MachineScheduler.h
>> index aae8b3f..f8965d8 100644
>> --- a/lib/Target/R600/R600MachineScheduler.h
>> +++ b/lib/Target/R600/R600MachineScheduler.h
>> @@ -46,6 +46,7 @@ class R600SchedStrategy : public MachineSchedStrategy {
>> AluT_W,
>> AluT_XYZW,
>> AluPredX,
>> + AluTrans,
>> AluDiscarded, // LLVM Instructions that are going to be eliminated
>> AluLast
>> };
>> diff --git a/lib/Target/R600/R600Packetizer.cpp
> b/lib/Target/R600/R600Packetizer.cpp
>> index da614c7..7d6eef1 100644
>> --- a/lib/Target/R600/R600Packetizer.cpp
>> +++ b/lib/Target/R600/R600Packetizer.cpp
>> @@ -77,12 +77,14 @@ private:
>> do {
>> if (TII->isPredicated(BI))
>> continue;
>> - if (TII->isTransOnly(BI))
>> - continue;
>> int OperandIdx = TII->getOperandIdx(BI->getOpcode(),
> R600Operands::WRITE);
>> if (OperandIdx > -1 &&
> BI->getOperand(OperandIdx).getImm() == 0)
>> continue;
>> unsigned Dst = BI->getOperand(0).getReg();
>> + if (TII->isTransOnly(BI)) {
>> + Result[Dst] = AMDGPU::PS;
>> + continue;
>> + }
>> if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
>> BI->getOpcode() == AMDGPU::DOT4_eg) {
>> Result[Dst] = AMDGPU::PV_X;
>> @@ -150,10 +152,6 @@ public:
>> return true;
>> if (!TII->isALUInstr(MI->getOpcode()))
>> return true;
>> - if (TII->get(MI->getOpcode()).TSFlags &
> R600_InstFlag::TRANS_ONLY)
>> - return true;
>> - if (TII->isTransOnly(MI))
>> - return true;
>> return false;
>> }
>>
>> @@ -195,11 +193,16 @@ public:
>> MI->getOperand(LastOp).setImm(Bit);
>> }
>>
>> - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
>> + bool isBundlableWithCurrentPMI(MachineInstr *MI,
>> + const DenseMap<unsigned, unsigned>
> &PV,
>> +
> std::vector<R600InstrInfo::BankSwizzle> &BS,
>> + bool &isTransSlot) {
>> + isTransSlot = TII->isTransOnly(MI);
>> +
>> + // Are the Constants limitations met ?
>> CurrentPacketMIs.push_back(MI);
>> - bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
>> - DEBUG(
>> - if (!FitsConstLimits) {
>> + if (!TII->canBundle(CurrentPacketMIs)) {
>> + DEBUG(
>> dbgs() << "Couldn't pack :\n";
>> MI->dump();
>> dbgs() << "with the following packets :\n";
>> @@ -208,14 +211,15 @@ public:
>> dbgs() << "\n";
>> }
>> dbgs() << "because of Consts read
> limitations\n";
>> - });
>> - const DenseMap<unsigned, unsigned> &PV =
>> - getPreviousVector(CurrentPacketMIs.front());
>> - std::vector<R600InstrInfo::BankSwizzle> BS;
>> - bool FitsReadPortLimits =
>> - TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);
>> - DEBUG(
>> - if (!FitsReadPortLimits) {
>> + );
>> + CurrentPacketMIs.pop_back();
>> + return false;
>> + }
>> +
>> + // Is there a BankSwizzle set that meet Read Port limitations ?
>> + if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
>> + PV, BS, isTransSlot)) {
>> + DEBUG(
>> dbgs() << "Couldn't pack :\n";
>> MI->dump();
>> dbgs() << "with the following packets :\n";
>> @@ -224,25 +228,43 @@ public:
>> dbgs() << "\n";
>> }
>> dbgs() << "because of Read port
> limitations\n";
>> - });
>> - bool isBundlable = FitsConstLimits && FitsReadPortLimits;
>> - if (isBundlable) {
>> + );
>> + CurrentPacketMIs.pop_back();
>> + return false;
>> + }
>> +
>> + CurrentPacketMIs.pop_back();
>> + return true;
>> + }
>> +
>> + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
>> + MachineBasicBlock::iterator FirstInBundle =
>> + CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
>> + const DenseMap<unsigned, unsigned> &PV =
>> + getPreviousVector(FirstInBundle);
>> + std::vector<R600InstrInfo::BankSwizzle> BS;
>> + bool isTransSlot;
>> +
>> + if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
>> for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
>> MachineInstr *MI = CurrentPacketMIs[i];
>> - unsigned Op = TII->getOperandIdx(MI->getOpcode(),
>> - R600Operands::BANK_SWIZZLE);
>> - MI->getOperand(Op).setImm(BS[i]);
>> + unsigned Op = TII->getOperandIdx(MI->getOpcode(),
>> + R600Operands::BANK_SWIZZLE);
>> + MI->getOperand(Op).setImm(BS[i]);
>> }
>> + unsigned Op = TII->getOperandIdx(MI->getOpcode(),
>> + R600Operands::BANK_SWIZZLE);
>> + MI->getOperand(Op).setImm(BS.back());
>> + if (!CurrentPacketMIs.empty())
>> + setIsLastBit(CurrentPacketMIs.back(), 0);
>> + substitutePV(MI, PV);
>> + MachineBasicBlock::iterator It =
> VLIWPacketizerList::addToPacket(MI);
>> + if (isTransSlot) {
>> + endPacket(llvm::next(It)->getParent(), llvm::next(It));
>> + }
>> + return It;
>> }
>> - CurrentPacketMIs.pop_back();
>> - if (!isBundlable) {
>> - endPacket(MI->getParent(), MI);
>> - substitutePV(MI, getPreviousVector(MI));
>> - return VLIWPacketizerList::addToPacket(MI);
>> - }
>> - if (!CurrentPacketMIs.empty())
>> - setIsLastBit(CurrentPacketMIs.back(), 0);
>> - substitutePV(MI, PV);
>> + endPacket(MI->getParent(), MI);
>> return VLIWPacketizerList::addToPacket(MI);
>> }
>> };
>> diff --git a/lib/Target/R600/R600RegisterInfo.td
> b/lib/Target/R600/R600RegisterInfo.td
>> index a8b9b70..323bf9f 100644
>> --- a/lib/Target/R600/R600RegisterInfo.td
>> +++ b/lib/Target/R600/R600RegisterInfo.td
>> @@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254,
> "X">;
>> def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
>> def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
>> def PV_W : R600RegWithChan<"PV.W", 254, "W">;
>> +def PS: R600Reg<"PS", 255>;
>> def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
>> def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
>> def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
>> diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
>> index 003590b..21ed486 100644
>> --- a/test/CodeGen/R600/fdiv.ll
>> +++ b/test/CodeGen/R600/fdiv.ll
>> @@ -1,13 +1,13 @@
>> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>>
>> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW],
> T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW],
> T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW],
> T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>> ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW],
> T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>>
>> define void @test(<4 x float> addrspace(1)* %out, <4 x float>
> addrspace(1)* %in) {
>> %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
>> diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
>> index 9b28167..b444fa7 100644
>> --- a/test/CodeGen/R600/llvm.cos.ll
>> +++ b/test/CodeGen/R600/llvm.cos.ll
>> @@ -1,6 +1,6 @@
>> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>>
>> -;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>>
>> define void @test() {
>> %r0 = call float @llvm.R600.load.input(i32 0)
>> diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
>> index 1422083..0f51cf4 100644
>> --- a/test/CodeGen/R600/llvm.pow.ll
>> +++ b/test/CodeGen/R600/llvm.pow.ll
>> @@ -1,8 +1,8 @@
>> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>>
>> ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> -;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW],
> T[0-9]+\.[XYZW]}}
>> -;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
>> +;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>>
>> define void @test() {
>> %r0 = call float @llvm.R600.load.input(i32 0)
>> diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
>> index 803dc2d..09cc3d2 100644
>> --- a/test/CodeGen/R600/llvm.sin.ll
>> +++ b/test/CodeGen/R600/llvm.sin.ll
>> @@ -1,6 +1,6 @@
>> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>>
>> -;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
>> +;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>>
>> define void @test() {
>> %r0 = call float @llvm.R600.load.input(i32 0)
>> --
>> 1.8.3.1
>>
>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-R600-Support-schedule-and-packetization-of-trans-onl.patch
Type: text/x-patch
Size: 24286 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130628/490b44ae/attachment.bin>
More information about the llvm-commits
mailing list