<div dir="ltr"><div>I see the following error in a self-hosted -Werror build; could you please fix it?</div><div><br></div><div>llvm/lib/Target/R600/R600InstrInfo.cpp:462:69: error: variable 'TransBS' is uninitialized when used here [-Werror,-Wuninitialized]</div>
<div> return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);</div><div> ^~~~~~~</div><div>llvm/lib/Target/R600/R600InstrInfo.cpp:452:3: note: variable 'TransBS' is declared here</div>
<div> BankSwizzle TransBS;</div><div> ^</div><div><br></div><div><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Sat, Jun 29, 2013 at 11:32 PM, Vincent Lejeune <span dir="ltr">&lt;<a href="mailto:vljn@ovi.com" target="_blank">vljn@ovi.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: vljn<br>
Date: Sat Jun 29 14:32:43 2013<br>
New Revision: 185268<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=185268&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=185268&view=rev</a><br>
Log:<br>
R600: Support schedule and packetization of trans-only inst<br>
<br>
Modified:<br>
llvm/trunk/lib/Target/R600/R600InstrInfo.cpp<br>
llvm/trunk/lib/Target/R600/R600InstrInfo.h<br>
llvm/trunk/lib/Target/R600/R600Instructions.td<br>
llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp<br>
llvm/trunk/lib/Target/R600/R600MachineScheduler.h<br>
llvm/trunk/lib/Target/R600/R600Packetizer.cpp<br>
llvm/trunk/lib/Target/R600/R600RegisterInfo.td<br>
llvm/trunk/test/CodeGen/R600/fdiv.ll<br>
llvm/trunk/test/CodeGen/R600/fp_to_sint.ll<br>
llvm/trunk/test/CodeGen/R600/fp_to_uint.ll<br>
llvm/trunk/test/CodeGen/R600/llvm.cos.ll<br>
llvm/trunk/test/CodeGen/R600/llvm.pow.ll<br>
llvm/trunk/test/CodeGen/R600/llvm.sin.ll<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600InstrInfo.cpp (original)<br>
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.cpp Sat Jun 29 14:32:43 2013<br>
@@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI)<br>
<br>
std::vector<std::pair<int, unsigned> ><br>
R600InstrInfo::ExtractSrcs(MachineInstr *MI,<br>
- const DenseMap<unsigned, unsigned> &PV)<br>
- const {<br>
+ const DenseMap<unsigned, unsigned> &PV,<br>
+ unsigned &ConstCount) const {<br>
+ ConstCount = 0;<br>
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);<br>
const std::pair<int, unsigned> DummyPair(-1, 0);<br>
std::vector<std::pair<int, unsigned> > Result;<br>
@@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr<br>
for (unsigned n = Srcs.size(); i < n; ++i) {<br>
unsigned Reg = Srcs[i].first->getReg();<br>
unsigned Index = RI.getEncodingValue(Reg) & 0xff;<br>
- unsigned Chan = RI.getHWRegChan(Reg);<br>
if (Reg == AMDGPU::OQAP) {<br>
Result.push_back(std::pair<int, unsigned>(Index, 0));<br>
}<br>
- if (Index > 127) {<br>
- Result.push_back(DummyPair);<br>
+ if (PV.find(Reg) != PV.end()) {<br>
+ // 255 is used to tells its a PS/PV reg<br>
+ Result.push_back(std::pair<int, unsigned>(255, 0));<br>
continue;<br>
}<br>
- if (PV.find(Reg) != PV.end()) {<br>
+ if (Index > 127) {<br>
+ ConstCount++;<br>
Result.push_back(DummyPair);<br>
continue;<br>
}<br>
+ unsigned Chan = RI.getHWRegChan(Reg);<br>
Result.push_back(std::pair<int, unsigned>(Index, Chan));<br>
}<br>
for (; i < 3; ++i)<br>
@@ -305,23 +308,51 @@ Swizzle(std::vector<std::pair<int, unsig<br>
return Src;<br>
}<br>
<br>
-bool<br>
-R600InstrInfo::isLegal(<br>
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
- const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
- unsigned CheckedSize) const {<br>
+static unsigned<br>
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {<br>
+ switch (Swz) {<br>
+ case R600InstrInfo::ALU_VEC_012_SCL_210: {<br>
+ unsigned Cycles[3] = { 2, 1, 0};<br>
+ return Cycles[Op];<br>
+ }<br>
+ case R600InstrInfo::ALU_VEC_021_SCL_122: {<br>
+ unsigned Cycles[3] = { 1, 2, 2};<br>
+ return Cycles[Op];<br>
+ }<br>
+ case R600InstrInfo::ALU_VEC_120_SCL_212: {<br>
+ unsigned Cycles[3] = { 2, 1, 2};<br>
+ return Cycles[Op];<br>
+ }<br>
+ case R600InstrInfo::ALU_VEC_102_SCL_221: {<br>
+ unsigned Cycles[3] = { 2, 2, 1};<br>
+ return Cycles[Op];<br>
+ }<br>
+ default:<br>
+ llvm_unreachable("Wrong Swizzle for Trans Slot");<br>
+ return 0;<br>
+ }<br>
+}<br>
+<br>
+/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed<br>
+/// in the same Instruction Group while meeting read port limitations given a<br>
+/// Swz swizzle sequence.<br>
+unsigned R600InstrInfo::isLegalUpTo(<br>
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+ R600InstrInfo::BankSwizzle TransSwz) const {<br>
int Vector[4][3];<br>
memset(Vector, -1, sizeof(Vector));<br>
- for (unsigned i = 0; i < CheckedSize; i++) {<br>
+ for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {<br>
const std::vector<std::pair<int, unsigned> > &Srcs =<br>
Swizzle(IGSrcs[i], Swz[i]);<br>
for (unsigned j = 0; j < 3; j++) {<br>
const std::pair<int, unsigned> &Src = Srcs[j];<br>
- if (Src.first < 0)<br>
+ if (Src.first < 0 || Src.first == 255)<br>
continue;<br>
if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {<br>
- if (Swz[i] != R600InstrInfo::ALU_VEC_012 &&<br>
- Swz[i] != R600InstrInfo::ALU_VEC_021) {<br>
+ if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&<br>
+ Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {<br>
// The value from output queue A (denoted by register OQAP) can<br>
// only be fetched during the first cycle.<br>
return false;<br>
@@ -332,51 +363,126 @@ R600InstrInfo::isLegal(<br>
if (Vector[Src.second][j] < 0)<br>
Vector[Src.second][j] = Src.first;<br>
if (Vector[Src.second][j] != Src.first)<br>
- return false;<br>
+ return i;<br>
}<br>
}<br>
- return true;<br>
+ // Now check Trans Alu<br>
+ for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {<br>
+ const std::pair<int, unsigned> &Src = TransSrcs[i];<br>
+ unsigned Cycle = getTransSwizzle(TransSwz, i);<br>
+ if (Src.first < 0)<br>
+ continue;<br>
+ if (Src.first == 255)<br>
+ continue;<br>
+ if (Vector[Src.second][Cycle] < 0)<br>
+ Vector[Src.second][Cycle] = Src.first;<br>
+ if (Vector[Src.second][Cycle] != Src.first)<br>
+ return IGSrcs.size() - 1;<br>
+ }<br>
+ return IGSrcs.size();<br>
}<br>
<br>
-bool<br>
-R600InstrInfo::recursiveFitsFPLimitation(<br>
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
- std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
- unsigned Depth) const {<br>
- if (!isLegal(IGSrcs, SwzCandidate, Depth))<br>
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next<br>
+/// (in lexicographic term) swizzle sequence assuming that all swizzles after<br>
+/// Idx can be skipped<br>
+static bool<br>
+NextPossibleSolution(<br>
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
+ unsigned Idx) {<br>
+ assert(Idx < SwzCandidate.size());<br>
+ int ResetIdx = Idx;<br>
+ while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)<br>
+ ResetIdx --;<br>
+ for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {<br>
+ SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;<br>
+ }<br>
+ if (ResetIdx == -1)<br>
return false;<br>
- if (IGSrcs.size() == Depth)<br>
- return true;<br>
- unsigned i = SwzCandidate[Depth];<br>
- for (; i < 6; i++) {<br>
- SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;<br>
- if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))<br>
+ SwzCandidate[ResetIdx]++;<br>
+ return true;<br>
+}<br>
+<br>
+/// Enumerate all possible Swizzle sequence to find one that can meet all<br>
+/// read port requirements.<br>
+bool R600InstrInfo::FindSwizzleForVectorSlot(<br>
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+ R600InstrInfo::BankSwizzle TransSwz) const {<br>
+ unsigned ValidUpTo = 0;<br>
+ do {<br>
+ ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);<br>
+ if (ValidUpTo == IGSrcs.size())<br>
return true;<br>
- }<br>
- SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;<br>
+ } while (NextPossibleSolution(SwzCandidate, ValidUpTo));<br>
return false;<br>
}<br>
<br>
+/// Instructions in Trans slot can't read gpr at cycle 0 if they also read<br>
+/// a const, and can't read a gpr at cycle 1 if they read 2 const.<br>
+static bool<br>
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,<br>
+ const std::vector<std::pair<int, unsigned> > &TransOps,<br>
+ unsigned ConstCount) {<br>
+ for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {<br>
+ const std::pair<int, unsigned> &Src = TransOps[i];<br>
+ unsigned Cycle = getTransSwizzle(TransSwz, i);<br>
+ if (Src.first < 0)<br>
+ continue;<br>
+ if (ConstCount > 0 && Cycle == 0)<br>
+ return false;<br>
+ if (ConstCount > 1 && Cycle == 1)<br>
+ return false;<br>
+ }<br>
+ return true;<br>
+}<br>
+<br>
bool<br>
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,<br>
- const DenseMap<unsigned, unsigned> &PV,<br>
- std::vector<BankSwizzle> &ValidSwizzle)<br>
+ const DenseMap<unsigned, unsigned> &PV,<br>
+ std::vector<BankSwizzle> &ValidSwizzle,<br>
+ bool isLastAluTrans)<br>
const {<br>
//Todo : support shared src0 - src1 operand<br>
<br>
std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;<br>
ValidSwizzle.clear();<br>
+ unsigned ConstCount;<br>
+ BankSwizzle TransBS;<br>
for (unsigned i = 0, e = IG.size(); i < e; ++i) {<br>
- IGSrcs.push_back(ExtractSrcs(IG[i], PV));<br>
+ IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));<br>
unsigned Op = getOperandIdx(IG[i]->getOpcode(),<br>
AMDGPU::OpName::bank_swizzle);<br>
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)<br>
IG[i]->getOperand(Op).getImm());<br>
}<br>
- bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);<br>
- if (!Result)<br>
- return false;<br>
- return true;<br>
+ std::vector<std::pair<int, unsigned> > TransOps;<br>
+ if (!isLastAluTrans)<br>
+ return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);<br>
+<br>
+ TransOps = IGSrcs.back();<br>
+ IGSrcs.pop_back();<br>
+ ValidSwizzle.pop_back();<br>
+<br>
+ static const R600InstrInfo::BankSwizzle TransSwz[] = {<br>
+ ALU_VEC_012_SCL_210,<br>
+ ALU_VEC_021_SCL_122,<br>
+ ALU_VEC_120_SCL_212,<br>
+ ALU_VEC_102_SCL_221<br>
+ };<br>
+ for (unsigned i = 0; i < 4; i++) {<br>
+ TransBS = TransSwz[i];<br>
+ if (!isConstCompatible(TransBS, TransOps, ConstCount))<br>
+ continue;<br>
+ bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,<br>
+ TransBS);<br>
+ if (Result) {<br>
+ ValidSwizzle.push_back(TransBS);<br>
+ return true;<br>
+ }<br>
+ }<br>
+<br>
+ return false;<br>
}<br>
<br>
<br>
@@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(<br>
}<br>
<br>
bool<br>
-R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {<br>
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)<br>
+ const {<br>
std::vector<unsigned> Consts;<br>
for (unsigned i = 0, n = MIs.size(); i < n; i++) {<br>
MachineInstr *MI = MIs[i];<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600InstrInfo.h (original)<br>
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.h Sat Jun 29 14:32:43 2013<br>
@@ -84,26 +84,38 @@ namespace llvm {<br>
SmallVector<std::pair<MachineOperand *, int64_t>, 3><br>
getSrcs(MachineInstr *MI) const;<br>
<br>
- bool isLegal(<br>
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
- const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
- unsigned CheckedSize) const;<br>
- bool recursiveFitsFPLimitation(<br>
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
- std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
- unsigned Depth = 0) const;<br>
+ unsigned isLegalUpTo(<br>
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,<br>
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+ R600InstrInfo::BankSwizzle TransSwz) const;<br>
+<br>
+ bool FindSwizzleForVectorSlot(<br>
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,<br>
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,<br>
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,<br>
+ R600InstrInfo::BankSwizzle TransSwz) const;<br>
<br>
/// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210<br>
/// returns true and the first (in lexical order) BankSwizzle affectation<br>
/// starting from the one already provided in the Instruction Group MIs that<br>
/// fits Read Port limitations in BS if available. Otherwise returns false<br>
/// and undefined content in BS.<br>
+ /// isLastAluTrans should be set if the last Alu of MIs will be executed on<br>
+ /// Trans ALU. In this case, ValidTSwizzle returns the BankSwizzle value to<br>
+ /// apply to the last instruction.<br>
/// PV holds GPR to PV registers in the Instruction Group MIs.<br>
bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,<br>
const DenseMap<unsigned, unsigned> &PV,<br>
- std::vector<BankSwizzle> &BS) const;<br>
+ std::vector<BankSwizzle> &BS,<br>
+ bool isLastAluTrans) const;<br>
+<br>
+ /// An instruction group can only access 2 channel pair (either [XY] or [ZW])<br>
+ /// from KCache bank on R700+. This function check if MI set in input meet<br>
+ /// this limitations<br>
+ bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;<br>
+ /// Same but using const index set instead of MI set.<br>
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;<br>
- bool canBundle(const std::vector<MachineInstr *> &) const;<br>
<br>
/// \breif Vector instructions are instructions that must fill all<br>
/// instruction slots within an instruction group.<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600Instructions.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600Instructions.td (original)<br>
+++ llvm/trunk/lib/Target/R600/R600Instructions.td Sat Jun 29 14:32:43 2013<br>
@@ -1489,6 +1489,8 @@ let hasSideEffects = 1 in {<br>
<br>
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {<br>
let Pattern = [];<br>
+ let TransOnly = 0;<br>
+ let Itinerary = AnyALU;<br>
}<br>
<br>
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp (original)<br>
+++ llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp Sat Jun 29 14:32:43 2013<br>
@@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(Sched<br>
MRI = &DAG->MRI;<br>
CurInstKind = IDOther;<br>
CurEmitted = 0;<br>
- OccupedSlotsMask = 15;<br>
+ OccupedSlotsMask = 31;<br>
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();<br>
InstKindLimit[IDOther] = 32;<br>
<br>
@@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit<br>
if (NextInstKind != CurInstKind) {<br>
DEBUG(dbgs() << "Instruction Type Switch\n");<br>
if (NextInstKind != IDAlu)<br>
- OccupedSlotsMask = 15;<br>
+ OccupedSlotsMask |= 31;<br>
CurEmitted = 0;<br>
CurInstKind = NextInstKind;<br>
}<br>
@@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClas<br>
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {<br>
MachineInstr *MI = SU->getInstr();<br>
<br>
+ if (TII->isTransOnly(MI))<br>
+ return AluTrans;<br>
+<br>
switch (MI->getOpcode()) {<br>
case AMDGPU::PRED_X:<br>
return AluPredX;<br>
@@ -346,7 +349,7 @@ SUnit *R600SchedStrategy::PopInst(std::v<br>
It != E; ++It) {<br>
SUnit *SU = *It;<br>
InstructionsGroupCandidate.push_back(SU->getInstr());<br>
- if (TII->canBundle(InstructionsGroupCandidate)) {<br>
+ if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {<br>
InstructionsGroupCandidate.pop_back();<br>
Q.erase((It + 1).base());<br>
return SU;<br>
@@ -421,7 +424,8 @@ unsigned R600SchedStrategy::AvailablesAl<br>
return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +<br>
AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +<br>
AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +<br>
- AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();<br>
+ AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +<br>
+ AvailableAlus[AluPredX].size();<br>
}<br>
<br>
SUnit* R600SchedStrategy::pickAlu() {<br>
@@ -429,20 +433,27 @@ SUnit* R600SchedStrategy::pickAlu() {<br>
if (!OccupedSlotsMask) {<br>
// Bottom up scheduling : predX must comes first<br>
if (!AvailableAlus[AluPredX].empty()) {<br>
- OccupedSlotsMask = 15;<br>
+ OccupedSlotsMask |= 31;<br>
return PopInst(AvailableAlus[AluPredX]);<br>
}<br>
// Flush physical reg copies (RA will discard them)<br>
if (!AvailableAlus[AluDiscarded].empty()) {<br>
- OccupedSlotsMask = 15;<br>
+ OccupedSlotsMask |= 31;<br>
return PopInst(AvailableAlus[AluDiscarded]);<br>
}<br>
// If there is a T_XYZW alu available, use it<br>
if (!AvailableAlus[AluT_XYZW].empty()) {<br>
- OccupedSlotsMask = 15;<br>
+ OccupedSlotsMask |= 15;<br>
return PopInst(AvailableAlus[AluT_XYZW]);<br>
}<br>
}<br>
+ bool TransSlotOccuped = OccupedSlotsMask & 16;<br>
+ if (!TransSlotOccuped) {<br>
+ if (!AvailableAlus[AluTrans].empty()) {<br>
+ OccupedSlotsMask |= 16;<br>
+ return PopInst(AvailableAlus[AluTrans]);<br>
+ }<br>
+ }<br>
for (int Chan = 3; Chan > -1; --Chan) {<br>
bool isOccupied = OccupedSlotsMask & (1 << Chan);<br>
if (!isOccupied) {<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.h?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.h?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600MachineScheduler.h (original)<br>
+++ llvm/trunk/lib/Target/R600/R600MachineScheduler.h Sat Jun 29 14:32:43 2013<br>
@@ -46,6 +46,7 @@ class R600SchedStrategy : public Machine<br>
AluT_W,<br>
AluT_XYZW,<br>
AluPredX,<br>
+ AluTrans,<br>
AluDiscarded, // LLVM Instructions that are going to be eliminated<br>
AluLast<br>
};<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600Packetizer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Packetizer.cpp?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Packetizer.cpp?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600Packetizer.cpp (original)<br>
+++ llvm/trunk/lib/Target/R600/R600Packetizer.cpp Sat Jun 29 14:32:43 2013<br>
@@ -77,8 +77,6 @@ private:<br>
do {<br>
if (TII->isPredicated(BI))<br>
continue;<br>
- if (TII->isTransOnly(BI))<br>
- continue;<br>
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);<br>
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)<br>
continue;<br>
@@ -87,6 +85,10 @@ private:<br>
continue;<br>
}<br>
unsigned Dst = BI->getOperand(DstIdx).getReg();<br>
+ if (TII->isTransOnly(BI)) {<br>
+ Result[Dst] = AMDGPU::PS;<br>
+ continue;<br>
+ }<br>
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||<br>
BI->getOpcode() == AMDGPU::DOT4_eg) {<br>
Result[Dst] = AMDGPU::PV_X;<br>
@@ -157,10 +159,6 @@ public:<br>
return true;<br>
if (!TII->isALUInstr(MI->getOpcode()))<br>
return true;<br>
- if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)<br>
- return true;<br>
- if (TII->isTransOnly(MI))<br>
- return true;<br>
if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)<br>
return true;<br>
return false;<br>
@@ -170,7 +168,7 @@ public:<br>
// together.<br>
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {<br>
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();<br>
- if (getSlot(MII) <= getSlot(MIJ))<br>
+ if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))<br>
return false;<br>
// Does MII and MIJ share the same pred_sel ?<br>
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),<br>
@@ -204,11 +202,16 @@ public:<br>
MI->getOperand(LastOp).setImm(Bit);<br>
}<br>
<br>
- MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {<br>
+ bool isBundlableWithCurrentPMI(MachineInstr *MI,<br>
+ const DenseMap<unsigned, unsigned> &PV,<br>
+ std::vector<R600InstrInfo::BankSwizzle> &BS,<br>
+ bool &isTransSlot) {<br>
+ isTransSlot = TII->isTransOnly(MI);<br>
+<br>
+ // Are the Constants limitations met ?<br>
CurrentPacketMIs.push_back(MI);<br>
- bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);<br>
- DEBUG(<br>
- if (!FitsConstLimits) {<br>
+ if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {<br>
+ DEBUG(<br>
dbgs() << "Couldn't pack :\n";<br>
MI->dump();<br>
dbgs() << "with the following packets :\n";<br>
@@ -217,14 +220,15 @@ public:<br>
dbgs() << "\n";<br>
}<br>
dbgs() << "because of Consts read limitations\n";<br>
- });<br>
- const DenseMap<unsigned, unsigned> &PV =<br>
- getPreviousVector(CurrentPacketMIs.front());<br>
- std::vector<R600InstrInfo::BankSwizzle> BS;<br>
- bool FitsReadPortLimits =<br>
- TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);<br>
- DEBUG(<br>
- if (!FitsReadPortLimits) {<br>
+ );<br>
+ CurrentPacketMIs.pop_back();<br>
+ return false;<br>
+ }<br>
+<br>
+ // Is there a BankSwizzle set that meet Read Port limitations ?<br>
+ if (!TII->fitsReadPortLimitations(CurrentPacketMIs,<br>
+ PV, BS, isTransSlot)) {<br>
+ DEBUG(<br>
dbgs() << "Couldn't pack :\n";<br>
MI->dump();<br>
dbgs() << "with the following packets :\n";<br>
@@ -233,25 +237,43 @@ public:<br>
dbgs() << "\n";<br>
}<br>
dbgs() << "because of Read port limitations\n";<br>
- });<br>
- bool isBundlable = FitsConstLimits && FitsReadPortLimits;<br>
- if (isBundlable) {<br>
+ );<br>
+ CurrentPacketMIs.pop_back();<br>
+ return false;<br>
+ }<br>
+<br>
+ CurrentPacketMIs.pop_back();<br>
+ return true;<br>
+ }<br>
+<br>
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {<br>
+ MachineBasicBlock::iterator FirstInBundle =<br>
+ CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();<br>
+ const DenseMap<unsigned, unsigned> &PV =<br>
+ getPreviousVector(FirstInBundle);<br>
+ std::vector<R600InstrInfo::BankSwizzle> BS;<br>
+ bool isTransSlot;<br>
+<br>
+ if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {<br>
for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {<br>
MachineInstr *MI = CurrentPacketMIs[i];<br>
- unsigned Op = TII->getOperandIdx(MI->getOpcode(),<br>
- AMDGPU::OpName::bank_swizzle);<br>
- MI->getOperand(Op).setImm(BS[i]);<br>
+ unsigned Op = TII->getOperandIdx(MI->getOpcode(),<br>
+ AMDGPU::OpName::bank_swizzle);<br>
+ MI->getOperand(Op).setImm(BS[i]);<br>
+ }<br>
+ unsigned Op = TII->getOperandIdx(MI->getOpcode(),<br>
+ AMDGPU::OpName::bank_swizzle);<br>
+ MI->getOperand(Op).setImm(BS.back());<br>
+ if (!CurrentPacketMIs.empty())<br>
+ setIsLastBit(CurrentPacketMIs.back(), 0);<br>
+ substitutePV(MI, PV);<br>
+ MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);<br>
+ if (isTransSlot) {<br>
+ endPacket(llvm::next(It)->getParent(), llvm::next(It));<br>
}<br>
+ return It;<br>
}<br>
- CurrentPacketMIs.pop_back();<br>
- if (!isBundlable) {<br>
- endPacket(MI->getParent(), MI);<br>
- substitutePV(MI, getPreviousVector(MI));<br>
- return VLIWPacketizerList::addToPacket(MI);<br>
- }<br>
- if (!CurrentPacketMIs.empty())<br>
- setIsLastBit(CurrentPacketMIs.back(), 0);<br>
- substitutePV(MI, PV);<br>
+ endPacket(MI->getParent(), MI);<br>
return VLIWPacketizerList::addToPacket(MI);<br>
}<br>
};<br>
<br>
Modified: llvm/trunk/lib/Target/R600/R600RegisterInfo.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600RegisterInfo.td?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600RegisterInfo.td?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/R600/R600RegisterInfo.td (original)<br>
+++ llvm/trunk/lib/Target/R600/R600RegisterInfo.td Sat Jun 29 14:32:43 2013<br>
@@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254,<br>
def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;<br>
def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;<br>
def PV_W : R600RegWithChan<"PV.W", 254, "W">;<br>
+def PS: R600Reg<"PS", 255>;<br>
def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;<br>
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;<br>
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/fdiv.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fdiv.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fdiv.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/fdiv.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/fdiv.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,13 +1,13 @@<br>
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
<br>
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {<br>
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/fp_to_sint.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_sint.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_sint.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/fp_to_sint.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/fp_to_sint.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,10 +1,10 @@<br>
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
; CHECK: @fp_to_sint_v4i32<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {<br>
%value = load <4 x float> addrspace(1) * %in<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/fp_to_uint.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_uint.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fp_to_uint.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/fp_to_uint.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/fp_to_uint.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,10 +1,10 @@<br>
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
; CHECK: @fp_to_uint_v4i32<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {<br>
%value = load <4 x float> addrspace(1) * %in<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/llvm.cos.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.cos.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.cos.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/llvm.cos.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/llvm.cos.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,6 +1,6 @@<br>
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
-;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
define void @test() {<br>
%r0 = call float @llvm.R600.load.input(i32 0)<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/llvm.pow.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.pow.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.pow.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/llvm.pow.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/llvm.pow.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,8 +1,8 @@<br>
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}<br>
+;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
define void @test() {<br>
%r0 = call float @llvm.R600.load.input(i32 0)<br>
<br>
Modified: llvm/trunk/test/CodeGen/R600/llvm.sin.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.sin.ll?rev=185268&r1=185267&r2=185268&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.sin.ll?rev=185268&r1=185267&r2=185268&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/test/CodeGen/R600/llvm.sin.ll (original)<br>
+++ llvm/trunk/test/CodeGen/R600/llvm.sin.ll Sat Jun 29 14:32:43 2013<br>
@@ -1,6 +1,6 @@<br>
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s<br>
<br>
-;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}<br>
+;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}<br>
<br>
define void @test() {<br>
%r0 = call float @llvm.R600.load.input(i32 0)<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div>Alexey Samsonov, MSK</div>
</div>