R600: Various Fixes
Tom Stellard
tom at stellard.net
Tue Jul 9 07:19:24 PDT 2013
On Tue, Jul 09, 2013 at 06:34:42AM -0700, Vincent Lejeune wrote:
> Patch 2 can be a good candidate for the stable branch,
Ok, can you mark it as a candidate before you commit
> Patch 5 too (updated version attached) but I'm not sure it will apply cleanly.
>
>
Patch 5 is:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
Go ahead and mark this as a stable candidate too, I will try to backport
it.
-Tom
>
>
> ----- Mail original -----
> > De : Tom Stellard <tom at stellard.net>
> > À : Vincent Lejeune <vljn at ovi.com>
> > Cc : "llvm-commits at cs.uiuc.edu" <llvm-commits at cs.uiuc.edu>
> > Envoyé le : Lundi 8 juillet 2013 18h11
> > Objet : Re: R600: Various Fixes
> >
> > On Sun, Jul 07, 2013 at 07:34:27AM -0700, Vincent Lejeune wrote:
> >> Hi,
> >>
> >> This patches serie fixes some bugs uncovered by running some webgl samples.
> >>
> >> Patches 1, 3, 4 fix 2 bugs in the VectorMerger pass.
> >> Patch 2 makes fcos/fsin works for all values, and not only the one between
> > -Pi and Pi.
> >> Last patch rework the predication we're doing by taking clause marker
> > into account.
> >> Currently we loose some predicate opportunities (all machine block with
> > Kcache constant
> >> becomes unpredicable atm, this can be solved later but need the
> > isPredicableFunction to accept
> >> an extra argument) but the pass output should be correct in most
> > situations.
> >>
> >>
> >
> > Patches 1-4 are:
> >
> > Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> >
> > Are any of these candidates for stable?
> >
> > -Tom
> >
> >> From cc427492b38d672f33ffd13ac3762d8064476663 Mon Sep 17 00:00:00 2001
> >> From: Vincent Lejeune <vljn at ovi.com>
> >> Date: Wed, 3 Jul 2013 21:25:38 +0200
> >> Subject: [PATCH 5/5] R600: Do not predicated basic block with multiple alu
> >> clause
> >>
> >> Test is not included as it is several 1000 lines long.
> >> To test this functionnality, a test case must generate at least 2 ALU
> > clauses,
> >> where an ALU clause is ~110 instructions long.
> >> ---
> >> lib/Target/R600/AMDGPUTargetMachine.cpp | 3 +-
> >> lib/Target/R600/R600ControlFlowFinalizer.cpp | 2 ++
> >> lib/Target/R600/R600EmitClauseMarkers.cpp | 10 ++++---
> >> lib/Target/R600/R600InstrInfo.cpp | 45
> > ++++++++++++++++++++++++++++
> >> lib/Target/R600/R600Instructions.td | 2 +-
> >> lib/Target/R600/R600Packetizer.cpp | 3 +-
> >> test/CodeGen/R600/jump-address.ll | 2 +-
> >> 7 files changed, 58 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp
> > b/lib/Target/R600/AMDGPUTargetMachine.cpp
> >> index 90f72de..59b1fd6 100644
> >> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> >> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> >> @@ -148,7 +148,7 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
> >> }
> >>
> >> bool AMDGPUPassConfig::addPreSched2() {
> >> -
> >> + addPass(createR600EmitClauseMarkers(*TM));
> >
> > Does adding this pass here mean it will be run for SI too?
> >
> >> addPass(&IfConverterID);
> >> return false;
> >> }
> >> @@ -158,7 +158,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
> >> if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> >> addPass(createAMDGPUCFGPreparationPass(*TM));
> >> addPass(createAMDGPUCFGStructurizerPass(*TM));
> >> - addPass(createR600EmitClauseMarkers(*TM));
> >> addPass(createR600ExpandSpecialInstrsPass(*TM));
> >> addPass(&FinalizeMachineBundlesID);
> >> addPass(createR600Packetizer(*TM));
> >> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> > b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> >> index 887c808..932a6a7 100644
> >> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> >> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> >> @@ -256,6 +256,7 @@ private:
> >> ClauseContent.push_back(MILit);
> >> }
> >> }
> >> + assert(ClauseContent.size() < 128 && "ALU clause is
> > too big");
> >> ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
> >> return ClauseFile(ClauseHead, ClauseContent);
> >> }
> >> @@ -276,6 +277,7 @@ private:
> >> void
> >> EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile
> > &Clause,
> >> unsigned &CfCount) {
> >> + Clause.first->getOperand(0).setImm(0);
> >> CounterPropagateAddr(Clause.first, CfCount);
> >> MachineBasicBlock *BB = Clause.first->getParent();
> >> BuildMI(BB, InsertPos->getDebugLoc(),
> > TII->get(AMDGPU::ALU_CLAUSE))
> >> diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp
> > b/lib/Target/R600/R600EmitClauseMarkers.cpp
> >> index 0aea2d7..8056634 100644
> >> --- a/lib/Target/R600/R600EmitClauseMarkers.cpp
> >> +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
> >> @@ -32,6 +32,7 @@ class R600EmitClauseMarkersPass : public
> > MachineFunctionPass {
> >> private:
> >> static char ID;
> >> const R600InstrInfo *TII;
> >> + int address;
> >
> > Coding Style address should be capitalized.
> >>
> >> unsigned OccupiedDwords(MachineInstr *MI) const {
> >> switch (MI->getOpcode()) {
> >> @@ -159,7 +160,7 @@ private:
> >> }
> >>
> >> MachineBasicBlock::iterator
> >> - MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
> > const {
> >> + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
> > {
> >> MachineBasicBlock::iterator ClauseHead = I;
> >> std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
> >> bool PushBeforeModifier = false;
> >> @@ -199,20 +200,21 @@ private:
> >> unsigned Opcode = PushBeforeModifier ?
> >> AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
> >> BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
> > TII->get(Opcode))
> >> - .addImm(0) // ADDR
> >> + .addImm(address++) // ADDR
> >> .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
> >> .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
> >> .addImm(KCacheBanks.empty()?0:2) // KM0
> >> .addImm((KCacheBanks.size() < 2)?0:2) // KM1
> >> .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
> >> .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) //
> > KLINE1
> >> - .addImm(AluInstCount); // COUNT
> >> + .addImm(AluInstCount) // COUNT
> >> + .addImm(1); // Enabled
> >> return I;
> >> }
> >>
> >> public:
> >> R600EmitClauseMarkersPass(TargetMachine &tm) :
> > MachineFunctionPass(ID),
> >> - TII(0) { }
> >> + TII(0), address(0) { }
> >>
> >> virtual bool runOnMachineFunction(MachineFunction &MF) {
> >> TII = static_cast<const R600InstrInfo
> > *>(MF.getTarget().getInstrInfo());
> >> diff --git a/lib/Target/R600/R600InstrInfo.cpp
> > b/lib/Target/R600/R600InstrInfo.cpp
> >> index 974a579..8d622d6 100644
> >> --- a/lib/Target/R600/R600InstrInfo.cpp
> >> +++ b/lib/Target/R600/R600InstrInfo.cpp
> >> @@ -650,6 +650,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand
> > &op) const {
> >> };
> >> }
> >>
> >> +static
> >> +MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB)
> > {
> >> + for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E =
> > MBB.rend();
> >> + It != E; ++It) {
> >> + if (It->getOpcode() == AMDGPU::CF_ALU ||
> >> + It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
> >> + return llvm::prior(It.base());
> >> + }
> >> + return MBB.end();
> >> +}
> >> +
> >> unsigned
> >> R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
> >> MachineBasicBlock *TBB,
> >> @@ -671,6 +682,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock
> > &MBB,
> >> BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
> >> .addMBB(TBB)
> >> .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
> >> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> >> + if (CfAlu == MBB.end())
> >> + return 1;
> >> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
> >> + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
> >> return 1;
> >> }
> >> } else {
> >> @@ -682,6 +698,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock
> > &MBB,
> >> .addMBB(TBB)
> >> .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
> >> BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
> >> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> >> + if (CfAlu == MBB.end())
> >> + return 2;
> >> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
> >> + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
> >> return 2;
> >> }
> >> }
> >> @@ -705,6 +726,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock
> > &MBB) const {
> >> MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
> >> clearFlag(predSet, 0, MO_FLAG_PUSH);
> >> I->eraseFromParent();
> >> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> >> + if (CfAlu == MBB.end())
> >> + break;
> >> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
> >> + CfAlu->setDesc(get(AMDGPU::CF_ALU));
> >> break;
> >> }
> >> case AMDGPU::JUMP:
> >> @@ -725,6 +751,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock
> > &MBB) const {
> >> MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
> >> clearFlag(predSet, 0, MO_FLAG_PUSH);
> >> I->eraseFromParent();
> >> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> >> + if (CfAlu == MBB.end())
> >> + break;
> >> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
> >> + CfAlu->setDesc(get(AMDGPU::CF_ALU));
> >> break;
> >> }
> >> case AMDGPU::JUMP:
> >> @@ -759,6 +790,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
> >>
> >> if (MI->getOpcode() == AMDGPU::KILLGT) {
> >> return false;
> >> + } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
> >> + // If the clause start in the middle of MBB then the MBB has more
> >> + // than a single clause, unable to predicate several clauses.
> >> + if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
> >> + return false;
> >> + // TODO: We don't support KC merging atm
> >> + if (MI->getOperand(3).getImm() != 0 ||
> > MI->getOperand(4).getImm() != 0)
> >> + return false;
> >> + return true;
> >> } else if (isVector(*MI)) {
> >> return false;
> >> } else {
> >> @@ -854,6 +894,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
> >> const SmallVectorImpl<MachineOperand>
> > &Pred) const {
> >> int PIdx = MI->findFirstPredOperandIdx();
> >>
> >> + if (MI->getOpcode() == AMDGPU::CF_ALU) {
> >> + MI->getOperand(8).setImm(0);
> >> + return true;
> >> + }
> >> +
> >> if (PIdx != -1) {
> >> MachineOperand &PMO = MI->getOperand(PIdx);
> >> PMO.setReg(Pred[2].getReg());
> >> diff --git a/lib/Target/R600/R600Instructions.td
> > b/lib/Target/R600/R600Instructions.td
> >> index 735dcfc..df5c438 100644
> >> --- a/lib/Target/R600/R600Instructions.td
> >> +++ b/lib/Target/R600/R600Instructions.td
> >> @@ -563,7 +563,7 @@ class ALU_CLAUSE<bits<4> inst, string
> > OpName> : AMDGPUInst <(outs),
> >> (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
> >> KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
> >> i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
> >> -i32imm:$COUNT),
> >> +i32imm:$COUNT, i32imm:$Enabled),
> >> !strconcat(OpName, " $COUNT, @$ADDR, "
> >> "KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
> >> [] >, CF_ALU_WORD0, CF_ALU_WORD1 {
> >> diff --git a/lib/Target/R600/R600Packetizer.cpp
> > b/lib/Target/R600/R600Packetizer.cpp
> >> index 5ee51fa..f4219bd 100644
> >> --- a/lib/Target/R600/R600Packetizer.cpp
> >> +++ b/lib/Target/R600/R600Packetizer.cpp
> >> @@ -304,7 +304,8 @@ bool
> > R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
> >> MachineBasicBlock::iterator End = MBB->end();
> >> MachineBasicBlock::iterator MI = MBB->begin();
> >> while (MI != End) {
> >> - if (MI->isKill()) {
> >> + if (MI->isKill() ||
> >> + (MI->getOpcode() == AMDGPU::CF_ALU &&
> > !MI->getOperand(8).getImm())) {
> >> MachineBasicBlock::iterator DeleteMI = MI;
> >> ++MI;
> >> MBB->erase(DeleteMI);
> >> diff --git a/test/CodeGen/R600/jump-address.ll
> > b/test/CodeGen/R600/jump-address.ll
> >> index ae9c8bb..9a5f1bc 100644
> >> --- a/test/CodeGen/R600/jump-address.ll
> >> +++ b/test/CodeGen/R600/jump-address.ll
> >> @@ -1,6 +1,6 @@
> >> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> >>
> >> -; CHECK: JUMP @3
> >> +; CHECK: JUMP @7
> >> ; CHECK: EXPORT
> >> ; CHECK-NOT: EXPORT
> >>
> >> --
> >> 1.8.3.1
> >>
> >
> >> _______________________________________________
> >> llvm-commits mailing list
> >> llvm-commits at cs.uiuc.edu
> >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> >
> From 8cf2e33a07816f650585212e2158fb17b70c1ca9 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 3 Jul 2013 21:25:38 +0200
> Subject: [PATCH] R600: Do not predicated basic block with multiple alu clause
>
> Test is not included as it is several 1000 lines long.
> To test this functionnality, a test case must generate at least 2 ALU clauses,
> where an ALU clause is ~110 instructions long.
> ---
> lib/Target/R600/AMDGPUTargetMachine.cpp | 5 +++-
> lib/Target/R600/R600ControlFlowFinalizer.cpp | 2 ++
> lib/Target/R600/R600EmitClauseMarkers.cpp | 14 ++++++---
> lib/Target/R600/R600InstrInfo.cpp | 45 ++++++++++++++++++++++++++++
> lib/Target/R600/R600Instructions.td | 2 +-
> lib/Target/R600/R600Packetizer.cpp | 3 +-
> test/CodeGen/R600/jump-address.ll | 2 +-
> 7 files changed, 65 insertions(+), 8 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 90f72de..7a14e50 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -148,7 +148,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
> }
>
> bool AMDGPUPassConfig::addPreSched2() {
> + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
>
> + if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> + addPass(createR600EmitClauseMarkers(*TM));
> + }
> addPass(&IfConverterID);
> return false;
> }
> @@ -158,7 +162,6 @@ bool AMDGPUPassConfig::addPreEmitPass() {
> if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> addPass(createAMDGPUCFGPreparationPass(*TM));
> addPass(createAMDGPUCFGStructurizerPass(*TM));
> - addPass(createR600EmitClauseMarkers(*TM));
> addPass(createR600ExpandSpecialInstrsPass(*TM));
> addPass(&FinalizeMachineBundlesID);
> addPass(createR600Packetizer(*TM));
> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> index 887c808..932a6a7 100644
> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -256,6 +256,7 @@ private:
> ClauseContent.push_back(MILit);
> }
> }
> + assert(ClauseContent.size() < 128 && "ALU clause is too big");
> ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
> return ClauseFile(ClauseHead, ClauseContent);
> }
> @@ -276,6 +277,7 @@ private:
> void
> EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
> unsigned &CfCount) {
> + Clause.first->getOperand(0).setImm(0);
> CounterPropagateAddr(Clause.first, CfCount);
> MachineBasicBlock *BB = Clause.first->getParent();
> BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
> diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
> index 0aea2d7..c1da64c 100644
> --- a/lib/Target/R600/R600EmitClauseMarkers.cpp
> +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
> @@ -32,6 +32,7 @@ class R600EmitClauseMarkersPass : public MachineFunctionPass {
> private:
> static char ID;
> const R600InstrInfo *TII;
> + int Address;
>
> unsigned OccupiedDwords(MachineInstr *MI) const {
> switch (MI->getOpcode()) {
> @@ -159,7 +160,7 @@ private:
> }
>
> MachineBasicBlock::iterator
> - MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
> + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
> MachineBasicBlock::iterator ClauseHead = I;
> std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
> bool PushBeforeModifier = false;
> @@ -199,20 +200,25 @@ private:
> unsigned Opcode = PushBeforeModifier ?
> AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
> BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
> - .addImm(0) // ADDR
> + // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
> + // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
> + // pass may assume that identical ALU clause starter at the beginning of a
> + // true and false branch can be factorized which is not the case.
> + .addImm(Address++) // ADDR
> .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
> .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
> .addImm(KCacheBanks.empty()?0:2) // KM0
> .addImm((KCacheBanks.size() < 2)?0:2) // KM1
> .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
> .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
> - .addImm(AluInstCount); // COUNT
> + .addImm(AluInstCount) // COUNT
> + .addImm(1); // Enabled
> return I;
> }
>
> public:
> R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
> - TII(0) { }
> + TII(0), Address(0) { }
>
> virtual bool runOnMachineFunction(MachineFunction &MF) {
> TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
> diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
> index 969a7ce..d0935fa 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -651,6 +651,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
> };
> }
>
> +static
> +MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
> + for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
> + It != E; ++It) {
> + if (It->getOpcode() == AMDGPU::CF_ALU ||
> + It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
> + return llvm::prior(It.base());
> + }
> + return MBB.end();
> +}
> +
> unsigned
> R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
> MachineBasicBlock *TBB,
> @@ -672,6 +683,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
> BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
> .addMBB(TBB)
> .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> + if (CfAlu == MBB.end())
> + return 1;
> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
> + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
> return 1;
> }
> } else {
> @@ -683,6 +699,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
> .addMBB(TBB)
> .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
> BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> + if (CfAlu == MBB.end())
> + return 2;
> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
> + CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
> return 2;
> }
> }
> @@ -706,6 +727,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
> MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
> clearFlag(predSet, 0, MO_FLAG_PUSH);
> I->eraseFromParent();
> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> + if (CfAlu == MBB.end())
> + break;
> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
> + CfAlu->setDesc(get(AMDGPU::CF_ALU));
> break;
> }
> case AMDGPU::JUMP:
> @@ -726,6 +752,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
> MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
> clearFlag(predSet, 0, MO_FLAG_PUSH);
> I->eraseFromParent();
> + MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
> + if (CfAlu == MBB.end())
> + break;
> + assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
> + CfAlu->setDesc(get(AMDGPU::CF_ALU));
> break;
> }
> case AMDGPU::JUMP:
> @@ -760,6 +791,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
>
> if (MI->getOpcode() == AMDGPU::KILLGT) {
> return false;
> + } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
> + // If the clause start in the middle of MBB then the MBB has more
> + // than a single clause, unable to predicate several clauses.
> + if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
> + return false;
> + // TODO: We don't support KC merging atm
> + if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
> + return false;
> + return true;
> } else if (isVector(*MI)) {
> return false;
> } else {
> @@ -855,6 +895,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
> const SmallVectorImpl<MachineOperand> &Pred) const {
> int PIdx = MI->findFirstPredOperandIdx();
>
> + if (MI->getOpcode() == AMDGPU::CF_ALU) {
> + MI->getOperand(8).setImm(0);
> + return true;
> + }
> +
> if (PIdx != -1) {
> MachineOperand &PMO = MI->getOperand(PIdx);
> PMO.setReg(Pred[2].getReg());
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 735dcfc..df5c438 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -563,7 +563,7 @@ class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
> (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
> KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
> i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
> -i32imm:$COUNT),
> +i32imm:$COUNT, i32imm:$Enabled),
> !strconcat(OpName, " $COUNT, @$ADDR, "
> "KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
> [] >, CF_ALU_WORD0, CF_ALU_WORD1 {
> diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
> index 5ee51fa..f4219bd 100644
> --- a/lib/Target/R600/R600Packetizer.cpp
> +++ b/lib/Target/R600/R600Packetizer.cpp
> @@ -304,7 +304,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
> MachineBasicBlock::iterator End = MBB->end();
> MachineBasicBlock::iterator MI = MBB->begin();
> while (MI != End) {
> - if (MI->isKill()) {
> + if (MI->isKill() ||
> + (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
> MachineBasicBlock::iterator DeleteMI = MI;
> ++MI;
> MBB->erase(DeleteMI);
> diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll
> index ae9c8bb..9a5f1bc 100644
> --- a/test/CodeGen/R600/jump-address.ll
> +++ b/test/CodeGen/R600/jump-address.ll
> @@ -1,6 +1,6 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> -; CHECK: JUMP @3
> +; CHECK: JUMP @7
> ; CHECK: EXPORT
> ; CHECK-NOT: EXPORT
>
> --
> 1.8.3.1
>
More information about the llvm-commits
mailing list