[PATCH] R600: Make sure to schedule AR register uses and defs in the same clause
Vincent Lejeune
vljn at ovi.com
Sun Jun 2 15:32:29 PDT 2013
Only one whitespace issue (noted inline below); otherwise the patch is Reviewed-by: vljn at ovi.com
----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : llvm-commits at cs.uiuc.edu
> Cc : Tom Stellard <thomas.stellard at amd.com>
> Envoyé le : Vendredi 31 mai 2013 20h28
> Objet : [PATCH] R600: Make sure to schedule AR register uses and defs in the
same clause
>
> From: Tom Stellard <thomas.stellard at amd.com>
>
> ---
> lib/Target/R600/R600InstrInfo.cpp | 6 +++--
> lib/Target/R600/R600MachineScheduler.cpp | 37 ++++++++++++++++++++++++--
> lib/Target/R600/R600MachineScheduler.h | 2 ++
> test/CodeGen/R600/stack-simple.ll | 45 ++++++++++++++++++++++++++++++++
> 4 files changed, 86 insertions(+), 4 deletions(-)
> create mode 100644 test/CodeGen/R600/stack-simple.ll
>
> diff --git a/lib/Target/R600/R600InstrInfo.cpp
> b/lib/Target/R600/R600InstrInfo.cpp
> index 42e62a4..14808b5 100644
> --- a/lib/Target/R600/R600InstrInfo.cpp
> +++ b/lib/Target/R600/R600InstrInfo.cpp
> @@ -791,7 +791,8 @@ MachineInstrBuilder
> R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
>
> MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
> AddrReg, ValueReg)
> - .addReg(AMDGPU::AR_X,
> RegState::Implicit);
> + .addReg(AMDGPU::AR_X,
> + RegState::Implicit |
> RegState::Kill);
> setImmOperand(Mov, AMDGPU::OpName::DST_REL, 1);
> return Mov;
> }
> @@ -808,7 +809,8 @@ MachineInstrBuilder
> R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
> MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
> ValueReg,
> AddrReg)
> - .addReg(AMDGPU::AR_X,
> RegState::Implicit);
> + .addReg(AMDGPU::AR_X,
> + RegState::Implicit |
> RegState::Kill);
> setImmOperand(Mov, AMDGPU::OpName::SRC0_REL, 1);
>
> return Mov;
> diff --git a/lib/Target/R600/R600MachineScheduler.cpp
> b/lib/Target/R600/R600MachineScheduler.cpp
> index 8d61b8c..de40277 100644
> --- a/lib/Target/R600/R600MachineScheduler.cpp
> +++ b/lib/Target/R600/R600MachineScheduler.cpp
> @@ -59,14 +59,23 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
> bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind])
> &&
> (!Available[IDFetch].empty() || !Available[IDOther].empty());
>
> - if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
> - (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
> + // We want to scheduled AR defs as soon as possible to make sure they
> aren't
> + // put in a different ALU clause from their uses.
> + if (!SU && !UnscheduledARDefs.empty()) {
> + SU = UnscheduledARDefs[0];
> + UnscheduledARDefs.erase(UnscheduledARDefs.begin());
> + NextInstKind = IDAlu;
> + }
> +
> + if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
> + (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
> // try to pick ALU
> SU = pickAlu();
> if (SU) {
> if (CurEmitted >= InstKindLimit[IDAlu])
> CurEmitted = 0;
> NextInstKind = IDAlu;
> +
Extra whitespace: the blank line added just above is unnecessary and should be dropped.
> }
> }
>
> @@ -84,6 +93,15 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
> NextInstKind = IDOther;
> }
>
> + // We want to schedule the AR uses as late as possible to make sure that
> + // the AR defs have been released.
> + if (!SU && !UnscheduledARUses.empty()) {
> + SU = UnscheduledARUses[0];
> + UnscheduledARUses.erase(UnscheduledARUses.begin());
> + NextInstKind = IDAlu;
> + }
> +
> +
> DEBUG(
> if (SU) {
> dbgs() << " ** Pick node **\n";
> @@ -149,6 +167,21 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
> DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
>
> int IK = getInstKind(SU);
> +
> + // Check for AR register defines
> + for (MachineInstr::const_mop_iterator I =
> SU->getInstr()->operands_begin(),
> + E =
> SU->getInstr()->operands_end();
> + I != E; ++I) {
> + if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
> + if (I->isDef()) {
> + UnscheduledARDefs.push_back(SU);
> + } else {
> + UnscheduledARUses.push_back(SU);
> + }
> + return;
> + }
> + }
> +
> // There is no export clause, we can schedule one as soon as its ready
> if (IK == IDOther)
> Available[IDOther].push_back(SU);
> diff --git a/lib/Target/R600/R600MachineScheduler.h
> b/lib/Target/R600/R600MachineScheduler.h
> index 814ae9e..4dedf70 100644
> --- a/lib/Target/R600/R600MachineScheduler.h
> +++ b/lib/Target/R600/R600MachineScheduler.h
> @@ -52,6 +52,8 @@ class R600SchedStrategy : public MachineSchedStrategy {
>
> std::vector<SUnit *> Available[IDLast], Pending[IDLast];
> std::vector<SUnit *> AvailableAlus[AluLast];
> + std::vector<SUnit *> UnscheduledARDefs;
> + std::vector<SUnit *> UnscheduledARUses;
>
> InstKind CurInstKind;
> int CurEmitted;
> diff --git a/test/CodeGen/R600/stack-simple.ll
> b/test/CodeGen/R600/stack-simple.ll
> new file mode 100644
> index 0000000..9bec183
> --- /dev/null
> +++ b/test/CodeGen/R600/stack-simple.ll
> @@ -0,0 +1,45 @@
> +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +
> +; CHECK: MOVA_INT
> +; CHECK-NOT: ALU clause
> +; CHECK: 0 + AR.x
> +; CHECK: MOVA_INT
> +; CHECK-NOT: ALU clause
> +; CHECK: 0 + AR.x
> +
> +; Function Attrs: nounwind
> +define void @stack_array_write(i32 addrspace(1)* nocapture %out, i32
> addrspace(1)* nocapture %in) #0 {
> +entry:
> + %stack = alloca [5 x i32], align 4
> + %0 = load i32 addrspace(1)* %in, align 4, !tbaa !7
> + %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
> + store i32 4, i32* %arrayidx1, align 4, !tbaa !7
> + %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
> + %1 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !7
> + %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
> + store i32 5, i32* %arrayidx3, align 4, !tbaa !7
> + %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
> + %2 = load i32* %arrayidx10, align 4, !tbaa !7
> + store i32 %2, i32 addrspace(1)* %out, align 4, !tbaa !7
> + %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
> + %3 = load i32* %arrayidx12, align 4, !tbaa !7
> + %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
> + store i32 %3, i32 addrspace(1)* %arrayidx13, align 4, !tbaa !7
> + ret void
> +}
> +
> +attributes #0 = { nounwind "less-precise-fpmad"="false"
> "no-frame-pointer-elim"="false"
> "no-frame-pointer-elim-non-leaf"="false"
> "no-infs-fp-math"="false"
> "no-nans-fp-math"="false"
> "unsafe-fp-math"="false"
> "use-soft-float"="false" }
> +
> +!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6}
> +
> +!0 = metadata !{null}
> +!1 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*)*
> @stack_array_write}
> +!2 = metadata !{null}
> +!3 = metadata !{null}
> +!4 = metadata !{null}
> +!5 = metadata !{null}
> +!6 = metadata !{null}
> +!7 = metadata !{metadata !"int", metadata !8}
> +!8 = metadata !{metadata !"omnipotent char", metadata !9}
> +!9 = metadata !{metadata !"Simple C/C++ TBAA"}
> +
> --
> 1.7.11.4
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list