[PATCH] R600: Make sure to schedule AR register uses and defs in the same clause
Tom Stellard
tom at stellard.net
Fri May 31 11:28:46 PDT 2013
From: Tom Stellard <thomas.stellard at amd.com>
---
lib/Target/R600/R600InstrInfo.cpp | 6 +++--
lib/Target/R600/R600MachineScheduler.cpp | 37 ++++++++++++++++++++++++--
lib/Target/R600/R600MachineScheduler.h | 2 ++
test/CodeGen/R600/stack-simple.ll | 45 ++++++++++++++++++++++++++++++++
4 files changed, 86 insertions(+), 4 deletions(-)
create mode 100644 test/CodeGen/R600/stack-simple.ll
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 42e62a4..14808b5 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -791,7 +791,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
AddrReg, ValueReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
setImmOperand(Mov, AMDGPU::OpName::DST_REL, 1);
return Mov;
}
@@ -808,7 +809,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
ValueReg,
AddrReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
setImmOperand(Mov, AMDGPU::OpName::SRC0_REL, 1);
return Mov;
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 8d61b8c..de40277 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -59,14 +59,23 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
(!Available[IDFetch].empty() || !Available[IDOther].empty());
- if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
- (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+ // We want to schedule AR defs as soon as possible to make sure they aren't
+ // put in a different ALU clause from their uses.
+ if (!SU && !UnscheduledARDefs.empty()) {
+ SU = UnscheduledARDefs[0];
+ UnscheduledARDefs.erase(UnscheduledARDefs.begin());
+ NextInstKind = IDAlu;
+ }
+
+ if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
// try to pick ALU
SU = pickAlu();
if (SU) {
if (CurEmitted >= InstKindLimit[IDAlu])
CurEmitted = 0;
NextInstKind = IDAlu;
+
}
}
@@ -84,6 +93,15 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
NextInstKind = IDOther;
}
+ // We want to schedule the AR uses as late as possible to make sure that
+ // the AR defs have been released.
+ if (!SU && !UnscheduledARUses.empty()) {
+ SU = UnscheduledARUses[0];
+ UnscheduledARUses.erase(UnscheduledARUses.begin());
+ NextInstKind = IDAlu;
+ }
+
+
DEBUG(
if (SU) {
dbgs() << " ** Pick node **\n";
@@ -149,6 +167,21 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
int IK = getInstKind(SU);
+
+ // Check for AR register defines
+ for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
+ E = SU->getInstr()->operands_end();
+ I != E; ++I) {
+ if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
+ if (I->isDef()) {
+ UnscheduledARDefs.push_back(SU);
+ } else {
+ UnscheduledARUses.push_back(SU);
+ }
+ return;
+ }
+ }
+
// There is no export clause, we can schedule one as soon as its ready
if (IK == IDOther)
Available[IDOther].push_back(SU);
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index 814ae9e..4dedf70 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -52,6 +52,8 @@ class R600SchedStrategy : public MachineSchedStrategy {
std::vector<SUnit *> Available[IDLast], Pending[IDLast];
std::vector<SUnit *> AvailableAlus[AluLast];
+ std::vector<SUnit *> UnscheduledARDefs;
+ std::vector<SUnit *> UnscheduledARUses;
InstKind CurInstKind;
int CurEmitted;
diff --git a/test/CodeGen/R600/stack-simple.ll b/test/CodeGen/R600/stack-simple.ll
new file mode 100644
index 0000000..9bec183
--- /dev/null
+++ b/test/CodeGen/R600/stack-simple.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: MOVA_INT
+; CHECK-NOT: ALU clause
+; CHECK: 0 + AR.x
+; CHECK: MOVA_INT
+; CHECK-NOT: ALU clause
+; CHECK: 0 + AR.x
+
+; Function Attrs: nounwind
+define void @stack_array_write(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+entry:
+ %stack = alloca [5 x i32], align 4
+ %0 = load i32 addrspace(1)* %in, align 4, !tbaa !7
+ %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
+ store i32 4, i32* %arrayidx1, align 4, !tbaa !7
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
+ %1 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !7
+ %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
+ store i32 5, i32* %arrayidx3, align 4, !tbaa !7
+ %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
+ %2 = load i32* %arrayidx10, align 4, !tbaa !7
+ store i32 %2, i32 addrspace(1)* %out, align 4, !tbaa !7
+ %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
+ %3 = load i32* %arrayidx12, align 4, !tbaa !7
+ %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
+ store i32 %3, i32 addrspace(1)* %arrayidx13, align 4, !tbaa !7
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6}
+
+!0 = metadata !{null}
+!1 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*)* @stack_array_write}
+!2 = metadata !{null}
+!3 = metadata !{null}
+!4 = metadata !{null}
+!5 = metadata !{null}
+!6 = metadata !{null}
+!7 = metadata !{metadata !"int", metadata !8}
+!8 = metadata !{metadata !"omnipotent char", metadata !9}
+!9 = metadata !{metadata !"Simple C/C++ TBAA"}
+
--
1.7.11.4
More information about the llvm-commits
mailing list