[PATCH] R600: Workaround for cayman loop bug
Vincent Lejeune
vljn at ovi.com
Fri Nov 15 12:34:12 PST 2013
vljn added you to the CC list for the revision "R600: Workaround for cayman loop bug".
Some Shadertoy samples were putting Cayman GPUs into a lock-up state because of a hardware bug
(with CF_ALU_PUSH_BEFORE, the stack might be corrupted inside a nested loop, so we expand these instructions to PUSH; CF_ALU).
http://llvm-reviews.chandlerc.com/D2192
Files:
lib/Target/R600/R600ControlFlowFinalizer.cpp
lib/Target/R600/R600Instructions.td
Index: lib/Target/R600/R600ControlFlowFinalizer.cpp
===================================================================
--- lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -332,6 +332,7 @@
unsigned MaxStack = 0;
unsigned CurrentStack = 0;
+ unsigned CurrentLoopDepth = 0;
bool HasPush = false;
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
@@ -370,14 +371,22 @@
CurrentStack++;
MaxStack = std::max(MaxStack, CurrentStack);
HasPush = true;
+ if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ MI->setDesc(TII->get(AMDGPU::CF_ALU));
+ CfCount++;
+ }
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
case AMDGPU::WHILELOOP: {
CurrentStack+=4;
+ CurrentLoopDepth ++;
MaxStack = std::max(MaxStack, CurrentStack);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
@@ -392,6 +401,7 @@
}
case AMDGPU::ENDLOOP: {
CurrentStack-=4;
+ CurrentLoopDepth --;
std::pair<unsigned, std::set<MachineInstr *> > Pair =
LoopStack.back();
LoopStack.pop_back();
Index: lib/Target/R600/R600Instructions.td
===================================================================
--- lib/Target/R600/R600Instructions.td
+++ lib/Target/R600/R600Instructions.td
@@ -1854,6 +1854,10 @@
let COUNT = 0;
}
+ def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2192.1.patch
Type: text/x-patch
Size: 2144 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131115/1ce998d1/attachment.bin>
More information about the llvm-commits
mailing list