[PATCH] R600: Workaround for cayman loop bug
Tom Stellard
tom at stellard.net
Fri Nov 15 12:51:56 PST 2013
On Fri, Nov 15, 2013 at 12:34:12PM -0800, Vincent Lejeune wrote:
> vljn added you to the CC list for the revision "R600: Workaround for cayman loop bug".
>
> Some Shadertoy samples were putting Cayman GPUs into a lock-up state because of a hardware bug
> (with CF_ALU_PUSH_BEFORE the stack might be corrupted inside a nested loop, so we expand these instructions to PUSH; CF_ALU).
>
Is it possible to add a lit test for this?
> http://llvm-reviews.chandlerc.com/D2192
>
> Files:
> lib/Target/R600/R600ControlFlowFinalizer.cpp
> lib/Target/R600/R600Instructions.td
>
> Index: lib/Target/R600/R600ControlFlowFinalizer.cpp
> ===================================================================
> --- lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -332,6 +332,7 @@
>
> unsigned MaxStack = 0;
> unsigned CurrentStack = 0;
> + unsigned CurrentLoopDepth = 0;
> bool HasPush = false;
> for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
> ++MB) {
> @@ -370,14 +371,22 @@
> CurrentStack++;
> MaxStack = std::max(MaxStack, CurrentStack);
> HasPush = true;
> + if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
> + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
> + .addImm(CfCount + 1)
> + .addImm(1);
> + MI->setDesc(TII->get(AMDGPU::CF_ALU));
> + CfCount++;
> + }
> case AMDGPU::CF_ALU:
> I = MI;
> AluClauses.push_back(MakeALUClause(MBB, I));
> DEBUG(dbgs() << CfCount << ":"; MI->dump(););
> CfCount++;
> break;
> case AMDGPU::WHILELOOP: {
> CurrentStack+=4;
> + CurrentLoopDepth ++;
Please remove the space before ++.
> MaxStack = std::max(MaxStack, CurrentStack);
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> getHWInstrDesc(CF_WHILE_LOOP))
> @@ -392,6 +401,7 @@
> }
> case AMDGPU::ENDLOOP: {
> CurrentStack-=4;
> + CurrentLoopDepth --;
Same here: please remove the space before --.
> std::pair<unsigned, std::set<MachineInstr *> > Pair =
> LoopStack.back();
> LoopStack.pop_back();
> Index: lib/Target/R600/R600Instructions.td
> ===================================================================
> --- lib/Target/R600/R600Instructions.td
> +++ lib/Target/R600/R600Instructions.td
> @@ -1854,6 +1854,10 @@
> let COUNT = 0;
> }
>
> + def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> +
> def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
>
> class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
> Index: lib/Target/R600/R600ControlFlowFinalizer.cpp
> ===================================================================
> --- lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -332,6 +332,7 @@
>
> unsigned MaxStack = 0;
> unsigned CurrentStack = 0;
> + unsigned CurrentLoopDepth = 0;
> bool HasPush = false;
> for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
> ++MB) {
> @@ -370,14 +371,22 @@
> CurrentStack++;
> MaxStack = std::max(MaxStack, CurrentStack);
> HasPush = true;
> + if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
> + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
> + .addImm(CfCount + 1)
> + .addImm(1);
> + MI->setDesc(TII->get(AMDGPU::CF_ALU));
> + CfCount++;
> + }
> case AMDGPU::CF_ALU:
> I = MI;
> AluClauses.push_back(MakeALUClause(MBB, I));
> DEBUG(dbgs() << CfCount << ":"; MI->dump(););
> CfCount++;
> break;
> case AMDGPU::WHILELOOP: {
> CurrentStack+=4;
> + CurrentLoopDepth ++;
> MaxStack = std::max(MaxStack, CurrentStack);
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> getHWInstrDesc(CF_WHILE_LOOP))
> @@ -392,6 +401,7 @@
> }
> case AMDGPU::ENDLOOP: {
> CurrentStack-=4;
> + CurrentLoopDepth --;
> std::pair<unsigned, std::set<MachineInstr *> > Pair =
> LoopStack.back();
> LoopStack.pop_back();
> Index: lib/Target/R600/R600Instructions.td
> ===================================================================
> --- lib/Target/R600/R600Instructions.td
> +++ lib/Target/R600/R600Instructions.td
> @@ -1854,6 +1854,10 @@
> let COUNT = 0;
> }
>
> + def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> {
> + let COUNT = 0;
> + }
> +
> def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
>
> class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list