[PATCH] R600: Workaround for cayman loop bug

Tom Stellard tom at stellard.net
Fri Nov 15 12:51:56 PST 2013


On Fri, Nov 15, 2013 at 12:34:12PM -0800, Vincent Lejeune wrote:
> vljn added you to the CC list for the revision "R600: Workaround for cayman loop bug".
> 
> Some Shadertoy samples were putting Cayman GPUs into a lockup state because of a hardware bug
> (with CF_ALU_PUSH_BEFORE, the stack might be corrupted inside a nested loop, so we expand these instructions to PUSH; CF_ALU).
> 

Is it possible to add a lit test for this?

> http://llvm-reviews.chandlerc.com/D2192
> 
> Files:
>   lib/Target/R600/R600ControlFlowFinalizer.cpp
>   lib/Target/R600/R600Instructions.td
> 
> Index: lib/Target/R600/R600ControlFlowFinalizer.cpp
> ===================================================================
> --- lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -332,6 +332,7 @@
>  
>      unsigned MaxStack = 0;
>      unsigned CurrentStack = 0;
> +    unsigned CurrentLoopDepth = 0;
>      bool HasPush = false;
>      for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
>          ++MB) {
> @@ -370,14 +371,22 @@
>            CurrentStack++;
>            MaxStack = std::max(MaxStack, CurrentStack);
>            HasPush = true;
> +          if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
> +            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
> +                .addImm(CfCount + 1)
> +                .addImm(1);
> +            MI->setDesc(TII->get(AMDGPU::CF_ALU));
> +            CfCount++;
> +          }
>          case AMDGPU::CF_ALU:
>            I = MI;
>            AluClauses.push_back(MakeALUClause(MBB, I));
>            DEBUG(dbgs() << CfCount << ":"; MI->dump(););
>            CfCount++;
>            break;
>          case AMDGPU::WHILELOOP: {
>            CurrentStack+=4;
> +          CurrentLoopDepth ++;

There should be no space before the ++ operator.

>            MaxStack = std::max(MaxStack, CurrentStack);
>            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
>                getHWInstrDesc(CF_WHILE_LOOP))
> @@ -392,6 +401,7 @@
>          }
>          case AMDGPU::ENDLOOP: {
>            CurrentStack-=4;
> +          CurrentLoopDepth --;

Same here: there should be no space before the -- operator.

>            std::pair<unsigned, std::set<MachineInstr *> > Pair =
>                LoopStack.back();
>            LoopStack.pop_back();
> Index: lib/Target/R600/R600Instructions.td
> ===================================================================
> --- lib/Target/R600/R600Instructions.td
> +++ lib/Target/R600/R600Instructions.td
> @@ -1854,6 +1854,10 @@
>      let COUNT = 0;
>    }
>  
> +  def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> {
> +    let COUNT = 0;
> +  }
> +
>  def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
>  
>  class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :

> Index: lib/Target/R600/R600ControlFlowFinalizer.cpp
> ===================================================================
> --- lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -332,6 +332,7 @@
>  
>      unsigned MaxStack = 0;
>      unsigned CurrentStack = 0;
> +    unsigned CurrentLoopDepth = 0;
>      bool HasPush = false;
>      for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
>          ++MB) {
> @@ -370,14 +371,22 @@
>            CurrentStack++;
>            MaxStack = std::max(MaxStack, CurrentStack);
>            HasPush = true;
> +          if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
> +            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
> +                .addImm(CfCount + 1)
> +                .addImm(1);
> +            MI->setDesc(TII->get(AMDGPU::CF_ALU));
> +            CfCount++;
> +          }
>          case AMDGPU::CF_ALU:
>            I = MI;
>            AluClauses.push_back(MakeALUClause(MBB, I));
>            DEBUG(dbgs() << CfCount << ":"; MI->dump(););
>            CfCount++;
>            break;
>          case AMDGPU::WHILELOOP: {
>            CurrentStack+=4;
> +          CurrentLoopDepth ++;
>            MaxStack = std::max(MaxStack, CurrentStack);
>            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
>                getHWInstrDesc(CF_WHILE_LOOP))
> @@ -392,6 +401,7 @@
>          }
>          case AMDGPU::ENDLOOP: {
>            CurrentStack-=4;
> +          CurrentLoopDepth --;
>            std::pair<unsigned, std::set<MachineInstr *> > Pair =
>                LoopStack.back();
>            LoopStack.pop_back();
> Index: lib/Target/R600/R600Instructions.td
> ===================================================================
> --- lib/Target/R600/R600Instructions.td
> +++ lib/Target/R600/R600Instructions.td
> @@ -1854,6 +1854,10 @@
>      let COUNT = 0;
>    }
>  
> +  def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> {
> +    let COUNT = 0;
> +  }
> +
>  def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
>  
>  class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list