R600 Patches (Plus one for SI): Various fixes to get the piglit all_cl tests to stop hanging

Tom Stellard tom at stellard.net
Mon Apr 22 21:03:27 PDT 2013


On Mon, Apr 22, 2013 at 02:28:44PM -0700, Vincent Lejeune wrote:
> Hi,
> 
> Some of our passes use the AMDGPU::RETURN instruction (offhand, I remember one case in InstructionEmitter).
> Besides, I need to post-process the return instruction to add PAD.
> 
> Vincent
>

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
 
> 
> From 57a4bb947a5fd0feed4c7b8ae67c514039b2a2b0 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sat, 13 Apr 2013 16:34:39 +0200
> Subject: [PATCH] R600: Add CF_END
> 
> ---
>  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  6 +-
>  lib/Target/R600/R600ControlFlowFinalizer.cpp       | 91 ++++++++++++----------
>  lib/Target/R600/R600Instructions.td                | 24 +++++-
>  test/CodeGen/R600/sdiv.ll                          |  2 +-
>  test/CodeGen/R600/udiv.ll                          |  2 +-
>  test/CodeGen/R600/urem.ll                          |  2 +-
>  6 files changed, 80 insertions(+), 47 deletions(-)
> 
> diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> index 416d710..4864b3e 100644
> --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -281,7 +281,11 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
>      case AMDGPU::EG_ExportSwz:
>      case AMDGPU::R600_ExportSwz:
>      case AMDGPU::EG_ExportBuf:
> -    case AMDGPU::R600_ExportBuf: {
> +    case AMDGPU::R600_ExportBuf:
> +    case AMDGPU::PAD:
> +    case AMDGPU::CF_END_R600:
> +    case AMDGPU::CF_END_EG:
> +    case AMDGPU::CF_END_CM: {
>        uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
>        EmitByte(INSTR_NATIVE, OS);
>        Emit(Inst, OS);
> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> index bc1ca58..e683d75 100644
> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -39,7 +39,8 @@ private:
>      CF_LOOP_CONTINUE,
>      CF_JUMP,
>      CF_ELSE,
> -    CF_POP
> +    CF_POP,
> +    CF_END
>    };
>  
>    static char ID;
> @@ -91,49 +92,46 @@ private:
>    }
>  
>    const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
> -    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) {
> -      switch (CFI) {
> -      case CF_TC:
> -        return TII->get(AMDGPU::CF_TC_R600);
> -      case CF_CALL_FS:
> -        return TII->get(AMDGPU::CF_CALL_FS_R600);
> -      case CF_WHILE_LOOP:
> -        return TII->get(AMDGPU::WHILE_LOOP_R600);
> -      case CF_END_LOOP:
> -        return TII->get(AMDGPU::END_LOOP_R600);
> -      case CF_LOOP_BREAK:
> -        return TII->get(AMDGPU::LOOP_BREAK_R600);
> -      case CF_LOOP_CONTINUE:
> -        return TII->get(AMDGPU::CF_CONTINUE_R600);
> -      case CF_JUMP:
> -        return TII->get(AMDGPU::CF_JUMP_R600);
> -      case CF_ELSE:
> -        return TII->get(AMDGPU::CF_ELSE_R600);
> -      case CF_POP:
> -        return TII->get(AMDGPU::POP_R600);
> -      }
> -    } else {
> -      switch (CFI) {
> -      case CF_TC:
> -        return TII->get(AMDGPU::CF_TC_EG);
> -      case CF_CALL_FS:
> -        return TII->get(AMDGPU::CF_CALL_FS_EG);
> -      case CF_WHILE_LOOP:
> -        return TII->get(AMDGPU::WHILE_LOOP_EG);
> -      case CF_END_LOOP:
> -        return TII->get(AMDGPU::END_LOOP_EG);
> -      case CF_LOOP_BREAK:
> -        return TII->get(AMDGPU::LOOP_BREAK_EG);
> -      case CF_LOOP_CONTINUE:
> -        return TII->get(AMDGPU::CF_CONTINUE_EG);
> -      case CF_JUMP:
> -        return TII->get(AMDGPU::CF_JUMP_EG);
> -      case CF_ELSE:
> -        return TII->get(AMDGPU::CF_ELSE_EG);
> -      case CF_POP:
> -        return TII->get(AMDGPU::POP_EG);
> +    unsigned Opcode = 0;
> +    bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
> +    switch (CFI) {
> +    case CF_TC:
> +      Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
> +      break;
> +    case CF_CALL_FS:
> +      Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
> +      break;
> +    case CF_WHILE_LOOP:
> +      Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
> +      break;
> +    case CF_END_LOOP:
> +      Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
> +      break;
> +    case CF_LOOP_BREAK:
> +      Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
> +      break;
> +    case CF_LOOP_CONTINUE:
> +      Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
> +      break;
> +    case CF_JUMP:
> +      Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
> +      break;
> +    case CF_ELSE:
> +      Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
> +      break;
> +    case CF_POP:
> +      Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
> +      break;
> +    case CF_END:
> +      if (ST.device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX) {
> +        Opcode = AMDGPU::CF_END_CM;
> +        break;
>        }
> +      Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
> +      break;
>      }
> +    assert (Opcode && "No opcode selected");
> +    return TII->get(Opcode);
>    }
>  
>    MachineBasicBlock::iterator
> @@ -310,6 +308,15 @@ public:
>            CfCount++;
>            break;
>          }
> +        case AMDGPU::RETURN: {
> +          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
> +          CfCount++;
> +          MI->eraseFromParent();
> +          if (CfCount % 2) {
> +            BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
> +            CfCount++;
> +          }
> +        }
>          default:
>            break;
>          }
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 361fc98..e0b2a8c 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -897,6 +897,7 @@ class CF_WORD1_EG {
>    bits<2> COND;
>    bits<6> COUNT;
>    bits<1> VALID_PIXEL_MODE;
> +  bits<1> END_OF_PROGRAM;
>    bits<8> CF_INST;
>    bits<1> BARRIER;
>  
> @@ -919,6 +920,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
>    let CF_CONST = 0;
>    let VALID_PIXEL_MODE = 0;
>    let COND = 0;
> +  let END_OF_PROGRAM = 0;
>  
>    let Inst{31-0} = Word0;
>    let Inst{63-32} = Word1;
> @@ -934,6 +936,10 @@ def STACK_SIZE : AMDGPUInst <(outs),
>    let Inst = num;
>  }
>  
> +def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
> +  field bits<64> Inst;
> +}
> +
>  let Predicates = [isR600toCayman] in {
>  
>  //===----------------------------------------------------------------------===//
> @@ -1486,6 +1492,12 @@ let Predicates = [isR600] in {
>    "POP @$ADDR POP:$POP_COUNT"> {
>      let COUNT = 0;
>    }
> +  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
> +    let COUNT = 0;
> +    let POP_COUNT = 0;
> +    let ADDR = 0;
> +    let END_OF_PROGRAM = 1;
> +  }
>  
>  }
>  
> @@ -1690,7 +1702,12 @@ let hasSideEffects = 1 in {
>    "POP @$ADDR POP:$POP_COUNT"> {
>      let COUNT = 0;
>    }
> -
> +  def CF_END_EG :  CF_CLAUSE_EG<0, (ins), "CF_END"> {
> +    let COUNT = 0;
> +    let POP_COUNT = 0;
> +    let ADDR = 0;
> +    let END_OF_PROGRAM = 1;
> +  }
>  
>  //===----------------------------------------------------------------------===//
>  // Memory read/write instructions
> @@ -1935,6 +1952,11 @@ def : Pat <
>                              (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>  >;
>  
> +  def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
> +    let ADDR = 0;
> +    let POP_COUNT = 0;
> +    let COUNT = 0;
> +  }
>  
>  def : Pat<(fsqrt R600_Reg32:$src),
>    (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
> diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
> index 3556fac..3dd10c8 100644
> --- a/test/CodeGen/R600/sdiv.ll
> +++ b/test/CodeGen/R600/sdiv.ll
> @@ -9,7 +9,7 @@
>  ; This was fixed by adding an additional pattern in R600Instructions.td to
>  ; match this pattern with a CNDGE_INT.
>  
> -; CHECK: RETURN
> +; CHECK: CF_END
>  
>  define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
>    %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
> diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll
> index 47657a6..b81e366 100644
> --- a/test/CodeGen/R600/udiv.ll
> +++ b/test/CodeGen/R600/udiv.ll
> @@ -3,7 +3,7 @@
>  ;The code generated by udiv is long and complex and may frequently change.
>  ;The goal of this test is to make sure the ISel doesn't fail when it gets
>  ;a v4i32 udiv
> -;CHECK: RETURN
> +;CHECK: CF_END
>  
>  define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
> diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll
> index 2e7388c..a2cc0bd 100644
> --- a/test/CodeGen/R600/urem.ll
> +++ b/test/CodeGen/R600/urem.ll
> @@ -3,7 +3,7 @@
>  ;The code generated by urem is long and complex and may frequently change.
>  ;The goal of this test is to make sure the ISel doesn't fail when it gets
>  ;a v4i32 urem
> -;CHECK: RETURN
> +;CHECK: CF_END
>  
>  define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
>    %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
> -- 
> 1.8.1.4
> 




More information about the llvm-commits mailing list