R600 Patches (Plus one for SI): Various fixes to get the piglit all_cl tests to stop hanging
Tom Stellard
tom at stellard.net
Mon Apr 22 21:03:27 PDT 2013
On Mon, Apr 22, 2013 at 02:28:44PM -0700, Vincent Lejeune wrote:
> Hi,
>
> Some of our passes use the AMDGPU::RETURN instruction (offhand, I remember one case in InstructionEmitter).
> Besides, I need to postprocess the return instruction to add PAD.
>
> Vincent
>
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
>
> From 57a4bb947a5fd0feed4c7b8ae67c514039b2a2b0 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sat, 13 Apr 2013 16:34:39 +0200
> Subject: [PATCH] R600: Add CF_END
>
> ---
> lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 6 +-
> lib/Target/R600/R600ControlFlowFinalizer.cpp | 91 ++++++++++++----------
> lib/Target/R600/R600Instructions.td | 24 +++++-
> test/CodeGen/R600/sdiv.ll | 2 +-
> test/CodeGen/R600/udiv.ll | 2 +-
> test/CodeGen/R600/urem.ll | 2 +-
> 6 files changed, 80 insertions(+), 47 deletions(-)
>
> diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> index 416d710..4864b3e 100644
> --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -281,7 +281,11 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> case AMDGPU::EG_ExportSwz:
> case AMDGPU::R600_ExportSwz:
> case AMDGPU::EG_ExportBuf:
> - case AMDGPU::R600_ExportBuf: {
> + case AMDGPU::R600_ExportBuf:
> + case AMDGPU::PAD:
> + case AMDGPU::CF_END_R600:
> + case AMDGPU::CF_END_EG:
> + case AMDGPU::CF_END_CM: {
> uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
> EmitByte(INSTR_NATIVE, OS);
> Emit(Inst, OS);
> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> index bc1ca58..e683d75 100644
> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -39,7 +39,8 @@ private:
> CF_LOOP_CONTINUE,
> CF_JUMP,
> CF_ELSE,
> - CF_POP
> + CF_POP,
> + CF_END
> };
>
> static char ID;
> @@ -91,49 +92,46 @@ private:
> }
>
> const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
> - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) {
> - switch (CFI) {
> - case CF_TC:
> - return TII->get(AMDGPU::CF_TC_R600);
> - case CF_CALL_FS:
> - return TII->get(AMDGPU::CF_CALL_FS_R600);
> - case CF_WHILE_LOOP:
> - return TII->get(AMDGPU::WHILE_LOOP_R600);
> - case CF_END_LOOP:
> - return TII->get(AMDGPU::END_LOOP_R600);
> - case CF_LOOP_BREAK:
> - return TII->get(AMDGPU::LOOP_BREAK_R600);
> - case CF_LOOP_CONTINUE:
> - return TII->get(AMDGPU::CF_CONTINUE_R600);
> - case CF_JUMP:
> - return TII->get(AMDGPU::CF_JUMP_R600);
> - case CF_ELSE:
> - return TII->get(AMDGPU::CF_ELSE_R600);
> - case CF_POP:
> - return TII->get(AMDGPU::POP_R600);
> - }
> - } else {
> - switch (CFI) {
> - case CF_TC:
> - return TII->get(AMDGPU::CF_TC_EG);
> - case CF_CALL_FS:
> - return TII->get(AMDGPU::CF_CALL_FS_EG);
> - case CF_WHILE_LOOP:
> - return TII->get(AMDGPU::WHILE_LOOP_EG);
> - case CF_END_LOOP:
> - return TII->get(AMDGPU::END_LOOP_EG);
> - case CF_LOOP_BREAK:
> - return TII->get(AMDGPU::LOOP_BREAK_EG);
> - case CF_LOOP_CONTINUE:
> - return TII->get(AMDGPU::CF_CONTINUE_EG);
> - case CF_JUMP:
> - return TII->get(AMDGPU::CF_JUMP_EG);
> - case CF_ELSE:
> - return TII->get(AMDGPU::CF_ELSE_EG);
> - case CF_POP:
> - return TII->get(AMDGPU::POP_EG);
> + unsigned Opcode = 0;
> + bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
> + switch (CFI) {
> + case CF_TC:
> + Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
> + break;
> + case CF_CALL_FS:
> + Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
> + break;
> + case CF_WHILE_LOOP:
> + Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
> + break;
> + case CF_END_LOOP:
> + Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
> + break;
> + case CF_LOOP_BREAK:
> + Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
> + break;
> + case CF_LOOP_CONTINUE:
> + Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
> + break;
> + case CF_JUMP:
> + Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
> + break;
> + case CF_ELSE:
> + Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
> + break;
> + case CF_POP:
> + Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
> + break;
> + case CF_END:
> + if (ST.device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX) {
> + Opcode = AMDGPU::CF_END_CM;
> + break;
> }
> + Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
> + break;
> }
> + assert (Opcode && "No opcode selected");
> + return TII->get(Opcode);
> }
>
> MachineBasicBlock::iterator
> @@ -310,6 +308,15 @@ public:
> CfCount++;
> break;
> }
> + case AMDGPU::RETURN: {
> + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
> + CfCount++;
> + MI->eraseFromParent();
> + if (CfCount % 2) {
> + BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
> + CfCount++;
> + }
> + }
> default:
> break;
> }
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 361fc98..e0b2a8c 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -897,6 +897,7 @@ class CF_WORD1_EG {
> bits<2> COND;
> bits<6> COUNT;
> bits<1> VALID_PIXEL_MODE;
> + bits<1> END_OF_PROGRAM;
> bits<8> CF_INST;
> bits<1> BARRIER;
>
> @@ -919,6 +920,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
> let CF_CONST = 0;
> let VALID_PIXEL_MODE = 0;
> let COND = 0;
> + let END_OF_PROGRAM = 0;
>
> let Inst{31-0} = Word0;
> let Inst{63-32} = Word1;
> @@ -934,6 +936,10 @@ def STACK_SIZE : AMDGPUInst <(outs),
> let Inst = num;
> }
>
> +def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
> + field bits<64> Inst;
> +}
> +
> let Predicates = [isR600toCayman] in {
>
> //===----------------------------------------------------------------------===//
> @@ -1486,6 +1492,12 @@ let Predicates = [isR600] in {
> "POP @$ADDR POP:$POP_COUNT"> {
> let COUNT = 0;
> }
> + def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
> + let COUNT = 0;
> + let POP_COUNT = 0;
> + let ADDR = 0;
> + let END_OF_PROGRAM = 1;
> + }
>
> }
>
> @@ -1690,7 +1702,12 @@ let hasSideEffects = 1 in {
> "POP @$ADDR POP:$POP_COUNT"> {
> let COUNT = 0;
> }
> -
> + def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
> + let COUNT = 0;
> + let POP_COUNT = 0;
> + let ADDR = 0;
> + let END_OF_PROGRAM = 1;
> + }
>
> //===----------------------------------------------------------------------===//
> // Memory read/write instructions
> @@ -1935,6 +1952,11 @@ def : Pat <
> (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
> >;
>
> + def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
> + let ADDR = 0;
> + let POP_COUNT = 0;
> + let COUNT = 0;
> + }
>
> def : Pat<(fsqrt R600_Reg32:$src),
> (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
> diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
> index 3556fac..3dd10c8 100644
> --- a/test/CodeGen/R600/sdiv.ll
> +++ b/test/CodeGen/R600/sdiv.ll
> @@ -9,7 +9,7 @@
> ; This was fixed by adding an additional pattern in R600Instructions.td to
> ; match this pattern with a CNDGE_INT.
>
> -; CHECK: RETURN
> +; CHECK: CF_END
>
> define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
> %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
> diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll
> index 47657a6..b81e366 100644
> --- a/test/CodeGen/R600/udiv.ll
> +++ b/test/CodeGen/R600/udiv.ll
> @@ -3,7 +3,7 @@
> ;The code generated by udiv is long and complex and may frequently change.
> ;The goal of this test is to make sure the ISel doesn't fail when it gets
> ;a v4i32 udiv
> -;CHECK: RETURN
> +;CHECK: CF_END
>
> define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
> %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
> diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll
> index 2e7388c..a2cc0bd 100644
> --- a/test/CodeGen/R600/urem.ll
> +++ b/test/CodeGen/R600/urem.ll
> @@ -3,7 +3,7 @@
> ;The code generated by urem is long and complex and may frequently change.
> ;The goal of this test is to make sure the ISel doesn't fail when it gets
> ;a v4i32 urem
> -;CHECK: RETURN
> +;CHECK: CF_END
>
> define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
> %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
> --
> 1.8.1.4
>
More information about the llvm-commits
mailing list