[PATCH] R600: improve dump of S_WAITCNT

Tom Stellard tom at stellard.net
Thu Oct 10 10:23:55 PDT 2013


On Thu, Oct 10, 2013 at 09:30:34AM -0700, Vincent Lejeune wrote:
> From b01dc6013f15a28659e9bc76fb51ab71afd60df9 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Thu, 3 Oct 2013 00:31:37 +0200
> Subject: [PATCH] R600: improve dump of S_WAITCNT
> 
> ---
>  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 14 +++++++++
>  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h   |  1 +
>  lib/Target/R600/SIInstructions.td                 |  4 ++-
>  test/CodeGen/R600/wait.ll                         | 37 +++++++++++++++++++++++
>  4 files changed, 55 insertions(+), 1 deletion(-)
>  create mode 100644 test/CodeGen/R600/wait.ll
> 
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index fac3c39..fd10a63 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -255,4 +255,18 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
>    }
>  }
>  
> +void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
> +                                      raw_ostream &O) {
> +  unsigned SImm16 = MI->getOperand(OpNo).getImm();
> +  unsigned Vmcnt = SImm16 & 0xF;
> +  unsigned Expcnt = (SImm16 >> 4) & 0xF;
> +  unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;

lgkmcnt is 5 bits wide, so the mask here should be 0x1F

> +  if (Vmcnt != 0xF)
> +    O << "vmcnt(" << Vmcnt << ") ";
> +  if (Expcnt != 0x7)
> +    O << "expcnt(" << Expcnt << ") ";
> +  if (Lgkmcnt != 0x7)

Same here: this comparison should be against 0x1F

> +    O << "lgkmcnt(" << Lgkmcnt << ")";
> +}
> +
>  #include "AMDGPUGenAsmWriter.inc"
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> index 4c1dfa6..3524b30 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -52,6 +52,7 @@ private:
>    void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>    void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> +  void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
>  };
>  
>  } // End namespace llvm
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 99fedcb..4dfaa16 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -25,6 +25,8 @@ def InterpSlot : Operand<i32> {
>  def isSI : Predicate<"Subtarget.getGeneration() "
>                        "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
>  
> +def WAIT_FLAG : InstFlag<"printWaitFlag">;
> +
>  let Predicates = [isSI] in {
>  
>  let neverHasSideEffects = 1 in {
> @@ -815,7 +817,7 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
>    let mayStore = 1;
>  }
>  
> -def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
> +def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
>    []
>  >;
>  } // End hasSideEffects
> diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
> new file mode 100644
> index 0000000..dac9e54
> --- /dev/null
> +++ b/test/CodeGen/R600/wait.ll
> @@ -0,0 +1,37 @@
> +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
> +

I just pushed my verification fixes, so you can add this flag to the llc RUN line:
--verify-machineinstrs

> +;CHECK: @main

Use CHECK-LABEL here instead of CHECK

With these changes, this patch is

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

> +;CHECK: S_WAITCNT lgkmcnt(0)
> +;CHECK: S_WAITCNT vmcnt(0)
> +;CHECK: S_WAITCNT expcnt(0) lgkmcnt(0)
> +
> +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
> +main_body:
> +  %10 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
> +  %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
> +  %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6)
> +  %13 = extractelement <4 x float> %12, i32 0
> +  %14 = extractelement <4 x float> %12, i32 1
> +  %15 = extractelement <4 x float> %12, i32 2
> +  %16 = extractelement <4 x float> %12, i32 3
> +  %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
> +  %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0
> +  %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6)
> +  %20 = extractelement <4 x float> %19, i32 0
> +  %21 = extractelement <4 x float> %19, i32 1
> +  %22 = extractelement <4 x float> %19, i32 2
> +  %23 = extractelement <4 x float> %19, i32 3
> +  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23)
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16)
> +  ret void
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> +
> +attributes #0 = { "ShaderType"="1" }
> +attributes #1 = { nounwind readnone }
> +
> +!0 = metadata !{metadata !"const", null, i32 1}
> -- 
> 1.8.3.1
> 




More information about the llvm-commits mailing list