[PATCH] R600: improve dump of S_WAITCNT
Tom Stellard
tom at stellard.net
Thu Oct 10 10:23:55 PDT 2013
On Thu, Oct 10, 2013 at 09:30:34AM -0700, Vincent Lejeune wrote:
> From b01dc6013f15a28659e9bc76fb51ab71afd60df9 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Thu, 3 Oct 2013 00:31:37 +0200
> Subject: [PATCH] R600: improve dump of S_WAITCNT
>
> ---
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 14 +++++++++
> lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 1 +
> lib/Target/R600/SIInstructions.td | 4 ++-
> test/CodeGen/R600/wait.ll | 37 +++++++++++++++++++++++
> 4 files changed, 55 insertions(+), 1 deletion(-)
> create mode 100644 test/CodeGen/R600/wait.ll
>
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> index fac3c39..fd10a63 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
> @@ -255,4 +255,18 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
> }
> }
>
> +void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
> + raw_ostream &O) {
> + unsigned SImm16 = MI->getOperand(OpNo).getImm();
> + unsigned Vmcnt = SImm16 & 0xF;
> + unsigned Expcnt = (SImm16 >> 4) & 0xF;
> + unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
lgkmcnt is a 5-bit field, so this mask should be 0x1F.
> + if (Vmcnt != 0xF)
> + O << "vmcnt(" << Vmcnt << ") ";
> + if (Expcnt != 0x7)
> + O << "expcnt(" << Expcnt << ") ";
> + if (Lgkmcnt != 0x7)
Same here: the comparison should be against 0x1F rather than 0x7.
> + O << "lgkmcnt(" << Lgkmcnt << ")";
> +}
> +
> #include "AMDGPUGenAsmWriter.inc"
> diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> index 4c1dfa6..3524b30 100644
> --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
> @@ -52,6 +52,7 @@ private:
> void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> + void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
> };
>
> } // End namespace llvm
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 99fedcb..4dfaa16 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -25,6 +25,8 @@ def InterpSlot : Operand<i32> {
> def isSI : Predicate<"Subtarget.getGeneration() "
> "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
>
> +def WAIT_FLAG : InstFlag<"printWaitFlag">;
> +
> let Predicates = [isSI] in {
>
> let neverHasSideEffects = 1 in {
> @@ -815,7 +817,7 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
> let mayStore = 1;
> }
>
> -def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
> +def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
> []
> >;
> } // End hasSideEffects
> diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
> new file mode 100644
> index 0000000..dac9e54
> --- /dev/null
> +++ b/test/CodeGen/R600/wait.ll
> @@ -0,0 +1,37 @@
> +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
> +
I just pushed my verification fixes, so you can add the following flag
to the llc RUN line:
--verify-machineinstrs
> +;CHECK: @main
Use CHECK-LABEL here.
With these changes, this patch is:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> +;CHECK: S_WAITCNT lgkmcnt(0)
> +;CHECK: S_WAITCNT vmcnt(0)
> +;CHECK: S_WAITCNT expcnt(0) lgkmcnt(0)
> +
> +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
> +main_body:
> + %10 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
> + %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
> + %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6)
> + %13 = extractelement <4 x float> %12, i32 0
> + %14 = extractelement <4 x float> %12, i32 1
> + %15 = extractelement <4 x float> %12, i32 2
> + %16 = extractelement <4 x float> %12, i32 3
> + %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
> + %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0
> + %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6)
> + %20 = extractelement <4 x float> %19, i32 0
> + %21 = extractelement <4 x float> %19, i32 1
> + %22 = extractelement <4 x float> %19, i32 2
> + %23 = extractelement <4 x float> %19, i32 3
> + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23)
> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16)
> + ret void
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> +
> +attributes #0 = { "ShaderType"="1" }
> +attributes #1 = { nounwind readnone }
> +
> +!0 = metadata !{metadata !"const", null, i32 1}
> --
> 1.8.3.1
>
More information about the llvm-commits
mailing list