R600 Patches: .td file cleanups and compute fixes for Cayman
Tom Stellard
tom at stellard.net
Mon Jun 10 13:28:28 PDT 2013
Hi,
The attached patches do some re-organization of the R600 .td files and
also fix some issues with compute shaders on Cayman.
-Tom
-------------- next part --------------
>From 4896ff53979c77fcabe224998358ff2350c60678 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Thu, 6 Jun 2013 13:04:56 -0400
Subject: [PATCH 1/4] R600: Move instruction encoding definitions into a
separate .td file
---
lib/Target/R600/R600InstrFormats.td | 392 ++++++++++++++++++++++++++++++++++++
lib/Target/R600/R600Instructions.td | 363 +--------------------------------
2 files changed, 393 insertions(+), 362 deletions(-)
create mode 100644 lib/Target/R600/R600InstrFormats.td
diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td
new file mode 100644
index 0000000..7cc48f4
--- /dev/null
+++ b/lib/Target/R600/R600InstrFormats.td
@@ -0,0 +1,392 @@
+//===-- R600InstrFormats.td - R600 Instruction Encodings ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin>
+ : AMDGPUInst <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ bit TransOnly = 0;
+ bit Trig = 0;
+ bit Op3 = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
+ bit Op1 = 0;
+ bit Op2 = 0;
+ bit HasNativeOperands = 0;
+ bit VTXInst = 0;
+ bit TEXInst = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ let TSFlags{0} = TransOnly;
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+
+ // Vector instructions are instructions that must fill all slots in an
+ // instruction group
+ let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
+ let TSFlags{9} = HasNativeOperands;
+ let TSFlags{10} = Op1;
+ let TSFlags{11} = Op2;
+ let TSFlags{12} = VTXInst;
+ let TSFlags{13} = TEXInst;
+}
+
+//===----------------------------------------------------------------------===//
+// ALU instructions
+//===----------------------------------------------------------------------===//
+
+class R600ALU_Word0 {
+ field bits<32> Word0;
+
+ bits<11> src0;
+ bits<1> src0_neg;
+ bits<1> src0_rel;
+ bits<11> src1;
+ bits<1> src1_rel;
+ bits<1> src1_neg;
+ bits<3> index_mode = 0;
+ bits<2> pred_sel;
+ bits<1> last;
+
+ bits<9> src0_sel = src0{8-0};
+ bits<2> src0_chan = src0{10-9};
+ bits<9> src1_sel = src1{8-0};
+ bits<2> src1_chan = src1{10-9};
+
+ let Word0{8-0} = src0_sel;
+ let Word0{9} = src0_rel;
+ let Word0{11-10} = src0_chan;
+ let Word0{12} = src0_neg;
+ let Word0{21-13} = src1_sel;
+ let Word0{22} = src1_rel;
+ let Word0{24-23} = src1_chan;
+ let Word0{25} = src1_neg;
+ let Word0{28-26} = index_mode;
+ let Word0{30-29} = pred_sel;
+ let Word0{31} = last;
+}
+
+class R600ALU_Word1 {
+ field bits<32> Word1;
+
+ bits<11> dst;
+ bits<3> bank_swizzle;
+ bits<1> dst_rel;
+ bits<1> clamp;
+
+ bits<7> dst_sel = dst{6-0};
+ bits<2> dst_chan = dst{10-9};
+
+ let Word1{20-18} = bank_swizzle;
+ let Word1{27-21} = dst_sel;
+ let Word1{28} = dst_rel;
+ let Word1{30-29} = dst_chan;
+ let Word1{31} = clamp;
+}
+
+class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
+
+ bits<1> src0_abs;
+ bits<1> src1_abs;
+ bits<1> update_exec_mask;
+ bits<1> update_pred;
+ bits<1> write;
+ bits<2> omod;
+
+ let Word1{0} = src0_abs;
+ let Word1{1} = src1_abs;
+ let Word1{2} = update_exec_mask;
+ let Word1{3} = update_pred;
+ let Word1{4} = write;
+ let Word1{6-5} = omod;
+ let Word1{17-7} = alu_inst;
+}
+
+class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
+
+ bits<11> src2;
+ bits<1> src2_rel;
+ bits<1> src2_neg;
+
+ bits<9> src2_sel = src2{8-0};
+ bits<2> src2_chan = src2{10-9};
+
+ let Word1{8-0} = src2_sel;
+ let Word1{9} = src2_rel;
+ let Word1{11-10} = src2_chan;
+ let Word1{12} = src2_neg;
+ let Word1{17-13} = alu_inst;
+}
+
+/*
+XXX: R600 subtarget uses a slightly different encoding than the other
+subtargets. We currently handle this in R600MCCodeEmitter, but we may
+want to use these instruction classes in the future.
+
+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
+
+ bits<1> fog_merge;
+ bits<10> alu_inst;
+
+ let Inst{37} = fog_merge;
+ let Inst{39-38} = omod;
+ let Inst{49-40} = alu_inst;
+}
+
+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
+
+ bits<11> alu_inst;
+
+ let Inst{38-37} = omod;
+ let Inst{49-39} = alu_inst;
+}
+*/
+
+//===----------------------------------------------------------------------===//
+// Vertex Fetch instructions
+//===----------------------------------------------------------------------===//
+
+class VTX_WORD0 {
+ field bits<32> Word0;
+ bits<7> SRC_GPR;
+ bits<5> VC_INST;
+ bits<2> FETCH_TYPE;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> BUFFER_ID;
+ bits<1> SRC_REL;
+ bits<2> SRC_SEL_X;
+ bits<6> MEGA_FETCH_COUNT;
+
+ let Word0{4-0} = VC_INST;
+ let Word0{6-5} = FETCH_TYPE;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = BUFFER_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{25-24} = SRC_SEL_X;
+ let Word0{31-26} = MEGA_FETCH_COUNT;
+}
+
+class VTX_WORD1_GPR {
+ field bits<32> Word1;
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<1> USE_CONST_FIELDS;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL;
+ bits<1> FORMAT_COMP_ALL;
+ bits<1> SRF_MODE_ALL;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{8} = 0; // Reserved
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{21} = USE_CONST_FIELDS;
+ let Word1{27-22} = DATA_FORMAT;
+ let Word1{29-28} = NUM_FORMAT_ALL;
+ let Word1{30} = FORMAT_COMP_ALL;
+ let Word1{31} = SRF_MODE_ALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Texture fetch instructions
+//===----------------------------------------------------------------------===//
+
+class TEX_WORD0 {
+ field bits<32> Word0;
+
+ bits<5> TEX_INST;
+ bits<2> INST_MOD;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> RESOURCE_ID;
+ bits<7> SRC_GPR;
+ bits<1> SRC_REL;
+ bits<1> ALT_CONST;
+ bits<2> RESOURCE_INDEX_MODE;
+ bits<2> SAMPLER_INDEX_MODE;
+
+ let Word0{4-0} = TEX_INST;
+ let Word0{6-5} = INST_MOD;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = RESOURCE_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{24} = ALT_CONST;
+ let Word0{26-25} = RESOURCE_INDEX_MODE;
+ let Word0{28-27} = SAMPLER_INDEX_MODE;
+}
+
+class TEX_WORD1 {
+ field bits<32> Word1;
+
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<7> LOD_BIAS;
+ bits<1> COORD_TYPE_X;
+ bits<1> COORD_TYPE_Y;
+ bits<1> COORD_TYPE_Z;
+ bits<1> COORD_TYPE_W;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{27-21} = LOD_BIAS;
+ let Word1{28} = COORD_TYPE_X;
+ let Word1{29} = COORD_TYPE_Y;
+ let Word1{30} = COORD_TYPE_Z;
+ let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 {
+ field bits<32> Word2;
+
+ bits<5> OFFSET_X;
+ bits<5> OFFSET_Y;
+ bits<5> OFFSET_Z;
+ bits<5> SAMPLER_ID;
+ bits<3> SRC_SEL_X;
+ bits<3> SRC_SEL_Y;
+ bits<3> SRC_SEL_Z;
+ bits<3> SRC_SEL_W;
+
+ let Word2{4-0} = OFFSET_X;
+ let Word2{9-5} = OFFSET_Y;
+ let Word2{14-10} = OFFSET_Z;
+ let Word2{19-15} = SAMPLER_ID;
+ let Word2{22-20} = SRC_SEL_X;
+ let Word2{25-23} = SRC_SEL_Y;
+ let Word2{28-26} = SRC_SEL_Z;
+ let Word2{31-29} = SRC_SEL_W;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_WORD1_R600 {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<3> COUNT;
+ bits<6> CALL_COUNT;
+ bits<1> COUNT_3;
+ bits<1> END_OF_PROGRAM;
+ bits<1> VALID_PIXEL_MODE;
+ bits<7> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{12-10} = COUNT;
+ let Word1{18-13} = CALL_COUNT;
+ let Word1{19} = COUNT_3;
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{22} = VALID_PIXEL_MODE;
+ let Word1{29-23} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class CF_WORD0_EG {
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1_EG {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<1> END_OF_PROGRAM;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_ALU_WORD0 {
+ field bits<32> Word0;
+
+ bits<22> ADDR;
+ bits<4> KCACHE_BANK0;
+ bits<4> KCACHE_BANK1;
+ bits<2> KCACHE_MODE0;
+
+ let Word0{21-0} = ADDR;
+ let Word0{25-22} = KCACHE_BANK0;
+ let Word0{29-26} = KCACHE_BANK1;
+ let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 {
+ field bits<32> Word1;
+
+ bits<2> KCACHE_MODE1;
+ bits<8> KCACHE_ADDR0;
+ bits<8> KCACHE_ADDR1;
+ bits<7> COUNT;
+ bits<1> ALT_CONST;
+ bits<4> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{1-0} = KCACHE_MODE1;
+ let Word1{9-2} = KCACHE_ADDR0;
+ let Word1{17-10} = KCACHE_ADDR1;
+ let Word1{24-18} = COUNT;
+ let Word1{25} = ALT_CONST;
+ let Word1{29-26} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index b4131be..7512cf4 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -12,44 +12,7 @@
//===----------------------------------------------------------------------===//
include "R600Intrinsics.td"
-
-class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin>
- : AMDGPUInst <outs, ins, asm, pattern> {
-
- field bits<64> Inst;
- bit TransOnly = 0;
- bit Trig = 0;
- bit Op3 = 0;
- bit isVector = 0;
- bits<2> FlagOperandIdx = 0;
- bit Op1 = 0;
- bit Op2 = 0;
- bit HasNativeOperands = 0;
- bit VTXInst = 0;
- bit TEXInst = 0;
-
- let Namespace = "AMDGPU";
- let OutOperandList = outs;
- let InOperandList = ins;
- let AsmString = asm;
- let Pattern = pattern;
- let Itinerary = itin;
-
- let TSFlags{0} = TransOnly;
- let TSFlags{4} = Trig;
- let TSFlags{5} = Op3;
-
- // Vector instructions are instructions that must fill all slots in an
- // instruction group
- let TSFlags{6} = isVector;
- let TSFlags{8-7} = FlagOperandIdx;
- let TSFlags{9} = HasNativeOperands;
- let TSFlags{10} = Op1;
- let TSFlags{11} = Op2;
- let TSFlags{12} = VTXInst;
- let TSFlags{13} = TEXInst;
-}
+include "R600InstrFormats.td"
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
InstR600 <outs, ins, asm, pattern, NullALU> {
@@ -114,235 +77,6 @@ def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffse
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
-class R600ALU_Word0 {
- field bits<32> Word0;
-
- bits<11> src0;
- bits<1> src0_neg;
- bits<1> src0_rel;
- bits<11> src1;
- bits<1> src1_rel;
- bits<1> src1_neg;
- bits<3> index_mode = 0;
- bits<2> pred_sel;
- bits<1> last;
-
- bits<9> src0_sel = src0{8-0};
- bits<2> src0_chan = src0{10-9};
- bits<9> src1_sel = src1{8-0};
- bits<2> src1_chan = src1{10-9};
-
- let Word0{8-0} = src0_sel;
- let Word0{9} = src0_rel;
- let Word0{11-10} = src0_chan;
- let Word0{12} = src0_neg;
- let Word0{21-13} = src1_sel;
- let Word0{22} = src1_rel;
- let Word0{24-23} = src1_chan;
- let Word0{25} = src1_neg;
- let Word0{28-26} = index_mode;
- let Word0{30-29} = pred_sel;
- let Word0{31} = last;
-}
-
-class R600ALU_Word1 {
- field bits<32> Word1;
-
- bits<11> dst;
- bits<3> bank_swizzle;
- bits<1> dst_rel;
- bits<1> clamp;
-
- bits<7> dst_sel = dst{6-0};
- bits<2> dst_chan = dst{10-9};
-
- let Word1{20-18} = bank_swizzle;
- let Word1{27-21} = dst_sel;
- let Word1{28} = dst_rel;
- let Word1{30-29} = dst_chan;
- let Word1{31} = clamp;
-}
-
-class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
-
- bits<1> src0_abs;
- bits<1> src1_abs;
- bits<1> update_exec_mask;
- bits<1> update_pred;
- bits<1> write;
- bits<2> omod;
-
- let Word1{0} = src0_abs;
- let Word1{1} = src1_abs;
- let Word1{2} = update_exec_mask;
- let Word1{3} = update_pred;
- let Word1{4} = write;
- let Word1{6-5} = omod;
- let Word1{17-7} = alu_inst;
-}
-
-class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
-
- bits<11> src2;
- bits<1> src2_rel;
- bits<1> src2_neg;
-
- bits<9> src2_sel = src2{8-0};
- bits<2> src2_chan = src2{10-9};
-
- let Word1{8-0} = src2_sel;
- let Word1{9} = src2_rel;
- let Word1{11-10} = src2_chan;
- let Word1{12} = src2_neg;
- let Word1{17-13} = alu_inst;
-}
-
-class VTX_WORD0 {
- field bits<32> Word0;
- bits<7> SRC_GPR;
- bits<5> VC_INST;
- bits<2> FETCH_TYPE;
- bits<1> FETCH_WHOLE_QUAD;
- bits<8> BUFFER_ID;
- bits<1> SRC_REL;
- bits<2> SRC_SEL_X;
- bits<6> MEGA_FETCH_COUNT;
-
- let Word0{4-0} = VC_INST;
- let Word0{6-5} = FETCH_TYPE;
- let Word0{7} = FETCH_WHOLE_QUAD;
- let Word0{15-8} = BUFFER_ID;
- let Word0{22-16} = SRC_GPR;
- let Word0{23} = SRC_REL;
- let Word0{25-24} = SRC_SEL_X;
- let Word0{31-26} = MEGA_FETCH_COUNT;
-}
-
-class VTX_WORD1_GPR {
- field bits<32> Word1;
- bits<7> DST_GPR;
- bits<1> DST_REL;
- bits<3> DST_SEL_X;
- bits<3> DST_SEL_Y;
- bits<3> DST_SEL_Z;
- bits<3> DST_SEL_W;
- bits<1> USE_CONST_FIELDS;
- bits<6> DATA_FORMAT;
- bits<2> NUM_FORMAT_ALL;
- bits<1> FORMAT_COMP_ALL;
- bits<1> SRF_MODE_ALL;
-
- let Word1{6-0} = DST_GPR;
- let Word1{7} = DST_REL;
- let Word1{8} = 0; // Reserved
- let Word1{11-9} = DST_SEL_X;
- let Word1{14-12} = DST_SEL_Y;
- let Word1{17-15} = DST_SEL_Z;
- let Word1{20-18} = DST_SEL_W;
- let Word1{21} = USE_CONST_FIELDS;
- let Word1{27-22} = DATA_FORMAT;
- let Word1{29-28} = NUM_FORMAT_ALL;
- let Word1{30} = FORMAT_COMP_ALL;
- let Word1{31} = SRF_MODE_ALL;
-}
-
-class TEX_WORD0 {
- field bits<32> Word0;
-
- bits<5> TEX_INST;
- bits<2> INST_MOD;
- bits<1> FETCH_WHOLE_QUAD;
- bits<8> RESOURCE_ID;
- bits<7> SRC_GPR;
- bits<1> SRC_REL;
- bits<1> ALT_CONST;
- bits<2> RESOURCE_INDEX_MODE;
- bits<2> SAMPLER_INDEX_MODE;
-
- let Word0{4-0} = TEX_INST;
- let Word0{6-5} = INST_MOD;
- let Word0{7} = FETCH_WHOLE_QUAD;
- let Word0{15-8} = RESOURCE_ID;
- let Word0{22-16} = SRC_GPR;
- let Word0{23} = SRC_REL;
- let Word0{24} = ALT_CONST;
- let Word0{26-25} = RESOURCE_INDEX_MODE;
- let Word0{28-27} = SAMPLER_INDEX_MODE;
-}
-
-class TEX_WORD1 {
- field bits<32> Word1;
-
- bits<7> DST_GPR;
- bits<1> DST_REL;
- bits<3> DST_SEL_X;
- bits<3> DST_SEL_Y;
- bits<3> DST_SEL_Z;
- bits<3> DST_SEL_W;
- bits<7> LOD_BIAS;
- bits<1> COORD_TYPE_X;
- bits<1> COORD_TYPE_Y;
- bits<1> COORD_TYPE_Z;
- bits<1> COORD_TYPE_W;
-
- let Word1{6-0} = DST_GPR;
- let Word1{7} = DST_REL;
- let Word1{11-9} = DST_SEL_X;
- let Word1{14-12} = DST_SEL_Y;
- let Word1{17-15} = DST_SEL_Z;
- let Word1{20-18} = DST_SEL_W;
- let Word1{27-21} = LOD_BIAS;
- let Word1{28} = COORD_TYPE_X;
- let Word1{29} = COORD_TYPE_Y;
- let Word1{30} = COORD_TYPE_Z;
- let Word1{31} = COORD_TYPE_W;
-}
-
-class TEX_WORD2 {
- field bits<32> Word2;
-
- bits<5> OFFSET_X;
- bits<5> OFFSET_Y;
- bits<5> OFFSET_Z;
- bits<5> SAMPLER_ID;
- bits<3> SRC_SEL_X;
- bits<3> SRC_SEL_Y;
- bits<3> SRC_SEL_Z;
- bits<3> SRC_SEL_W;
-
- let Word2{4-0} = OFFSET_X;
- let Word2{9-5} = OFFSET_Y;
- let Word2{14-10} = OFFSET_Z;
- let Word2{19-15} = SAMPLER_ID;
- let Word2{22-20} = SRC_SEL_X;
- let Word2{25-23} = SRC_SEL_Y;
- let Word2{28-26} = SRC_SEL_Z;
- let Word2{31-29} = SRC_SEL_W;
-}
-
-/*
-XXX: R600 subtarget uses a slightly different encoding than the other
-subtargets. We currently handle this in R600MCCodeEmitter, but we may
-want to use these instruction classes in the future.
-
-class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
-
- bits<1> fog_merge;
- bits<10> alu_inst;
-
- let Inst{37} = fog_merge;
- let Inst{39-38} = omod;
- let Inst{49-40} = alu_inst;
-}
-
-class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
-
- bits<11> alu_inst;
-
- let Inst{38-37} = omod;
- let Inst{49-39} = alu_inst;
-}
-*/
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
(ops PRED_SEL_OFF)>;
@@ -779,41 +513,6 @@ class ExportBufInst : InstR600ISA<(
// Control Flow Instructions
//===----------------------------------------------------------------------===//
-class CF_ALU_WORD0 {
- field bits<32> Word0;
-
- bits<22> ADDR;
- bits<4> KCACHE_BANK0;
- bits<4> KCACHE_BANK1;
- bits<2> KCACHE_MODE0;
-
- let Word0{21-0} = ADDR;
- let Word0{25-22} = KCACHE_BANK0;
- let Word0{29-26} = KCACHE_BANK1;
- let Word0{31-30} = KCACHE_MODE0;
-}
-
-class CF_ALU_WORD1 {
- field bits<32> Word1;
-
- bits<2> KCACHE_MODE1;
- bits<8> KCACHE_ADDR0;
- bits<8> KCACHE_ADDR1;
- bits<7> COUNT;
- bits<1> ALT_CONST;
- bits<4> CF_INST;
- bits<1> WHOLE_QUAD_MODE;
- bits<1> BARRIER;
-
- let Word1{1-0} = KCACHE_MODE1;
- let Word1{9-2} = KCACHE_ADDR0;
- let Word1{17-10} = KCACHE_ADDR1;
- let Word1{24-18} = COUNT;
- let Word1{25} = ALT_CONST;
- let Word1{29-26} = CF_INST;
- let Word1{30} = WHOLE_QUAD_MODE;
- let Word1{31} = BARRIER;
-}
def KCACHE : InstFlag<"printKCache">;
@@ -844,34 +543,6 @@ class CF_WORD0_R600 {
let Word0 = ADDR;
}
-class CF_WORD1_R600 {
- field bits<32> Word1;
-
- bits<3> POP_COUNT;
- bits<5> CF_CONST;
- bits<2> COND;
- bits<3> COUNT;
- bits<6> CALL_COUNT;
- bits<1> COUNT_3;
- bits<1> END_OF_PROGRAM;
- bits<1> VALID_PIXEL_MODE;
- bits<7> CF_INST;
- bits<1> WHOLE_QUAD_MODE;
- bits<1> BARRIER;
-
- let Word1{2-0} = POP_COUNT;
- let Word1{7-3} = CF_CONST;
- let Word1{9-8} = COND;
- let Word1{12-10} = COUNT;
- let Word1{18-13} = CALL_COUNT;
- let Word1{19} = COUNT_3;
- let Word1{21} = END_OF_PROGRAM;
- let Word1{22} = VALID_PIXEL_MODE;
- let Word1{29-23} = CF_INST;
- let Word1{30} = WHOLE_QUAD_MODE;
- let Word1{31} = BARRIER;
-}
-
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
@@ -890,38 +561,6 @@ ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
let Inst{63-32} = Word1;
}
-class CF_WORD0_EG {
- field bits<32> Word0;
-
- bits<24> ADDR;
- bits<3> JUMPTABLE_SEL;
-
- let Word0{23-0} = ADDR;
- let Word0{26-24} = JUMPTABLE_SEL;
-}
-
-class CF_WORD1_EG {
- field bits<32> Word1;
-
- bits<3> POP_COUNT;
- bits<5> CF_CONST;
- bits<2> COND;
- bits<6> COUNT;
- bits<1> VALID_PIXEL_MODE;
- bits<1> END_OF_PROGRAM;
- bits<8> CF_INST;
- bits<1> BARRIER;
-
- let Word1{2-0} = POP_COUNT;
- let Word1{7-3} = CF_CONST;
- let Word1{9-8} = COND;
- let Word1{15-10} = COUNT;
- let Word1{20} = VALID_PIXEL_MODE;
- let Word1{21} = END_OF_PROGRAM;
- let Word1{29-22} = CF_INST;
- let Word1{31} = BARRIER;
-}
-
class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
field bits<64> Inst;
--
1.8.1.5
-------------- next part --------------
>From 5792758d781c0ee779ed866f5c3bd79cc6dc7130 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Thu, 6 Jun 2013 15:13:10 -0400
Subject: [PATCH 2/4] R600: Factor the instruction encoding out the
RAT_WRITE_CACHELESS_eg class
---
lib/Target/R600/R600InstrFormats.td | 45 +++++++++++++++++++++++
lib/Target/R600/R600Instructions.td | 73 ++++++++++++-------------------------
2 files changed, 68 insertions(+), 50 deletions(-)
diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td
index 7cc48f4..618004f 100644
--- a/lib/Target/R600/R600InstrFormats.td
+++ b/lib/Target/R600/R600InstrFormats.td
@@ -390,3 +390,48 @@ class CF_ALU_WORD1 {
let Word1{30} = WHOLE_QUAD_MODE;
let Word1{31} = BARRIER;
}
+
+class CF_ALLOC_EXPORT_WORD0_RAT {
+ field bits<32> Word0;
+
+ bits<4> rat_id;
+ bits<6> rat_inst;
+ bits<2> rim;
+ bits<2> type;
+ bits<7> rw_gpr;
+ bits<1> rw_rel;
+ bits<7> index_gpr;
+ bits<2> elem_size;
+
+ let Word0{3-0} = rat_id;
+ let Word0{9-4} = rat_inst;
+ let Word0{10} = 0; // Reserved
+ let Word0{12-11} = rim;
+ let Word0{14-13} = type;
+ let Word0{21-15} = rw_gpr;
+ let Word0{22} = rw_rel;
+ let Word0{29-23} = index_gpr;
+ let Word0{31-30} = elem_size;
+}
+
+class CF_ALLOC_EXPORT_WORD1_BUF {
+ field bits<32> Word1;
+
+ bits<12> array_size;
+ bits<4> comp_mask;
+ bits<4> burst_count;
+ bits<1> vpm;
+ bits<1> eop;
+ bits<8> cf_inst;
+ bits<1> mark;
+ bits<1> barrier;
+
+ let Word1{11-0} = array_size;
+ let Word1{15-12} = comp_mask;
+ let Word1{19-16} = burst_count;
+ let Word1{20} = vpm;
+ let Word1{21} = eop;
+ let Word1{29-22} = cf_inst;
+ let Word1{30} = mark;
+ let Word1{31} = barrier;
+}
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 7512cf4..9716fcf 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -235,45 +235,18 @@ def TEX_SHADOW_ARRAY : PatLeaf<
}]
>;
-class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
+class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, dag outs,
dag ins, string asm, list<dag> pattern> :
- InstR600ISA <outs, ins, asm, pattern> {
- bits<7> RW_GPR;
- bits<7> INDEX_GPR;
-
- bits<2> RIM;
- bits<2> TYPE;
- bits<1> RW_REL;
- bits<2> ELEM_SIZE;
-
- bits<12> ARRAY_SIZE;
- bits<4> COMP_MASK;
- bits<4> BURST_COUNT;
- bits<1> VPM;
- bits<1> eop;
- bits<1> MARK;
- bits<1> BARRIER;
-
- // CF_ALLOC_EXPORT_WORD0_RAT
- let Inst{3-0} = rat_id;
- let Inst{9-4} = rat_inst;
- let Inst{10} = 0; // Reserved
- let Inst{12-11} = RIM;
- let Inst{14-13} = TYPE;
- let Inst{21-15} = RW_GPR;
- let Inst{22} = RW_REL;
- let Inst{29-23} = INDEX_GPR;
- let Inst{31-30} = ELEM_SIZE;
-
- // CF_ALLOC_EXPORT_WORD1_BUF
- let Inst{43-32} = ARRAY_SIZE;
- let Inst{47-44} = COMP_MASK;
- let Inst{51-48} = BURST_COUNT;
- let Inst{52} = VPM;
- let Inst{53} = eop;
- let Inst{61-54} = cf_inst;
- let Inst{62} = MARK;
- let Inst{63} = BARRIER;
+ InstR600ISA <outs, ins, asm, pattern>,
+ CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {
+
+ let cf_inst = cfinst;
+ let rat_inst = ratinst;
+ let rat_id = ratid;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
}
class LoadParamFrag <PatFrag load_type> : PatFrag <
@@ -1396,21 +1369,21 @@ let hasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
let usesCustomInserter = 1 in {
-class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> mask, string name,
list<dag> pattern>
: EG_CF_RAT <0x57, 0x2, 0, (outs), ins, name, pattern> {
- let RIM = 0;
+ let rim = 0;
// XXX: Have a separate instruction for non-indexed writes.
- let TYPE = 1;
- let RW_REL = 0;
- let ELEM_SIZE = 0;
-
- let ARRAY_SIZE = 0;
- let COMP_MASK = comp_mask;
- let BURST_COUNT = 0;
- let VPM = 0;
- let MARK = 0;
- let BARRIER = 1;
+ let type = 1;
+ let rw_rel = 0;
+ let elem_size = 0;
+
+ let array_size = 0;
+ let comp_mask = mask;
+ let burst_count = 0;
+ let vpm = 0;
+ let mark = 0;
+ let barrier = 1;
}
} // End usesCustomInserter = 1
--
1.8.1.5
-------------- next part --------------
>From 9824c3ccf395acfb7cb9089788514981ee956a9c Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Thu, 6 Jun 2013 19:17:00 -0400
Subject: [PATCH 3/4] R600: Use EXPORT_RAT_INST_STORE_DWORD for stores on
Cayman
We were using RAT_INST_STORE_RAW, which seemed to work, but the docs
say this instruction doesn't exist for Cayman, so it's probably safer
to use a documented instruction instead.
---
lib/Target/R600/R600ControlFlowFinalizer.cpp | 1 +
lib/Target/R600/R600Instructions.td | 93 +++++++++++++++++-----------
test/CodeGen/R600/store.ll | 3 +
3 files changed, 60 insertions(+), 37 deletions(-)
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index ab29d60..887c808 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -370,6 +370,7 @@ public:
case AMDGPU::R600_ExportSwz:
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
+ case AMDGPU::RAT_STORE_DWORD_cm:
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 9716fcf..86ddd00 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -235,14 +235,26 @@ def TEX_SHADOW_ARRAY : PatLeaf<
}]
>;
-class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, dag outs,
+class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>,
CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {
- let cf_inst = cfinst;
+ let rat_id = 0;
let rat_inst = ratinst;
- let rat_id = ratid;
+ let rim = 0;
+ // XXX: Have a separate instruction for non-indexed writes.
+ let type = 1;
+ let rw_rel = 0;
+ let elem_size = 0;
+
+ let array_size = 0;
+ let comp_mask = mask;
+ let burst_count = 0;
+ let vpm = 0;
+ let cf_inst = cfinst;
+ let mark = 0;
+ let barrier = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
@@ -1210,6 +1222,33 @@ def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
def : SIN_PAT <SIN_eg>;
def : COS_PAT <COS_eg>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+let usesCustomInserter = 1 in {
+
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> mask, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x57, 0x2, mask, (outs), ins, name, pattern> {
+}
+
+} // End usesCustomInserter = 1
+
+// 32-bit store
+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop",
+ [(global_store i32:$rw_gpr, i32:$index_gpr)]
+>;
+
+//128-bit store
+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop",
+ [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
+>;
+
} // End Predicates = [isEG]
//===----------------------------------------------------------------------===//
@@ -1367,40 +1406,6 @@ let hasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
-let usesCustomInserter = 1 in {
-
-class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> mask, string name,
- list<dag> pattern>
- : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, name, pattern> {
- let rim = 0;
- // XXX: Have a separate instruction for non-indexed writes.
- let type = 1;
- let rw_rel = 0;
- let elem_size = 0;
-
- let array_size = 0;
- let comp_mask = mask;
- let burst_count = 0;
- let vpm = 0;
- let mark = 0;
- let barrier = 1;
-}
-
-} // End usesCustomInserter = 1
-
-// 32-bit store
-def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
- (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop",
- [(global_store i32:$rw_gpr, i32:$index_gpr)]
->;
-
-//128-bit store
-def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
- (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop",
- [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
->;
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>,
@@ -1575,6 +1580,10 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+//===----------------------------------------------------------------------===//
+// Cayman Instructions
+//===----------------------------------------------------------------------===//
+
let Predicates = [isCayman] in {
let isVector = 1 in {
@@ -1616,6 +1625,16 @@ def : Pat <
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
+
+def RAT_STORE_DWORD_cm : EG_CF_RAT <
+ 0x57, 0x14, 0x1, (outs),
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr",
+ [(global_store i32:$rw_gpr, i32:$index_gpr)]
+> {
+ let eop = 0; // This bit is not used on Cayman.
+}
+
} // End isCayman
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index 4d673f3..e87229a 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -1,9 +1,12 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
; floating-point store
; EG-CHECK: @store_f32
; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1
+; CM-CHECK: @store_f32
+; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: @store_f32
; SI-CHECK: BUFFER_STORE_DWORD
--
1.8.1.5
-------------- next part --------------
>From f1e7087470b5eaaf2232f6cab2bcf9fffd5b6962 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Fri, 7 Jun 2013 11:37:31 -0400
Subject: [PATCH 4/4] R600: Use correct encoding for Vertex Fetch instructions
on Cayman
---
lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 4 +-
lib/Target/R600/R600InstrFormats.td | 20 +-
lib/Target/R600/R600Instructions.td | 392 ++++++++++++++-------
test/CodeGen/R600/vertex-fetch-encoding.ll | 14 +
4 files changed, 291 insertions(+), 139 deletions(-)
create mode 100644 test/CodeGen/R600/vertex-fetch-encoding.ll
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 4d6c25c..f470783 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -99,7 +99,9 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
} else if (IS_VTX(Desc)) {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
- InstWord2 |= 1 << 19;
+ if (!(STI.getFeatureBits() & AMDGPU::FeatureCaymanISA)) {
+ InstWord2 |= 1 << 19; // Mega-Fetch bit
+ }
Emit(InstWord01, OS);
Emit(InstWord2, OS);
diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td
index 618004f..1aacda3 100644
--- a/lib/Target/R600/R600InstrFormats.td
+++ b/lib/Target/R600/R600InstrFormats.td
@@ -173,7 +173,6 @@ class VTX_WORD0 {
bits<8> BUFFER_ID;
bits<1> SRC_REL;
bits<2> SRC_SEL_X;
- bits<6> MEGA_FETCH_COUNT;
let Word0{4-0} = VC_INST;
let Word0{6-5} = FETCH_TYPE;
@@ -182,9 +181,28 @@ class VTX_WORD0 {
let Word0{22-16} = SRC_GPR;
let Word0{23} = SRC_REL;
let Word0{25-24} = SRC_SEL_X;
+}
+
+class VTX_WORD0_eg : VTX_WORD0 {
+
+ bits<6> MEGA_FETCH_COUNT;
+
let Word0{31-26} = MEGA_FETCH_COUNT;
}
+class VTX_WORD0_cm : VTX_WORD0 {
+
+ bits<2> SRC_SEL_Y;
+ bits<2> STRUCTURED_READ;
+ bits<1> LDS_REQ;
+ bits<1> COALESCED_READ;
+
+ let Word0{27-26} = SRC_SEL_Y;
+ let Word0{29-28} = STRUCTURED_READ;
+ let Word0{30} = LDS_REQ;
+ let Word0{31} = COALESCED_READ;
+}
+
class VTX_WORD1_GPR {
field bits<32> Word1;
bits<7> DST_GPR;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 86ddd00..732a92c 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -261,6 +261,50 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
}
+class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>,
+ VTX_WORD1_GPR {
+
+ // Static fields
+ let DST_REL = 0;
+ // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
+ // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
+ // however, based on my testing if USE_CONST_FIELDS is set, then all
+ // these fields need to be set to 0.
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 1;
+ let FORMAT_COMP_ALL = 0;
+ let SRF_MODE_ALL = 0;
+
+ let Inst{63-32} = Word1;
+ // LLVM can only encode 64-bit instructions, so these fields are manually
+ // encoded in R600CodeEmitter
+ //
+ // bits<16> OFFSET;
+ // bits<2> ENDIAN_SWAP = 0;
+ // bits<1> CONST_BUF_NO_STRIDE = 0;
+ // bits<1> MEGA_FETCH = 0;
+ // bits<1> ALT_CONST = 0;
+ // bits<2> BUFFER_INDEX_MODE = 0;
+
+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+ // is done in R600CodeEmitter
+ //
+ // Inst{79-64} = OFFSET;
+ // Inst{81-80} = ENDIAN_SWAP;
+ // Inst{82} = CONST_BUF_NO_STRIDE;
+ // Inst{83} = MEGA_FETCH;
+ // Inst{84} = ALT_CONST;
+ // Inst{86-85} = BUFFER_INDEX_MODE;
+ // Inst{95-86} = 0; Reserved
+
+ // VTX_WORD3 (Padding)
+ //
+ // Inst{127-96} = 0;
+
+ let VTXInst = 1;
+}
+
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
@@ -1249,6 +1293,133 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;
+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
+
+ // Static fields
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ let SRC_SEL_X = 0;
+
+ let Inst{31-0} = Word0;
+}
+
+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 1;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 1; // FMT_8
+}
+
+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+ let MEGA_FETCH_COUNT = 2;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 5; // FMT_16
+
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 4;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $ptr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+ //Adding this constraint prevents this from happening.
+ let Constraints = "$ptr.ptr = $dst";
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+ [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+ [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+ [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+ [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]
+>;
+
+// 32-bit reads
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+ [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]
+>;
+
+// 128-bit reads
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+ [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Constant Loads
+// XXX: We are currently storing all constants in the global address space.
+//===----------------------------------------------------------------------===//
+
+def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
+ [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]
+>;
+
+
} // End Predicates = [isEG]
//===----------------------------------------------------------------------===//
@@ -1403,13 +1574,71 @@ let hasSideEffects = 1 in {
let END_OF_PROGRAM = 1;
}
+} // End Predicates = [isEGorCayman]
+
//===----------------------------------------------------------------------===//
-// Memory read/write instructions
+// Regist loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//
-class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>,
- VTX_WORD1_GPR, VTX_WORD0 {
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
+//===----------------------------------------------------------------------===//
+// Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isCayman] in {
+
+let isVector = 1 in {
+
+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_cm : SIN_Common<0x8D>;
+def COS_cm : COS_Common<0x8E>;
+} // End isVector = 1
+
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
+def : SIN_PAT <SIN_cm>;
+def : COS_PAT <COS_cm>;
+
+defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+
+// RECIP_UINT emulation for Cayman
+// The multiplication scales from [0,1] to the unsigned integer range
+def : Pat <
+ (AMDGPUurecip i32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
+ (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
+>;
+
+ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
+ let ADDR = 0;
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+
+def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
+
+
+def RAT_STORE_DWORD_cm : EG_CF_RAT <
+ 0x57, 0x14, 0x1, (outs),
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr",
+ [(global_store i32:$rw_gpr, i32:$index_gpr)]
+> {
+ let eop = 0; // This bit is not used on Cayman.
+}
+
+class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
// Static fields
let VC_INST = 0;
@@ -1420,53 +1649,18 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
// XXX: We can infer this field based on the SRC_GPR. This would allow us
// to store vertex addresses in any channel, not just X.
let SRC_SEL_X = 0;
- let DST_REL = 0;
- // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
- // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
- // however, based on my testing if USE_CONST_FIELDS is set, then all
- // these fields need to be set to 0.
- let USE_CONST_FIELDS = 0;
- let NUM_FORMAT_ALL = 1;
- let FORMAT_COMP_ALL = 0;
- let SRF_MODE_ALL = 0;
+ let SRC_SEL_Y = 0;
+ let STRUCTURED_READ = 0;
+ let LDS_REQ = 0;
+ let COALESCED_READ = 0;
let Inst{31-0} = Word0;
- let Inst{63-32} = Word1;
- // LLVM can only encode 64-bit instructions, so these fields are manually
- // encoded in R600CodeEmitter
- //
- // bits<16> OFFSET;
- // bits<2> ENDIAN_SWAP = 0;
- // bits<1> CONST_BUF_NO_STRIDE = 0;
- // bits<1> MEGA_FETCH = 0;
- // bits<1> ALT_CONST = 0;
- // bits<2> BUFFER_INDEX_MODE = 0;
-
-
-
- // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
- // is done in R600CodeEmitter
- //
- // Inst{79-64} = OFFSET;
- // Inst{81-80} = ENDIAN_SWAP;
- // Inst{82} = CONST_BUF_NO_STRIDE;
- // Inst{83} = MEGA_FETCH;
- // Inst{84} = ALT_CONST;
- // Inst{86-85} = BUFFER_INDEX_MODE;
- // Inst{95-86} = 0; Reserved
-
- // VTX_WORD3 (Padding)
- //
- // Inst{127-96} = 0;
-
- let VTXInst = 1;
}
-class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
+class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
pattern> {
- let MEGA_FETCH_COUNT = 1;
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
@@ -1474,10 +1668,9 @@ class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
let DATA_FORMAT = 1; // FMT_8
}
-class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
+class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
pattern> {
- let MEGA_FETCH_COUNT = 2;
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
@@ -1486,11 +1679,10 @@ class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
}
-class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
- pattern> {
+class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
- let MEGA_FETCH_COUNT = 4;
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
@@ -1507,11 +1699,10 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
let Constraints = "$ptr.ptr = $dst";
}
-class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst),
+class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_cm <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst),
pattern> {
- let MEGA_FETCH_COUNT = 16;
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
let DST_SEL_Z = 2;
@@ -1527,20 +1718,19 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
-
-def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
[(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]
>;
-def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
[(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]
>;
-def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
[(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]
>;
-def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
[(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]
>;
@@ -1549,92 +1739,20 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
//===----------------------------------------------------------------------===//
// 8-bit reads
-def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
[(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]
>;
// 32-bit reads
-def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
[(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]
>;
// 128-bit reads
-def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
[(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]
>;
-//===----------------------------------------------------------------------===//
-// Constant Loads
-// XXX: We are currently storing all constants in the global address space.
-//===----------------------------------------------------------------------===//
-
-def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
- [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]
->;
-
-}
-
-//===----------------------------------------------------------------------===//
-// Regist loads and stores - for indirect addressing
-//===----------------------------------------------------------------------===//
-
-defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
-
-//===----------------------------------------------------------------------===//
-// Cayman Instructions
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isCayman] in {
-
-let isVector = 1 in {
-
-def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
-
-def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
-def MULHI_INT_cm : MULHI_INT_Common<0x90>;
-def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
-def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
-def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
-def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
-def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
-def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
-def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
-def SIN_cm : SIN_Common<0x8D>;
-def COS_cm : COS_Common<0x8E>;
-} // End isVector = 1
-
-def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
-def : SIN_PAT <SIN_cm>;
-def : COS_PAT <COS_cm>;
-
-defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
-
-// RECIP_UINT emulation for Cayman
-// The multiplication scales from [0,1] to the unsigned integer range
-def : Pat <
- (AMDGPUurecip i32:$src0),
- (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
- (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
->;
-
- def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
- let ADDR = 0;
- let POP_COUNT = 0;
- let COUNT = 0;
- }
-
-def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
-
-def RAT_STORE_DWORD_cm : EG_CF_RAT <
- 0x57, 0x14, 0x1, (outs),
- (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
- "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr",
- [(global_store i32:$rw_gpr, i32:$index_gpr)]
-> {
- let eop = 0; // This bit is not used on Cayman.
-}
-
} // End isCayman
//===----------------------------------------------------------------------===//
@@ -1755,7 +1873,7 @@ def CONST_COPY : Instruction {
def TEX_VTX_CONSTBUF :
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
[(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
- VTX_WORD1_GPR, VTX_WORD0 {
+ VTX_WORD1_GPR, VTX_WORD0_eg {
let VC_INST = 0;
let FETCH_TYPE = 2;
@@ -1809,7 +1927,7 @@ def TEX_VTX_CONSTBUF :
def TEX_VTX_TEXBUF:
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
[(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
-VTX_WORD1_GPR, VTX_WORD0 {
+VTX_WORD1_GPR, VTX_WORD0_eg {
let VC_INST = 0;
let FETCH_TYPE = 2;
diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll
new file mode 100644
index 0000000..3033742
--- /dev/null
+++ b/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s
+; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
+
+; NI-CHECK: @vtx_fetch
+; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
+; CM-CHECK: @vtx_fetch
+; CM-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
+
+define void @vtx_fetch(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %0 = load i32 addrspace(1)* %in
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
--
1.8.1.5
More information about the llvm-commits
mailing list