[PATCH 1/2] R600/SI: add Gather4 intrinsics (v2)
Marek Olšák
maraeo at gmail.com
Wed Jun 11 17:11:10 PDT 2014
From: Marek Olšák <marek.olsak at amd.com>
This adds a new type of intrinsic and SDNode: SampleRaw.
All fields of the MIMG opcodes are exposed and can be set by Mesa,
even DMASK. All GATHER4 variants are added and there are a lot of them.
v2: document DMASK behavior
---
lib/Target/R600/AMDGPUISelLowering.cpp | 24 +++++++++
lib/Target/R600/AMDGPUISelLowering.h | 31 +++++++++++
lib/Target/R600/SIISelLowering.cpp | 72 +++++++++++++++++++++++++
lib/Target/R600/SIISelLowering.h | 2 +
lib/Target/R600/SIInstrInfo.td | 91 ++++++++++++++++++++++++++++++++
lib/Target/R600/SIInstructions.td | 96 +++++++++++++++++++++++++---------
lib/Target/R600/SIIntrinsics.td | 48 +++++++++++++++++
7 files changed, 340 insertions(+), 24 deletions(-)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 849f169..359161c 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1542,6 +1542,30 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
NODE_NAME_CASE(SAMPLEL)
+ NODE_NAME_CASE(GATHER4)
+ NODE_NAME_CASE(GATHER4_CL)
+ NODE_NAME_CASE(GATHER4_L)
+ NODE_NAME_CASE(GATHER4_B)
+ NODE_NAME_CASE(GATHER4_B_CL)
+ NODE_NAME_CASE(GATHER4_LZ)
+ NODE_NAME_CASE(GATHER4_C)
+ NODE_NAME_CASE(GATHER4_C_CL)
+ NODE_NAME_CASE(GATHER4_C_L)
+ NODE_NAME_CASE(GATHER4_C_B)
+ NODE_NAME_CASE(GATHER4_C_B_CL)
+ NODE_NAME_CASE(GATHER4_C_LZ)
+ NODE_NAME_CASE(GATHER4_O)
+ NODE_NAME_CASE(GATHER4_CL_O)
+ NODE_NAME_CASE(GATHER4_L_O)
+ NODE_NAME_CASE(GATHER4_B_O)
+ NODE_NAME_CASE(GATHER4_B_CL_O)
+ NODE_NAME_CASE(GATHER4_LZ_O)
+ NODE_NAME_CASE(GATHER4_C_O)
+ NODE_NAME_CASE(GATHER4_C_CL_O)
+ NODE_NAME_CASE(GATHER4_C_L_O)
+ NODE_NAME_CASE(GATHER4_C_B_O)
+ NODE_NAME_CASE(GATHER4_C_B_CL_O)
+ NODE_NAME_CASE(GATHER4_C_LZ_O)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index d5d821d..a9af195 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -203,6 +203,37 @@ enum {
SAMPLEB,
SAMPLED,
SAMPLEL,
+
+ // Gather4 opcodes
+ GATHER4,
+ GATHER4_CL,
+ GATHER4_L,
+ GATHER4_B,
+ GATHER4_B_CL,
+ GATHER4_LZ,
+
+ GATHER4_C,
+ GATHER4_C_CL,
+ GATHER4_C_L,
+ GATHER4_C_B,
+ GATHER4_C_B_CL,
+ GATHER4_C_LZ,
+
+ GATHER4_O,
+ GATHER4_CL_O,
+ GATHER4_L_O,
+ GATHER4_B_O,
+ GATHER4_B_CL_O,
+ GATHER4_LZ_O,
+
+ GATHER4_C_O,
+ GATHER4_C_CL_O,
+ GATHER4_C_L_O,
+ GATHER4_C_B_O,
+ GATHER4_C_B_CL_O,
+ GATHER4_C_LZ_O,
+
+ // Memory opcodes
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 1a861d4..909255d 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+ // Gather4 intrinsics
+ case AMDGPUIntrinsic::SI_gather4:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_l:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_b:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_b_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_lz:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG);
+
+ case AMDGPUIntrinsic::SI_gather4_c:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_l:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_b:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_b_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_lz:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG);
+
+ case AMDGPUIntrinsic::SI_gather4_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_cl_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_l_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_b_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_b_cl_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_lz_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ_O, Op, DAG);
+
+ case AMDGPUIntrinsic::SI_gather4_c_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_cl_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_l_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_b_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_b_cl_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL_O, Op, DAG);
+ case AMDGPUIntrinsic::SI_gather4_c_lz_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ_O, Op, DAG);
}
}
@@ -876,6 +929,25 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
Op.getOperand(4));
}
+SDValue SITargetLowering::LowerSampleRawIntrinsic(unsigned Opcode,
+ const SDValue &Op,
+ SelectionDAG &DAG) const { // Rebuilds a SampleRaw intrinsic call as a target node with the given Opcode.
+ SDValue Ops[] = { // Forward operands 1..11 unchanged and in order; operand 0 is skipped (presumably the intrinsic ID - confirm).
+ Op.getOperand(1), // vaddr
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // sampler
+ Op.getOperand(4), // dmask
+ Op.getOperand(5), // unorm
+ Op.getOperand(6), // r128
+ Op.getOperand(7), // da
+ Op.getOperand(8), // glc
+ Op.getOperand(9), // slc
+ Op.getOperand(10), // tfe
+ Op.getOperand(11) // lwe
+ };
+ return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Ops); // Result type and debug location are taken from the original Op.
+}
+
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() != MVT::i64)
return SDValue();
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index c6eaa81..b48da3b 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue Chain, unsigned Offset, bool Signed) const;
SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
SelectionDAG &DAG) const;
+ SDValue LowerSampleRawIntrinsic(unsigned Opcode, const SDValue &Op,
+ SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 3368d49..23a7ca3 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -57,6 +57,50 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
+class SDSampleRaw<string opcode> : SDNode <opcode,
+ SDTypeProfile<1, 11,
+ [SDTCisVT<0, v4f32>, // vdata(VGPR)
+ SDTCisVT<2, v32i8>, // rsrc(SGPR)
+ SDTCisVT<3, v4i32>, // sampler(SGPR)
+ SDTCisVT<4, i32>, // dmask(imm)
+ SDTCisVT<5, i32>, // unorm(imm)
+ SDTCisVT<6, i32>, // r128(imm)
+ SDTCisVT<7, i32>, // da(imm)
+ SDTCisVT<8, i32>, // glc(imm)
+ SDTCisVT<9, i32>, // slc(imm)
+ SDTCisVT<10, i32>, // tfe(imm)
+ SDTCisVT<11, i32> // lwe(imm)
+ ]>
+>;
+
+def SIgather4 : SDSampleRaw<"AMDGPUISD::GATHER4">;
+def SIgather4_cl : SDSampleRaw<"AMDGPUISD::GATHER4_CL">;
+def SIgather4_l : SDSampleRaw<"AMDGPUISD::GATHER4_L">;
+def SIgather4_b : SDSampleRaw<"AMDGPUISD::GATHER4_B">;
+def SIgather4_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL">;
+def SIgather4_lz : SDSampleRaw<"AMDGPUISD::GATHER4_LZ">;
+
+def SIgather4_c : SDSampleRaw<"AMDGPUISD::GATHER4_C">;
+def SIgather4_c_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL">;
+def SIgather4_c_l : SDSampleRaw<"AMDGPUISD::GATHER4_C_L">;
+def SIgather4_c_b : SDSampleRaw<"AMDGPUISD::GATHER4_C_B">;
+def SIgather4_c_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL">;
+def SIgather4_c_lz : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ">;
+
+def SIgather4_o : SDSampleRaw<"AMDGPUISD::GATHER4_O">;
+def SIgather4_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_CL_O">;
+def SIgather4_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_L_O">;
+def SIgather4_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_O">;
+def SIgather4_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL_O">;
+def SIgather4_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_LZ_O">;
+
+def SIgather4_c_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_O">;
+def SIgather4_c_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL_O">;
+def SIgather4_c_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_L_O">;
+def SIgather4_c_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_O">;
+def SIgather4_c_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL_O">;
+def SIgather4_c_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ_O">;
+
// Transformation function, extract the lower 32bit of a 64bit immediate
def LO32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
@@ -658,6 +702,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> {
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
}
+class MIMG_Gather_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
+ op,
+ (outs dst_rc:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+
+ // DMASK was repurposed for GATHER4. 4 components are always
+ // returned and DMASK works like a swizzle - it selects
+ // the component to fetch. The only useful DMASK values are
+ // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ // (red,red,red,red) etc.) The ISA document doesn't mention
+ // this.
+ // Therefore, disable all code which updates DMASK by setting these two:
+ let MIMG = 0;
+ let hasPostISelHook = 0;
+}
+
+multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Gather <bits<7> op, string asm> {
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
+}
+
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index d4a7c5c..d65d88b 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -887,30 +887,30 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
-//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
-//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
-//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
-//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
-//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
-//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
-//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
-//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
-//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
-//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
-//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
-//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
-//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
-//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
-//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
-//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
-//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
-//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
-//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
-//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
-//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
-//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
-//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
-//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
+defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
+defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
+defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
+defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
@@ -1655,6 +1655,54 @@ def : SextInReg <i16, 16>;
/********** Image sampling patterns **********/
/********** ======================= **********/
+class SampleRawPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
+ i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
+ (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
+ $addr, $rsrc, $sampler)
+>;
+
+// Gather4 patterns. Only the variants which make sense are defined.
+def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V2, v2i32>;
+def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
+def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<SIgather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<SIgather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
+
+def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
+def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
+
/* SIsample for simple 1D texture lookup */
def : Pat <
(SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 00e32c0..9d85f17 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -56,11 +56,59 @@ let TargetPrefix = "SI", isTarget = 1 in {
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+ // Fully-flexible SAMPLE instruction.
+ class SampleRaw : Intrinsic <
+ [llvm_v4f32_ty], // vdata(VGPR)
+ [llvm_anyvector_ty, // vaddr(VGPR)
+ llvm_v32i8_ty, // rsrc(SGPR)
+ llvm_v16i8_ty, // sampler(SGPR)
+ llvm_i32_ty, // dmask(imm)
+ llvm_i32_ty, // unorm(imm)
+ llvm_i32_ty, // r128(imm)
+ llvm_i32_ty, // da(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty, // tfe(imm)
+ llvm_i32_ty], // lwe(imm)
+ [IntrNoMem]>;
+
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
def int_SI_sampled : Sample;
def int_SI_samplel : Sample;
+ // Basic gather4
+ def int_SI_gather4 : SampleRaw;
+ def int_SI_gather4_cl : SampleRaw;
+ def int_SI_gather4_l : SampleRaw;
+ def int_SI_gather4_b : SampleRaw;
+ def int_SI_gather4_b_cl : SampleRaw;
+ def int_SI_gather4_lz : SampleRaw;
+
+ // Gather4 with comparison
+ def int_SI_gather4_c : SampleRaw;
+ def int_SI_gather4_c_cl : SampleRaw;
+ def int_SI_gather4_c_l : SampleRaw;
+ def int_SI_gather4_c_b : SampleRaw;
+ def int_SI_gather4_c_b_cl : SampleRaw;
+ def int_SI_gather4_c_lz : SampleRaw;
+
+ // Gather4 with offsets
+ def int_SI_gather4_o : SampleRaw;
+ def int_SI_gather4_cl_o : SampleRaw;
+ def int_SI_gather4_l_o : SampleRaw;
+ def int_SI_gather4_b_o : SampleRaw;
+ def int_SI_gather4_b_cl_o : SampleRaw;
+ def int_SI_gather4_lz_o : SampleRaw;
+
+ // Gather4 with comparison and offsets
+ def int_SI_gather4_c_o : SampleRaw;
+ def int_SI_gather4_c_cl_o : SampleRaw;
+ def int_SI_gather4_c_l_o : SampleRaw;
+ def int_SI_gather4_c_b_o : SampleRaw;
+ def int_SI_gather4_c_b_cl_o : SampleRaw;
+ def int_SI_gather4_c_lz_o : SampleRaw;
+
def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
--
1.9.1
More information about the llvm-commits
mailing list