[Mesa-dev] [PATCH 1/2] R600/SI: add Gather4 intrinsics (v2)

Marek Olšák maraeo at gmail.com
Mon Jun 16 12:19:59 PDT 2014


Why are there SDNodes for the other "sample" intrinsics then?

Marek

On Mon, Jun 16, 2014 at 5:45 PM, Tom Stellard <tom at stellard.net> wrote:
> On Thu, Jun 12, 2014 at 02:11:10AM +0200, Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> This adds a new type of intrinsic and SDNode: SampleRaw.
>> All fields of the MIMG opcodes are exposed and can be set by Mesa,
>> even DMASK. All GATHER4 variants are added and there are a lot of them.
>>
>> v2: document DMASK behavior
>> ---
>>  lib/Target/R600/AMDGPUISelLowering.cpp | 24 +++++++++
>>  lib/Target/R600/AMDGPUISelLowering.h   | 31 +++++++++++
>>  lib/Target/R600/SIISelLowering.cpp     | 72 +++++++++++++++++++++++++
>>  lib/Target/R600/SIISelLowering.h       |  2 +
>>  lib/Target/R600/SIInstrInfo.td         | 91 ++++++++++++++++++++++++++++++++
>>  lib/Target/R600/SIInstructions.td      | 96 +++++++++++++++++++++++++---------
>>  lib/Target/R600/SIIntrinsics.td        | 48 +++++++++++++++++
>>  7 files changed, 340 insertions(+), 24 deletions(-)
>>
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
>> index 849f169..359161c 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
>> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
>> @@ -1542,6 +1542,30 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
>>    NODE_NAME_CASE(SAMPLEB)
>>    NODE_NAME_CASE(SAMPLED)
>>    NODE_NAME_CASE(SAMPLEL)
>> +  NODE_NAME_CASE(GATHER4)
>> +  NODE_NAME_CASE(GATHER4_CL)
>> +  NODE_NAME_CASE(GATHER4_L)
>> +  NODE_NAME_CASE(GATHER4_B)
>> +  NODE_NAME_CASE(GATHER4_B_CL)
>> +  NODE_NAME_CASE(GATHER4_LZ)
>> +  NODE_NAME_CASE(GATHER4_C)
>> +  NODE_NAME_CASE(GATHER4_C_CL)
>> +  NODE_NAME_CASE(GATHER4_C_L)
>> +  NODE_NAME_CASE(GATHER4_C_B)
>> +  NODE_NAME_CASE(GATHER4_C_B_CL)
>> +  NODE_NAME_CASE(GATHER4_C_LZ)
>> +  NODE_NAME_CASE(GATHER4_O)
>> +  NODE_NAME_CASE(GATHER4_CL_O)
>> +  NODE_NAME_CASE(GATHER4_L_O)
>> +  NODE_NAME_CASE(GATHER4_B_O)
>> +  NODE_NAME_CASE(GATHER4_B_CL_O)
>> +  NODE_NAME_CASE(GATHER4_LZ_O)
>> +  NODE_NAME_CASE(GATHER4_C_O)
>> +  NODE_NAME_CASE(GATHER4_C_CL_O)
>> +  NODE_NAME_CASE(GATHER4_C_L_O)
>> +  NODE_NAME_CASE(GATHER4_C_B_O)
>> +  NODE_NAME_CASE(GATHER4_C_B_CL_O)
>> +  NODE_NAME_CASE(GATHER4_C_LZ_O)
>
> You don't need to add new SDNodes for all these instructions; you can just use
> the intrinsic directly in the pattern.
>
> The only reason to add SDNodes is if there are optimizations / special lowering
> we can do for these instructions.
>
>>    NODE_NAME_CASE(STORE_MSKOR)
>>    NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
>>    }
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
>> index d5d821d..a9af195 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.h
>> +++ b/lib/Target/R600/AMDGPUISelLowering.h
>> @@ -203,6 +203,37 @@ enum {
>>    SAMPLEB,
>>    SAMPLED,
>>    SAMPLEL,
>> +
>> +  // Gather4 opcodes
>> +  GATHER4,
>> +  GATHER4_CL,
>> +  GATHER4_L,
>> +  GATHER4_B,
>> +  GATHER4_B_CL,
>> +  GATHER4_LZ,
>> +
>> +  GATHER4_C,
>> +  GATHER4_C_CL,
>> +  GATHER4_C_L,
>> +  GATHER4_C_B,
>> +  GATHER4_C_B_CL,
>> +  GATHER4_C_LZ,
>> +
>> +  GATHER4_O,
>> +  GATHER4_CL_O,
>> +  GATHER4_L_O,
>> +  GATHER4_B_O,
>> +  GATHER4_B_CL_O,
>> +  GATHER4_LZ_O,
>> +
>> +  GATHER4_C_O,
>> +  GATHER4_C_CL_O,
>> +  GATHER4_C_L_O,
>> +  GATHER4_C_B_O,
>> +  GATHER4_C_B_CL_O,
>> +  GATHER4_C_LZ_O,
>> +
>> +  // Memory opcodes
>>    FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>>    STORE_MSKOR,
>>    LOAD_CONSTANT,
>> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
>> index 1a861d4..909255d 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
>>                           Op.getOperand(1),
>>                           Op.getOperand(2),
>>                           Op.getOperand(3));
>> +
>> +    // Gather4 intrinsics
>> +    case AMDGPUIntrinsic::SI_gather4:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_cl:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_l:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_b:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_b_cl:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_lz:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG);
>> +
>> +    case AMDGPUIntrinsic::SI_gather4_c:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_cl:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_l:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_b:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_b_cl:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_lz:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG);
>> +
>> +    case AMDGPUIntrinsic::SI_gather4_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_cl_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_l_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_b_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_b_cl_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_lz_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ_O, Op, DAG);
>> +
>> +    case AMDGPUIntrinsic::SI_gather4_c_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_cl_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_l_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_b_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_b_cl_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL_O, Op, DAG);
>> +    case AMDGPUIntrinsic::SI_gather4_c_lz_o:
>> +      return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ_O, Op, DAG);
>>      }
>>    }
>>
>> @@ -876,6 +929,25 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
>>                       Op.getOperand(4));
>>  }
>>
>> +SDValue SITargetLowering::LowerSampleRawIntrinsic(unsigned Opcode,
>> +                                                  const SDValue &Op,
>> +                                                  SelectionDAG &DAG) const {
>> +  SDValue Ops[] = {
>> +    Op.getOperand(1),
>> +    Op.getOperand(2),
>> +    Op.getOperand(3),
>> +    Op.getOperand(4),
>> +    Op.getOperand(5),
>> +    Op.getOperand(6),
>> +    Op.getOperand(7),
>> +    Op.getOperand(8),
>> +    Op.getOperand(9),
>> +    Op.getOperand(10),
>> +    Op.getOperand(11)
>> +  };
>> +  return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Ops);
>> +}
>> +
>>  SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
>>    if (Op.getValueType() != MVT::i64)
>>      return SDValue();
>> diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
>> index c6eaa81..b48da3b 100644
>> --- a/lib/Target/R600/SIISelLowering.h
>> +++ b/lib/Target/R600/SIISelLowering.h
>> @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
>>                           SDValue Chain, unsigned Offset, bool Signed) const;
>>    SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
>>                                 SelectionDAG &DAG) const;
>> +  SDValue LowerSampleRawIntrinsic(unsigned Opcode, const SDValue &Op,
>> +                                  SelectionDAG &DAG) const;
>>    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
>>    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
>>    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
>> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
>> index 3368d49..23a7ca3 100644
>> --- a/lib/Target/R600/SIInstrInfo.td
>> +++ b/lib/Target/R600/SIInstrInfo.td
>> @@ -57,6 +57,50 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
>>  def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
>>  def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
>>
>> +class SDSampleRaw<string opcode> : SDNode <opcode,
>> +  SDTypeProfile<1, 11,
>> +    [SDTCisVT<0, v4f32>, // vdata(VGPR)
>> +     SDTCisVT<2, v32i8>, // rsrc(SGPR)
>> +     SDTCisVT<3, v4i32>, // sampler(SGPR)
>> +     SDTCisVT<4, i32>,   // dmask(imm)
>> +     SDTCisVT<5, i32>,   // unorm(imm)
>> +     SDTCisVT<6, i32>,   // r128(imm)
>> +     SDTCisVT<7, i32>,   // da(imm)
>> +     SDTCisVT<8, i32>,   // glc(imm)
>> +     SDTCisVT<9, i32>,   // slc(imm)
>> +     SDTCisVT<10, i32>,   // tfe(imm)
>> +     SDTCisVT<11, i32>   // lwe(imm)
>> +    ]>
>> +>;
>> +
>> +def SIgather4 : SDSampleRaw<"AMDGPUISD::GATHER4">;
>> +def SIgather4_cl : SDSampleRaw<"AMDGPUISD::GATHER4_CL">;
>> +def SIgather4_l : SDSampleRaw<"AMDGPUISD::GATHER4_L">;
>> +def SIgather4_b : SDSampleRaw<"AMDGPUISD::GATHER4_B">;
>> +def SIgather4_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL">;
>> +def SIgather4_lz : SDSampleRaw<"AMDGPUISD::GATHER4_LZ">;
>> +
>> +def SIgather4_c : SDSampleRaw<"AMDGPUISD::GATHER4_C">;
>> +def SIgather4_c_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL">;
>> +def SIgather4_c_l : SDSampleRaw<"AMDGPUISD::GATHER4_C_L">;
>> +def SIgather4_c_b : SDSampleRaw<"AMDGPUISD::GATHER4_C_B">;
>> +def SIgather4_c_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL">;
>> +def SIgather4_c_lz : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ">;
>> +
>> +def SIgather4_o : SDSampleRaw<"AMDGPUISD::GATHER4_O">;
>> +def SIgather4_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_CL_O">;
>> +def SIgather4_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_L_O">;
>> +def SIgather4_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_O">;
>> +def SIgather4_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL_O">;
>> +def SIgather4_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_LZ_O">;
>> +
>> +def SIgather4_c_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_O">;
>> +def SIgather4_c_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL_O">;
>> +def SIgather4_c_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_L_O">;
>> +def SIgather4_c_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_O">;
>> +def SIgather4_c_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL_O">;
>> +def SIgather4_c_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ_O">;
>> +
>>  // Transformation function, extract the lower 32bit of a 64bit immediate
>>  def LO32 : SDNodeXForm<imm, [{
>>    return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
>> @@ -658,6 +702,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> {
>>    defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
>>  }
>>
>> +class MIMG_Gather_Helper <bits<7> op, string asm,
>> +                          RegisterClass dst_rc,
>> +                          RegisterClass src_rc> : MIMG <
>> +  op,
>> +  (outs dst_rc:$vdata),
>> +  (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
>> +       i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
>> +       SReg_256:$srsrc, SReg_128:$ssamp),
>> +  asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
>> +     #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
>> +  []> {
>> +  let mayLoad = 1;
>> +  let mayStore = 0;
>> +
>> +  // DMASK was repurposed for GATHER4. 4 components are always
>> +  // returned and DMASK works like a swizzle - it selects
>> +  // the component to fetch. The only useful DMASK values are
>> +  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
>> +  // (red,red,red,red) etc.) The ISA document doesn't mention
>> +  // this.
>> +  // Therefore, disable all code which updates DMASK by setting these two:
>> +  let MIMG = 0;
>> +  let hasPostISelHook = 0;
>> +}
>> +
>> +multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
>> +                                    RegisterClass dst_rc,
>> +                                    int channels> {
>> +  def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>,
>> +            MIMG_Mask<asm#"_V1", channels>;
>> +  def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
>> +            MIMG_Mask<asm#"_V2", channels>;
>> +  def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
>> +            MIMG_Mask<asm#"_V4", channels>;
>> +  def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
>> +            MIMG_Mask<asm#"_V8", channels>;
>> +  def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
>> +            MIMG_Mask<asm#"_V16", channels>;
>> +}
>> +
>> +multiclass MIMG_Gather <bits<7> op, string asm> {
>> +  defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>;
>> +  defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
>> +  defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
>> +  defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
>> +}
>> +
>>  //===----------------------------------------------------------------------===//
>>  // Vector instruction mappings
>>  //===----------------------------------------------------------------------===//
>> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
>> index d4a7c5c..d65d88b 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -887,30 +887,30 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
>>  //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
>>  //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
>>  //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
>> -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
>> -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
>> -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
>> -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
>> -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
>> -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
>> -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
>> -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
>> -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
>> -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
>> -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
>> -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
>> -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
>> -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
>> -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
>> -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
>> -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
>> -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
>> -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
>> -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
>> -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
>> -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
>> -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
>> -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
>> +defm IMAGE_GATHER4          : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
>> +defm IMAGE_GATHER4_CL       : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
>> +defm IMAGE_GATHER4_L        : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
>> +defm IMAGE_GATHER4_B        : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
>> +defm IMAGE_GATHER4_B_CL     : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
>> +defm IMAGE_GATHER4_LZ       : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
>> +defm IMAGE_GATHER4_C        : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
>> +defm IMAGE_GATHER4_C_CL     : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
>> +defm IMAGE_GATHER4_C_L      : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
>> +defm IMAGE_GATHER4_C_B      : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
>> +defm IMAGE_GATHER4_C_B_CL   : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
>> +defm IMAGE_GATHER4_C_LZ     : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
>> +defm IMAGE_GATHER4_O        : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
>> +defm IMAGE_GATHER4_CL_O     : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
>> +defm IMAGE_GATHER4_L_O      : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
>> +defm IMAGE_GATHER4_B_O      : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
>> +defm IMAGE_GATHER4_B_CL_O   : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
>> +defm IMAGE_GATHER4_LZ_O     : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
>> +defm IMAGE_GATHER4_C_O      : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
>> +defm IMAGE_GATHER4_C_CL_O   : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
>> +defm IMAGE_GATHER4_C_L_O    : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
>> +defm IMAGE_GATHER4_C_B_O    : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
>> +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
>> +defm IMAGE_GATHER4_C_LZ_O   : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
>>  //def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
>>  //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
>>  //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
>> @@ -1655,6 +1655,54 @@ def : SextInReg <i16, 16>;
>>  /********** Image sampling patterns **********/
>>  /********** ======================= **********/
>>
>> +class SampleRawPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
>> +  (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
>> +        i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
>> +  (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
>> +          (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
>> +          $addr, $rsrc, $sampler)
>> +>;
>> +
>> +// Gather4 patterns. Only the variants which make sense are defined.
>> +def : SampleRawPattern<SIgather4,           IMAGE_GATHER4_V4_V2,        v2i32>;
>> +def : SampleRawPattern<SIgather4,           IMAGE_GATHER4_V4_V4,        v4i32>;
>> +def : SampleRawPattern<SIgather4_cl,        IMAGE_GATHER4_CL_V4_V4,     v4i32>;
>> +def : SampleRawPattern<SIgather4_l,         IMAGE_GATHER4_L_V4_V4,      v4i32>;
>> +def : SampleRawPattern<SIgather4_b,         IMAGE_GATHER4_B_V4_V4,      v4i32>;
>> +def : SampleRawPattern<SIgather4_b_cl,      IMAGE_GATHER4_B_CL_V4_V4,   v4i32>;
>> +def : SampleRawPattern<SIgather4_b_cl,      IMAGE_GATHER4_B_CL_V4_V8,   v8i32>;
>> +def : SampleRawPattern<SIgather4_lz,        IMAGE_GATHER4_LZ_V4_V2,     v2i32>;
>> +def : SampleRawPattern<SIgather4_lz,        IMAGE_GATHER4_LZ_V4_V4,     v4i32>;
>> +
>> +def : SampleRawPattern<SIgather4_c,         IMAGE_GATHER4_C_V4_V4,      v4i32>;
>> +def : SampleRawPattern<SIgather4_c_cl,      IMAGE_GATHER4_C_CL_V4_V4,   v4i32>;
>> +def : SampleRawPattern<SIgather4_c_cl,      IMAGE_GATHER4_C_CL_V4_V8,   v8i32>;
>> +def : SampleRawPattern<SIgather4_c_l,       IMAGE_GATHER4_C_L_V4_V4,    v4i32>;
>> +def : SampleRawPattern<SIgather4_c_l,       IMAGE_GATHER4_C_L_V4_V8,    v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b,       IMAGE_GATHER4_C_B_V4_V4,    v4i32>;
>> +def : SampleRawPattern<SIgather4_c_b,       IMAGE_GATHER4_C_B_V4_V8,    v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b_cl,    IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_lz,      IMAGE_GATHER4_C_LZ_V4_V4,   v4i32>;
>> +
>> +def : SampleRawPattern<SIgather4_o,         IMAGE_GATHER4_O_V4_V4,      v4i32>;
>> +def : SampleRawPattern<SIgather4_cl_o,      IMAGE_GATHER4_CL_O_V4_V4,   v4i32>;
>> +def : SampleRawPattern<SIgather4_cl_o,      IMAGE_GATHER4_CL_O_V4_V8,   v8i32>;
>> +def : SampleRawPattern<SIgather4_l_o,       IMAGE_GATHER4_L_O_V4_V4,    v4i32>;
>> +def : SampleRawPattern<SIgather4_l_o,       IMAGE_GATHER4_L_O_V4_V8,    v8i32>;
>> +def : SampleRawPattern<SIgather4_b_o,       IMAGE_GATHER4_B_O_V4_V4,    v4i32>;
>> +def : SampleRawPattern<SIgather4_b_o,       IMAGE_GATHER4_B_O_V4_V8,    v8i32>;
>> +def : SampleRawPattern<SIgather4_b_cl_o,    IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_lz_o,      IMAGE_GATHER4_LZ_O_V4_V4,   v4i32>;
>> +
>> +def : SampleRawPattern<SIgather4_c_o,       IMAGE_GATHER4_C_O_V4_V4,    v4i32>;
>> +def : SampleRawPattern<SIgather4_c_o,       IMAGE_GATHER4_C_O_V4_V8,    v8i32>;
>> +def : SampleRawPattern<SIgather4_c_cl_o,    IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_l_o,     IMAGE_GATHER4_C_L_O_V4_V8,  v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b_o,     IMAGE_GATHER4_C_B_O_V4_V8,  v8i32>;
>> +def : SampleRawPattern<SIgather4_c_b_cl_o,  IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
>> +def : SampleRawPattern<SIgather4_c_lz_o,    IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
>> +def : SampleRawPattern<SIgather4_c_lz_o,    IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
>> +
>>  /* SIsample for simple 1D texture lookup */
>>  def : Pat <
>>    (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
>> diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
>> index 00e32c0..9d85f17 100644
>> --- a/lib/Target/R600/SIIntrinsics.td
>> +++ b/lib/Target/R600/SIIntrinsics.td
>> @@ -56,11 +56,59 @@ let TargetPrefix = "SI", isTarget = 1 in {
>>
>>    class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
>>
>> +  // Fully-flexible SAMPLE instruction.
>> +  class SampleRaw : Intrinsic <
>> +    [llvm_v4f32_ty],    // vdata(VGPR)
>> +    [llvm_anyvector_ty, // vaddr(VGPR)
>> +     llvm_v32i8_ty,     // rsrc(SGPR)
>> +     llvm_v16i8_ty,     // sampler(SGPR)
>> +     llvm_i32_ty,       // dmask(imm)
>> +     llvm_i32_ty,       // unorm(imm)
>> +     llvm_i32_ty,       // r128(imm)
>> +     llvm_i32_ty,       // da(imm)
>> +     llvm_i32_ty,       // glc(imm)
>> +     llvm_i32_ty,       // slc(imm)
>> +     llvm_i32_ty,       // tfe(imm)
>> +     llvm_i32_ty],      // lwe(imm)
>> +    [IntrNoMem]>;
>> +
>>    def int_SI_sample : Sample;
>>    def int_SI_sampleb : Sample;
>>    def int_SI_sampled : Sample;
>>    def int_SI_samplel : Sample;
>>
>> +  // Basic gather4
>> +  def int_SI_gather4 : SampleRaw;
>> +  def int_SI_gather4_cl : SampleRaw;
>> +  def int_SI_gather4_l : SampleRaw;
>> +  def int_SI_gather4_b : SampleRaw;
>> +  def int_SI_gather4_b_cl : SampleRaw;
>> +  def int_SI_gather4_lz : SampleRaw;
>> +
>> +  // Gather4 with comparison
>> +  def int_SI_gather4_c : SampleRaw;
>> +  def int_SI_gather4_c_cl : SampleRaw;
>> +  def int_SI_gather4_c_l : SampleRaw;
>> +  def int_SI_gather4_c_b : SampleRaw;
>> +  def int_SI_gather4_c_b_cl : SampleRaw;
>> +  def int_SI_gather4_c_lz : SampleRaw;
>> +
>> +  // Gather4 with offsets
>> +  def int_SI_gather4_o : SampleRaw;
>> +  def int_SI_gather4_cl_o : SampleRaw;
>> +  def int_SI_gather4_l_o : SampleRaw;
>> +  def int_SI_gather4_b_o : SampleRaw;
>> +  def int_SI_gather4_b_cl_o : SampleRaw;
>> +  def int_SI_gather4_lz_o : SampleRaw;
>> +
>> +  // Gather4 with comparison and offsets
>> +  def int_SI_gather4_c_o : SampleRaw;
>> +  def int_SI_gather4_c_cl_o : SampleRaw;
>> +  def int_SI_gather4_c_l_o : SampleRaw;
>> +  def int_SI_gather4_c_b_o : SampleRaw;
>> +  def int_SI_gather4_c_b_cl_o : SampleRaw;
>> +  def int_SI_gather4_c_lz_o : SampleRaw;
>> +
>>    def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
>>
>>    def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
>> --
>> 1.9.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev




More information about the llvm-commits mailing list