[Mesa-dev] [PATCH] R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback

Marek Olšák maraeo at gmail.com
Thu Sep 5 09:43:33 PDT 2013


No, we use 11 user data SGPRs for the vertex shader, but there are
also 6 additional SGPRs loaded by the hardware based on the VGT state
(4 streamout offsets, streamout_enable, and streamout_write_index),
for a total of 17 input SGPRs (SGPR0-SGPR16). The 6 SGPRs can be
enabled by setting SPI_SHADER_PGM_RSRC2_VS.SO_* = 1.

Marek

On Thu, Sep 5, 2013 at 5:44 PM, Tom Stellard <tom at stellard.net> wrote:
> On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote:
>> For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist.
>>
>> The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take
>> a resource descriptor might be nicer.
>>
>> The maximum number of input SGPRs is bumped to 17.
>>
>> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
>> ---
>>  lib/Target/R600/AMDGPUCallingConv.td   |  3 ++-
>>  lib/Target/R600/AMDGPUISelLowering.cpp |  1 +
>>  lib/Target/R600/AMDGPUISelLowering.h   |  1 +
>>  lib/Target/R600/SIISelLowering.cpp     | 39 ++++++++++++++++++++++++++++++++++
>>  lib/Target/R600/SIInstrInfo.td         | 27 +++++++++++++++++++++++
>>  lib/Target/R600/SIInstructions.td      | 29 +++++++++++++++++++++----
>>  lib/Target/R600/SIIntrinsics.td        | 18 ++++++++++++++++
>>  7 files changed, 113 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
>> index 84d3118..d26be32 100644
>> --- a/lib/Target/R600/AMDGPUCallingConv.td
>> +++ b/lib/Target/R600/AMDGPUCallingConv.td
>> @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[
>>
>>    CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
>>      SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
>> -    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
>> +    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
>> +    SGPR16
>
> Why is this necessary?  Are we using all 16 user sgprs now?
>
>>    ]>>>,
>>
>>    CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
>> index 1237323..30d9503 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
>> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
>> @@ -718,5 +718,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
>>    NODE_NAME_CASE(SAMPLED)
>>    NODE_NAME_CASE(SAMPLEL)
>>    NODE_NAME_CASE(STORE_MSKOR)
>> +  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
>>    }
>>  }
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
>> index 75ac4c2..8a68356 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.h
>> +++ b/lib/Target/R600/AMDGPUISelLowering.h
>> @@ -160,6 +160,7 @@ enum {
>>    FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
>>    STORE_MSKOR,
>>    LOAD_CONSTANT,
>> +  TBUFFER_STORE_FORMAT,
>>    LAST_AMDGPU_ISD_NUMBER
>>  };
>>
>> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
>> index f196059..6fa0c85 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
>>    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
>>    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
>>
>> +  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
>> +
>>    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
>>
>>    setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
>> @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
>>                           Op.getOperand(3));
>>      }
>>    }
>> +
>> +  case ISD::INTRINSIC_VOID:
>> +    SDValue Chain = Op.getOperand(0);
>> +    unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
>> +
>> +    switch (IntrinsicID) {
>> +      case AMDGPUIntrinsic::SI_tbuffer_store: {
>> +        SDLoc DL(Op);
>> +        SDValue Ops [] = {
>> +          Chain,
>> +          ResourceDescriptorToi128(Op.getOperand(2), DAG),
>> +          Op.getOperand(3),
>> +          Op.getOperand(4),
>> +          Op.getOperand(5),
>> +          Op.getOperand(6),
>> +          Op.getOperand(7),
>> +          Op.getOperand(8),
>> +          Op.getOperand(9),
>> +          Op.getOperand(10),
>> +          Op.getOperand(11),
>> +          Op.getOperand(12),
>> +          Op.getOperand(13),
>> +          Op.getOperand(14)
>> +        };
>> +        EVT VT = Op.getOperand(3).getValueType();
>> +
>> +        MachineMemOperand *MMO = MF.getMachineMemOperand(
>> +            MachinePointerInfo(),
>> +            MachineMemOperand::MOStore,
>> +            VT.getSizeInBits() / 8, 4);
>> +        return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
>> +                                       Op->getVTList(), Ops,
>> +                                       sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
>> +      }
>> +      default:
>> +        break;
>> +    }
>>    }
>>    return SDValue();
>>  }
>> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
>> index ecc4718..c902feb 100644
>> --- a/lib/Target/R600/SIInstrInfo.td
>> +++ b/lib/Target/R600/SIInstrInfo.td
>> @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
>>                        [SDNPMayLoad, SDNPMemOperand]
>>  >;
>>
>> +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
>> +  SDTypeProfile<0, 13,
>> +    [SDTCisVT<0, i128>,   // rsrc(SGPR)
>> +     SDTCisVT<1, iAny>,   // vdata(VGPR)
>> +     SDTCisVT<2, i32>,    // num_channels(imm)
>> +     SDTCisVT<3, i32>,    // vaddr(VGPR)
>> +     SDTCisVT<4, i32>,    // soffset(SGPR)
>> +     SDTCisVT<5, i32>,    // inst_offset(imm)
>> +     SDTCisVT<6, i32>,    // dfmt(imm)
>> +     SDTCisVT<7, i32>,    // nfmt(imm)
>> +     SDTCisVT<8, i32>,    // offen(imm)
>> +     SDTCisVT<9, i32>,    // idxen(imm)
>> +     SDTCisVT<10, i32>,   // glc(imm)
>> +     SDTCisVT<11, i32>,   // slc(imm)
>> +     SDTCisVT<12, i32>    // tfe(imm)
>> +    ]>,
>> +  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>> +>;
>> +
>>  def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
>>    SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
>>                         SDTCisVT<3, i32>]>
>> @@ -65,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf <
>>    }]>
>>  >;
>>
>> +def as_i1imm : SDNodeXForm<imm, [{
>> +  return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
>> +}]>;
>> +
>> +def as_i8imm : SDNodeXForm<imm, [{
>> +  return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
>> +}]>;
>> +
>>  def as_i16imm : SDNodeXForm<imm, [{
>>    return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
>>  }]>;
>> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
>> index 136f69c..cd59c80 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -475,10 +475,10 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
>>  //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
>>  //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
>>  def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
>> -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
>> -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
>> -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
>> -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
>> +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
>> +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
>> +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
>> +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
>>
>>  let mayLoad = 1 in {
>>
>> @@ -1873,6 +1873,27 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
>>  defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
>>  defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
>>
>> +//===----------------------------------------------------------------------===//
>> +// MTBUF Patterns
>> +//===----------------------------------------------------------------------===//
>> +
>> +// TBUFFER_STORE_FORMAT_*, addr64=0
>> +class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
>> +  (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
>> +                   i32:$soffset, imm:$inst_offset, imm:$dfmt,
>> +                   imm:$nfmt, imm:$offen, imm:$idxen,
>> +                   imm:$glc, imm:$slc, imm:$tfe),
>> +  (opcode
>> +    $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
>> +    (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
>> +    (as_i1imm $slc), (as_i1imm $tfe), $soffset)
>> +>;
>> +
>> +def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
>> +def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
>> +def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
>> +def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
>> +
>>  /********** ====================== **********/
>>  /**********   Indirect adressing   **********/
>>  /********** ====================== **********/
>> diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
>> index d6e26ad..7fcc964 100644
>> --- a/lib/Target/R600/SIIntrinsics.td
>> +++ b/lib/Target/R600/SIIntrinsics.td
>> @@ -20,6 +20,24 @@ let TargetPrefix = "SI", isTarget = 1 in {
>>    def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
>>    def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
>>
>> +  // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
>> +  def int_SI_tbuffer_store : Intrinsic <
>> +    [],
>> +    [llvm_anyint_ty, // rsrc(SGPR)
>> +     llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
>> +     llvm_i32_ty,    // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
>> +     llvm_i32_ty,    // vaddr(VGPR)
>> +     llvm_i32_ty,    // soffset(SGPR)
>> +     llvm_i32_ty,    // inst_offset(imm)
>> +     llvm_i32_ty,    // dfmt(imm)
>> +     llvm_i32_ty,    // nfmt(imm)
>> +     llvm_i32_ty,    // offen(imm)
>> +     llvm_i32_ty,    // idxen(imm)
>> +     llvm_i32_ty,    // glc(imm)
>> +     llvm_i32_ty,    // slc(imm)
>> +     llvm_i32_ty],   // tfe(imm)
>> +    []>;
>> +
>>    class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
>>
>>    def int_SI_sample : Sample;
>> --
>> 1.8.1.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev




More information about the llvm-commits mailing list