[PATCH 1/2] R600: Emit native instructions for tex
Tom Stellard
tom at stellard.net
Wed Mar 27 20:55:29 PDT 2013
On Thu, Mar 28, 2013 at 12:40:18AM +0100, Vincent Lejeune wrote:
> ---
Just a few style issues; with those changes, this patch is:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 169 +++++++++------------
> lib/Target/R600/R600Instructions.td | 156 +++++++++++++++----
> 2 files changed, 196 insertions(+), 129 deletions(-)
>
> diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> index d207160..00ebb44 100644
> --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
> @@ -66,8 +66,6 @@ private:
> void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
> raw_ostream &OS) const;
> void EmitDst(const MCInst &MI, raw_ostream &OS) const;
> - void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
> - raw_ostream &OS) const;
> void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
>
> void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
> @@ -140,9 +138,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
>
> void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> SmallVectorImpl<MCFixup> &Fixups) const {
> - if (isTexOp(MI.getOpcode())) {
> - EmitTexInstr(MI, Fixups, OS);
> - } else if (isFCOp(MI.getOpcode())){
> + if (isFCOp(MI.getOpcode())){
> EmitFCInstr(MI, OS);
> } else if (MI.getOpcode() == AMDGPU::RETURN ||
> MI.getOpcode() == AMDGPU::BUNDLE ||
> @@ -175,6 +171,76 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
> Emit(InstWord2, OS);
> break;
> }
> + case AMDGPU::TEX_LD:
> + case AMDGPU::TEX_GET_TEXTURE_RESINFO:
> + case AMDGPU::TEX_SAMPLE:
> + case AMDGPU::TEX_SAMPLE_C:
> + case AMDGPU::TEX_SAMPLE_L:
> + case AMDGPU::TEX_SAMPLE_C_L:
> + case AMDGPU::TEX_SAMPLE_LB:
> + case AMDGPU::TEX_SAMPLE_C_LB:
> + case AMDGPU::TEX_SAMPLE_G:
> + case AMDGPU::TEX_SAMPLE_C_G:
> + case AMDGPU::TEX_GET_GRADIENTS_H:
> + case AMDGPU::TEX_GET_GRADIENTS_V:
> + case AMDGPU::TEX_SET_GRADIENTS_H:
> + case AMDGPU::TEX_SET_GRADIENTS_V: {
> + unsigned Opcode = MI.getOpcode();
> + bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
Might as well fix the coding style errors while you are moving the code
around: hasOffsets => HasOffsets
> + unsigned OpOffset = hasOffsets ? 3 : 0;
> + int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
> + int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
> +
> + uint32_t srcSelect[4] = {0, 1, 2, 3};
srcSelect => SrcSelect
> + uint32_t Offsets[3] = {0 , 0, 0};
Extra space before the first comma.
> + uint64_t coordType[4] = {1, 1, 1, 1};
> +
coordType => CoordType
> + if (hasOffsets)
> + for (unsigned i = 0; i < 3; i++)
> + Offsets[i] = MI.getOperand(i + 2).getImm();
> +
> + if (TextureType == TEXTURE_RECT
> + || TextureType == TEXTURE_SHADOWRECT) {
> + coordType[ELEMENT_X] = 0;
> + coordType[ELEMENT_Y] = 0;
> + }
> +
> + if (TextureType == TEXTURE_1D_ARRAY
> + || TextureType == TEXTURE_SHADOW1D_ARRAY) {
According to LLVM style, the || needs to go on the previous line.
> + if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
> + coordType[ELEMENT_Y] = 0;
> + } else {
> + coordType[ELEMENT_Z] = 0;
> + srcSelect[ELEMENT_Z] = ELEMENT_Y;
> + }
> + } else if (TextureType == TEXTURE_2D_ARRAY
> + || TextureType == TEXTURE_SHADOW2D_ARRAY) {
Same here, || on previous line
> + coordType[ELEMENT_Z] = 0;
> + }
> +
> +
> + if ((TextureType == TEXTURE_SHADOW1D
> + || TextureType == TEXTURE_SHADOW2D
> + || TextureType == TEXTURE_SHADOWRECT
> + || TextureType == TEXTURE_SHADOW1D_ARRAY)
> + && Opcode != AMDGPU::TEX_SAMPLE_C_L
> + && Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
> + srcSelect[ELEMENT_W] = ELEMENT_Z;
Same here too, || and && go on the previous line.
> + }
> +
> + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
> + coordType[ELEMENT_X] << 60 | coordType[ELEMENT_Y] << 61 |
> + coordType[ELEMENT_Z] << 62 | coordType[ELEMENT_W] << 63;
> + uint32_t Word2 = Sampler << 15 | srcSelect[ELEMENT_X] << 20 |
> + srcSelect[ELEMENT_Y] << 23 | srcSelect[ELEMENT_Z] << 26 |
> + srcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
> + Offsets[2] << 10;
> +
> + EmitByte(INSTR_TEX, OS);
> + Emit(Word01, OS);
> + Emit(Word2, OS);
> + break;
> + }
> case AMDGPU::EG_ExportSwz:
> case AMDGPU::R600_ExportSwz:
> case AMDGPU::EG_ExportBuf:
> @@ -334,99 +400,6 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
> Emit(InlineConstant.i, OS);
> }
>
> -void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
> - SmallVectorImpl<MCFixup> &Fixups,
> - raw_ostream &OS) const {
> -
> - unsigned Opcode = MI.getOpcode();
> - bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
> - unsigned OpOffset = hasOffsets ? 3 : 0;
> - int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
> - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
> - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
> - unsigned srcSelect[4] = {0, 1, 2, 3};
> -
> - // Emit instruction type
> - EmitByte(1, OS);
> -
> - // Emit instruction
> - EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
> -
> - // Emit resource id
> - EmitByte(Resource, OS);
> -
> - // Emit source register
> - EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
> -
> - // XXX: Emit src isRelativeAddress
> - EmitByte(0, OS);
> -
> - // Emit destination register
> - EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
> -
> - // XXX: Emit dst isRealtiveAddress
> - EmitByte(0, OS);
> -
> - // XXX: Emit dst select
> - EmitByte(0, OS); // X
> - EmitByte(1, OS); // Y
> - EmitByte(2, OS); // Z
> - EmitByte(3, OS); // W
> -
> - // XXX: Emit lod bias
> - EmitByte(0, OS);
> -
> - // XXX: Emit coord types
> - unsigned coordType[4] = {1, 1, 1, 1};
> -
> - if (TextureType == TEXTURE_RECT
> - || TextureType == TEXTURE_SHADOWRECT) {
> - coordType[ELEMENT_X] = 0;
> - coordType[ELEMENT_Y] = 0;
> - }
> -
> - if (TextureType == TEXTURE_1D_ARRAY
> - || TextureType == TEXTURE_SHADOW1D_ARRAY) {
> - if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
> - coordType[ELEMENT_Y] = 0;
> - } else {
> - coordType[ELEMENT_Z] = 0;
> - srcSelect[ELEMENT_Z] = ELEMENT_Y;
> - }
> - } else if (TextureType == TEXTURE_2D_ARRAY
> - || TextureType == TEXTURE_SHADOW2D_ARRAY) {
> - coordType[ELEMENT_Z] = 0;
> - }
> -
> - for (unsigned i = 0; i < 4; i++) {
> - EmitByte(coordType[i], OS);
> - }
> -
> - // XXX: Emit offsets
> - if (hasOffsets)
> - for (unsigned i = 2; i < 5; i++)
> - EmitByte(MI.getOperand(i).getImm()<<1, OS);
> - else
> - EmitNullBytes(3, OS);
> -
> - // Emit sampler id
> - EmitByte(Sampler, OS);
> -
> - // XXX:Emit source select
> - if ((TextureType == TEXTURE_SHADOW1D
> - || TextureType == TEXTURE_SHADOW2D
> - || TextureType == TEXTURE_SHADOWRECT
> - || TextureType == TEXTURE_SHADOW1D_ARRAY)
> - && Opcode != AMDGPU::TEX_SAMPLE_C_L
> - && Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
> - srcSelect[ELEMENT_W] = ELEMENT_Z;
> - }
> -
> - for (unsigned i = 0; i < 4; i++) {
> - EmitByte(srcSelect[i], OS);
> - }
> -}
> -
> void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
>
> // Emit instruction type
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index a6daadf..69e3d7e 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -234,6 +234,80 @@ class VTX_WORD1_GPR {
> let Word1{31} = SRF_MODE_ALL;
> }
>
> +class TEX_WORD0 {
> + field bits<32> Word0;
> +
> + bits<5> TEX_INST;
> + bits<2> INST_MOD;
> + bits<1> FETCH_WHOLE_QUAD;
> + bits<8> RESOURCE_ID;
> + bits<7> SRC_GPR;
> + bits<1> SRC_REL;
> + bits<1> ALT_CONST;
> + bits<2> RESOURCE_INDEX_MODE;
> + bits<2> SAMPLER_INDEX_MODE;
> +
> + let Word0{4-0} = TEX_INST;
> + let Word0{6-5} = INST_MOD;
> + let Word0{7} = FETCH_WHOLE_QUAD;
> + let Word0{15-8} = RESOURCE_ID;
> + let Word0{22-16} = SRC_GPR;
> + let Word0{23} = SRC_REL;
> + let Word0{24} = ALT_CONST;
> + let Word0{26-25} = RESOURCE_INDEX_MODE;
> + let Word0{28-27} = SAMPLER_INDEX_MODE;
> +}
> +
> +class TEX_WORD1 {
> + field bits<32> Word1;
> +
> + bits<7> DST_GPR;
> + bits<1> DST_REL;
> + bits<3> DST_SEL_X;
> + bits<3> DST_SEL_Y;
> + bits<3> DST_SEL_Z;
> + bits<3> DST_SEL_W;
> + bits<7> LOD_BIAS;
> + bits<1> COORD_TYPE_X;
> + bits<1> COORD_TYPE_Y;
> + bits<1> COORD_TYPE_Z;
> + bits<1> COORD_TYPE_W;
> +
> + let Word1{6-0} = DST_GPR;
> + let Word1{7} = DST_REL;
> + let Word1{11-9} = DST_SEL_X;
> + let Word1{14-12} = DST_SEL_Y;
> + let Word1{17-15} = DST_SEL_Z;
> + let Word1{20-18} = DST_SEL_W;
> + let Word1{27-21} = LOD_BIAS;
> + let Word1{28} = COORD_TYPE_X;
> + let Word1{29} = COORD_TYPE_Y;
> + let Word1{30} = COORD_TYPE_Z;
> + let Word1{31} = COORD_TYPE_W;
> +}
> +
> +class TEX_WORD2 {
> + field bits<32> Word2;
> +
> + bits<5> OFFSET_X;
> + bits<5> OFFSET_Y;
> + bits<5> OFFSET_Z;
> + bits<5> SAMPLER_ID;
> + bits<3> SRC_SEL_X;
> + bits<3> SRC_SEL_Y;
> + bits<3> SRC_SEL_Z;
> + bits<3> SRC_SEL_W;
> +
> + let Word2{4-0} = OFFSET_X;
> + let Word2{9-5} = OFFSET_Y;
> + let Word2{14-10} = OFFSET_Z;
> + let Word2{19-15} = SAMPLER_ID;
> + let Word2{22-20} = SRC_SEL_X;
> + let Word2{25-23} = SRC_SEL_Y;
> + let Word2{28-26} = SRC_SEL_Z;
> + let Word2{31-29} = SRC_SEL_W;
> +}
> +
> /*
> XXX: R600 subtarget uses a slightly different encoding than the other
> subtargets. We currently handle this in R600MCCodeEmitter, but we may
> @@ -386,12 +460,32 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
> class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
> InstrItinClass itin = AnyALU> :
> InstR600 <inst,
> - (outs R600_Reg128:$dst),
> - (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
> - !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
> + (outs R600_Reg128:$DST_GPR),
> + (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
> + !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
> pattern,
> - itin>{
> - let Inst {10-0} = inst;
> + itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
> + let Inst{31-0} = Word0;
> + let Inst{63-32} = Word1;
> +
> + let TEX_INST = inst{4-0};
> + let SRC_REL = 0;
> + let DST_REL = 0;
> + let DST_SEL_X = 0;
> + let DST_SEL_Y = 1;
> + let DST_SEL_Z = 2;
> + let DST_SEL_W = 3;
> + let LOD_BIAS = 0;
> +
> + let INST_MOD = 0;
> + let FETCH_WHOLE_QUAD = 0;
> + let ALT_CONST = 0;
> + let SAMPLER_INDEX_MODE = 0;
> +
> + let COORD_TYPE_X = 0;
> + let COORD_TYPE_Y = 0;
> + let COORD_TYPE_Z = 0;
> + let COORD_TYPE_W = 0;
> }
>
> } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
> @@ -866,75 +960,75 @@ def CNDGT_INT : R600_3OP <
> //===----------------------------------------------------------------------===//
>
> def TEX_LD : R600_TEX <
> - 0x03, "TEX_LD",
> - [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> + 3, "TEX_LD",
Unless you have a good reason for changing them, I think the opcodes
should be specified in hexadecimal notation. This is consistent with
the rest of the instruction definitions and also makes it easier to
use the .td files to interpret the binary code.
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR, imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
Again, as long as you're making changes, you might as well wrap all these
long lines too.
> > {
> -let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
> -let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget);
> +let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z, $RESOURCE_ID, $SAMPLER_ID, $textureTarget";
> +let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget);
> }
>
> def TEX_GET_TEXTURE_RESINFO : R600_TEX <
> - 0x04, "TEX_GET_TEXTURE_RESINFO",
> - [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> + 4, "TEX_GET_TEXTURE_RESINFO",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> >;
>
> def TEX_GET_GRADIENTS_H : R600_TEX <
> - 0x07, "TEX_GET_GRADIENTS_H",
> - [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> + 7, "TEX_GET_GRADIENTS_H",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> >;
>
> def TEX_GET_GRADIENTS_V : R600_TEX <
> - 0x08, "TEX_GET_GRADIENTS_V",
> - [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> + 8, "TEX_GET_GRADIENTS_V",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> >;
>
> def TEX_SET_GRADIENTS_H : R600_TEX <
> - 0x0B, "TEX_SET_GRADIENTS_H",
> + 11, "TEX_SET_GRADIENTS_H",
> []
> >;
>
> def TEX_SET_GRADIENTS_V : R600_TEX <
> - 0x0C, "TEX_SET_GRADIENTS_V",
> + 12, "TEX_SET_GRADIENTS_V",
> []
> >;
>
> def TEX_SAMPLE : R600_TEX <
> - 0x10, "TEX_SAMPLE",
> - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> + 16, "TEX_SAMPLE",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> >;
>
> def TEX_SAMPLE_C : R600_TEX <
> - 0x18, "TEX_SAMPLE_C",
> - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
> + 24, "TEX_SAMPLE_C",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
> >;
>
> def TEX_SAMPLE_L : R600_TEX <
> - 0x11, "TEX_SAMPLE_L",
> - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> + 17, "TEX_SAMPLE_L",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> >;
>
> def TEX_SAMPLE_C_L : R600_TEX <
> - 0x19, "TEX_SAMPLE_C_L",
> - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
> + 25, "TEX_SAMPLE_C_L",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
> >;
>
> def TEX_SAMPLE_LB : R600_TEX <
> - 0x12, "TEX_SAMPLE_LB",
> - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> + 18, "TEX_SAMPLE_LB",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
> >;
>
> def TEX_SAMPLE_C_LB : R600_TEX <
> - 0x1A, "TEX_SAMPLE_C_LB",
> - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
> + 26, "TEX_SAMPLE_C_LB",
> + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
> >;
>
> def TEX_SAMPLE_G : R600_TEX <
> - 0x14, "TEX_SAMPLE_G",
> + 20, "TEX_SAMPLE_G",
> []
> >;
>
> def TEX_SAMPLE_C_G : R600_TEX <
> - 0x1C, "TEX_SAMPLE_C_G",
> + 28, "TEX_SAMPLE_C_G",
> []
> >;
>
> --
> 1.8.1.4
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list