[llvm] r282234 - [AMDGPU] Refactor VOP1 and VOP2 instruction TD definitions
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 23 02:08:07 PDT 2016
Author: vpykhtin
Date: Fri Sep 23 04:08:07 2016
New Revision: 282234
URL: http://llvm.org/viewvc/llvm-project?rev=282234&view=rev
Log:
[AMDGPU] Refactor VOP1 and VOP2 instruction TD definitions
Differential revision: https://reviews.llvm.org/D24738
Added:
llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
Modified:
llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/VIInstructions.td
llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td
llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
Modified: llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CIInstructions.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/CIInstructions.td Fri Sep 23 04:08:07 2016
@@ -12,36 +12,4 @@
// S_CBRANCH_CDBGUSER
// S_CBRANCH_CDBGSYS
// S_CBRANCH_CDBGSYS_OR_USER
-// S_CBRANCH_CDBGSYS_AND_USER
-
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let SubtargetPredicate = isCIVI in {
-
-let SchedRW = [WriteDoubleAdd] in {
-defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
- VOP_F64_F64, ftrunc
->;
-defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
- VOP_F64_F64, fceil
->;
-defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
- VOP_F64_F64, ffloor
->;
-defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
- VOP_F64_F64, frint
->;
-} // End SchedRW = [WriteDoubleAdd]
-
-let SchedRW = [WriteQuarterRate32] in {
-defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
- VOP_F32_F32
->;
-defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
- VOP_F32_F32
->;
-} // End SchedRW = [WriteQuarterRate32]
-
-} // End SubtargetPredicate = isCIVI
+// S_CBRANCH_CDBGSYS_AND_USER
\ No newline at end of file
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Fri Sep 23 04:08:07 2016
@@ -145,161 +145,6 @@ class Enc64 {
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
-let Uses = [EXEC] in {
-
-class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VALU = 1;
-}
-
-class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- let VOP1 = 1;
- let Size = 4;
-}
-
-class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- let VOP2 = 1;
- let Size = 4;
-}
-
-class VOP3Common <dag outs, dag ins, string asm = "",
- list<dag> pattern = [], bit HasMods = 0,
- bit VOP3Only = 0> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- // Using complex patterns gives VOP3 patterns a very high complexity rating,
- // but standalone patterns are almost always prefered, so we need to adjust the
- // priority lower. The goal is to use a high number to reduce complexity to
- // zero (or less than zero).
- let AddedComplexity = -1000;
-
- let VOP3 = 1;
- let VALU = 1;
-
- let AsmMatchConverter =
- !if(!eq(VOP3Only,1),
- "cvtVOP3",
- !if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
-
- let AsmVariantName = AMDGPUAsmVariants.VOP3;
-
- let isCodeGenOnly = 0;
-
- int Size = 8;
-
- // Because SGPRs may be allowed if there are multiple operands, we
- // need a post-isel hook to insert copies in order to avoid
- // violating constant bus requirements.
- let hasPostISelHook = 1;
-}
-
-} // End Uses = [EXEC]
-
-//===----------------------------------------------------------------------===//
-// Vector ALU operations
-//===----------------------------------------------------------------------===//
-
-class VOP1e <bits<8> op> : Enc32 {
- bits<8> vdst;
- bits<9> src0;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = op;
- let Inst{24-17} = vdst;
- let Inst{31-25} = 0x3f; //encoding
-}
-
-class VOP2e <bits<6> op> : Enc32 {
- bits<8> vdst;
- bits<9> src0;
- bits<8> src1;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; //encoding
-}
-
-class VOP2_MADKe <bits<6> op> : Enc64 {
-
- bits<8> vdst;
- bits<9> src0;
- bits<8> src1;
- bits<32> imm;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; // encoding
- let Inst{63-32} = imm;
-}
-
-class VOP3a <bits<9> op> : Enc64 {
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<1> clamp;
- bits<2> omod;
-
- let Inst{8} = src0_modifiers{1};
- let Inst{9} = src1_modifiers{1};
- let Inst{10} = src2_modifiers{1};
- let Inst{11} = clamp;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP3e <bits<9> op> : VOP3a <op> {
- bits<8> vdst;
-
- let Inst{7-0} = vdst;
-}
-
-class VOP3be <bits<9> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<7> sdst;
- bits<2> omod;
-
- let Inst{7-0} = vdst;
- let Inst{14-8} = sdst;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
class VINTRPe <bits<2> op> : Enc32 {
bits<8> vdst;
bits<8> vsrc;
@@ -369,17 +214,6 @@ class EXPe : Enc64 {
let Uses = [EXEC] in {
-class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP1Common <outs, ins, asm, pattern>,
- VOP1e<op> {
- let isCodeGenOnly = 0;
-}
-
-class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
- let isCodeGenOnly = 0;
-}
-
class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
let mayLoad = 1;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Fri Sep 23 04:08:07 2016
@@ -14,38 +14,6 @@ def isCIOnly : Predicate<"Subtarget->get
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
-class vop {
- field bits<9> SI3;
- field bits<10> VI3;
-}
-
-class vop1 <bits<8> si, bits<8> vi = si> : vop {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-
- field bits<9> SI3 = {1, 1, si{6-0}};
- field bits<10> VI3 = !add(0x140, vi);
-}
-
-class vop2 <bits<6> si, bits<6> vi = si> : vop {
- field bits<6> SI = si;
- field bits<6> VI = vi;
-
- field bits<9> SI3 = {1, 0, 0, si{5-0}};
- field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
-}
-
-// Specify a VOP2 opcode for SI and VOP3 opcode for VI
-// that doesn't have VOP2 encoding on VI
-class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
- let VI3 = vi;
-}
-
-class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
- let SI3 = si;
- let VI3 = vi;
-}
-
// Execpt for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
def SIEncodingFamily {
@@ -639,18 +607,20 @@ class getVOP3SrcForVT<ValueType VT> {
// Returns 1 if the source arguments have modifiers, 0 if they do not.
// XXX - do f16 instructions?
-class hasModifiers<ValueType SrcVT> {
+class isFloatType<ValueType SrcVT> {
bit ret =
+ !if(!eq(SrcVT.Value, f16.Value), 1,
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
- 0));
+ 0)));
}
-class hasIntModifiers<ValueType SrcVT> {
+class isIntType<ValueType SrcVT> {
bit ret =
+ !if(!eq(SrcVT.Value, i16.Value), 1,
!if(!eq(SrcVT.Value, i32.Value), 1,
!if(!eq(SrcVT.Value, i64.Value), 1,
- 0));
+ 0)));
}
@@ -756,39 +726,21 @@ class getInsSDWA <RegisterClass Src0RC,
// VOP1 without input operands (V_NOP)
(ins),
!if(!eq(NumSrcArgs, 1),
- !if(HasFloatModifiers,
- // VOP1_SDWA with float modifiers
- (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel),
- // VOP1_SDWA with int modifiers
- (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel))
- /* NumSrcArgs == 2 */,
- !if(HasFloatModifiers,
- !if(!eq(DstVT.Size, 1),
- // VOPC_SDWA with float modifiers
- (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
- Src1Mod:$src1_fmodifiers, Src1RC:$src1,
- clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA or VOPC_SDWA with float modifiers
- (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
- Src1Mod:$src1_fmodifiers, Src1RC:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel)),
-
- !if(!eq(DstVT.Size, 1),
- // VOPC_SDWA with int modifiers
- (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
- Src1Mod:$src1_imodifiers, Src1RC:$src1,
- clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA or VOPC_SDWA with int modifiers
- (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
- Src1Mod:$src1_imodifiers, Src1RC:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel))
- /* endif */)));
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel),
+ !if(!eq(NumSrcArgs, 2),
+ !if(!eq(DstVT.Size, 1),
+ // VOPC_SDWA with float modifiers
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
+ // VOP2_SDWA or VOPC_SDWA with float modifiers
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel)),
+ (ins)/* endif */)));
}
// Outs for DPP and SDWA
@@ -852,8 +804,8 @@ class getAsmSDWA <bit HasDst, int NumSrc
" vcc", // use vcc token as dst for VOPC instructioins
"$vdst"),
"");
- string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers");
- string src1 = !if(HasFloatModifiers, "$src1_fmodifiers", "$src1_imodifiers");
+ string src0 = "$src0_modifiers";
+ string src1 = "$src1_modifiers";
string args = !if(!eq(NumSrcArgs, 0),
"",
!if(!eq(NumSrcArgs, 1),
@@ -892,6 +844,14 @@ class getHasExt <int NumSrcArgs, ValueTy
);
}
+class BitOr<bit a, bit b> {
+ bit ret = !if(a, 1, !if(b, 1, 0));
+}
+
+class BitAnd<bit a, bit b> {
+ bit ret = !if(a, !if(b, 1, 0), 0);
+}
+
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
@@ -918,19 +878,27 @@ class VOPProfile <list<ValueType> _ArgVT
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
field bit HasDst32 = HasDst;
+ field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
- field bit HasSrc0Mods = hasModifiers<Src0VT>.ret;
- field bit HasSrc1Mods = hasModifiers<Src1VT>.ret;
- field bit HasSrc2Mods = hasModifiers<Src2VT>.ret;
-
- field bit HasSrc0IntMods = hasIntModifiers<Src0VT>.ret;
- field bit HasSrc1IntMods = hasIntModifiers<Src1VT>.ret;
- field bit HasSrc2IntMods = hasIntModifiers<Src2VT>.ret;
- field bit HasModifiers = HasSrc0Mods;
+ // TODO: Modifiers logic is somewhat adhoc here, to be refined later
+ field bit HasModifiers = isFloatType<Src0VT>.ret;
+
+ field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
+ field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
+ field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
+
+ field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
+ field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
+ field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
+
+ field bit HasSrc0Mods = HasModifiers;
+ field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
+ field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
+
field bit HasOMod = HasModifiers;
field bit HasClamp = HasModifiers;
field bit HasSDWAClamp = HasSrc0;
@@ -997,115 +965,11 @@ def VOP_I32_F32_F32 : VOPProfile <[i32,
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-// Restrict src0 to be VGPR
-def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
- let Src0RC32 = VRegSrc_32;
- let Src0RC64 = VRegSrc_32;
-
- let HasExt = 0;
-}
-
-// Special case because there are no true output operands. Hack vdst
-// to be a src operand. The custom inserter must add a tied implicit
-// def and use of the super register since there seems to be no way to
-// add an implicit def of a virtual register in tablegen.
-def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
- let Src0RC32 = VOPDstOperand<VGPR_32>;
- let Src0RC64 = VOPDstOperand<VGPR_32>;
-
- let Outs = (outs);
- let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
- let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
-
- let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
- let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel);
-
- let Asm32 = getAsm32<1, 1>.ret;
- let Asm64 = getAsm64<1, 1, 0>.ret;
- let AsmDPP = getAsmDPP<1, 1, 0>.ret;
- let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
-
- let HasExt = 0;
- let HasDst = 0;
-}
-
-// Write out to vcc or arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
- let Asm32 = "$vdst, vcc, $src0, $src1";
- let Asm64 = "$vdst, $sdst, $src0, $src1";
- let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-}
-
-// Write out to vcc or arbitrary SGPR and read in from vcc or
-// arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
- // We use VCSrc_b32 to exclude literal constants, even though the
- // encoding normally allows them since the implicit VCC use means
- // using one would always violate the constant bus
- // restriction. SGPRs are still allowed because it should
- // technically be possible to use VCC again as src0.
- let Src0RC32 = VCSrc_b32;
- let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
- let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
- let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-
- // Suppress src2 implied by type since the 32-bit encoding uses an
- // implicit VCC use.
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
-}
-
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
- let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
- let Asm32 = "$vdst, $src0, $src1, vcc";
- let Asm64 = "$vdst, $src0, $src1, $src2";
- let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst);
-
- // Suppress src2 implied by type since the 32-bit encoding uses an
- // implicit VCC use.
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
-}
-
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
-def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
- field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
- field string Asm32 = "$vdst, $src0, $src1, $imm";
- field bit HasExt = 0;
-}
-def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
- field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
- field string Asm32 = "$vdst, $src0, $imm, $src1";
- field bit HasExt = 0;
-}
-def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
- let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
- let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
- FP32InputMods:$src1_modifiers, Src1RC32:$src1,
- VGPR_32:$src2, // stub argument
- dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
- let InsSDWA = (ins FP32InputMods:$src0_fmodifiers, Src0RC32:$src0,
- FP32InputMods:$src1_fmodifiers, Src1RC32:$src1,
- VGPR_32:$src2, // stub argument
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
- let Asm32 = getAsm32<1, 2, f32>.ret;
- let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
- let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
- let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
-}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
@@ -1113,10 +977,6 @@ def VOP_I32_F32_I32_I32 : VOPProfile <[i
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
-class VOP <string opName> {
- string OpName = opName;
-}
-
class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
@@ -1127,556 +987,6 @@ class AtomicNoRet <string noRetOp, bit i
bit IsRet = isRet;
}
-class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOP1Common <outs, ins, "", pattern>,
- VOP <opName>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-
- field bits<8> vdst;
- field bits<9> src0;
-}
-
-class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
- VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
- let AssemblerPredicate = SIAssemblerPredicate;
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
- VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
- string asm = opName#p.Asm32> {
- def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
-
- def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
-
- def _vi : VOP1_Real_vi <opName, op, p.Outs, p.Ins32, asm>;
-
-}
-
-class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
- VOP1_DPPe <op.VI>,
- VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP,
- AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "DPP";
- let DisableDecoder = DisableVIDecoder;
- let src0_modifiers = !if(p.HasModifiers, ?, 0);
- let src1_modifiers = 0;
-}
-
-class SDWADisableFields <VOPProfile p> {
- bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?);
- bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?);
- bits<2> src0_fmodifiers = !if(!eq(p.NumSrcArgs, 0),
- 0,
- !if(p.HasModifiers, ?, 0));
- bits<1> src0_imodifiers = !if(!eq(p.NumSrcArgs, 0),
- 0,
- !if(p.HasModifiers, 0, ?));
- bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6,
- !if(!eq(p.NumSrcArgs, 1), 6,
- ?));
- bits<2> src1_fmodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
- !if(!eq(p.NumSrcArgs, 1), 0,
- !if(p.HasModifiers, ?, 0)));
- bits<1> src1_imodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
- !if(!eq(p.NumSrcArgs, 1), 0,
- !if(p.HasModifiers, 0, ?)));
- bits<3> dst_sel = !if(p.HasDst, ?, 6);
- bits<2> dst_unused = !if(p.HasDst, ?, 2);
- bits<1> clamp = !if(!eq(p.NumSrcArgs, 0), 0, ?);
-}
-
-class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
- VOP1_SDWAe <op.VI>,
- VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
- SDWADisableFields <p> {
- let AsmMatchConverter = "cvtSdwaVOP1";
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
- AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
- string asm = opName#p.Asm32> {
-
- def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
-
- def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
-}
-
-class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOP2Common <outs, ins, "", pattern>,
- VOP <opName>,
- SIMCInstr<opName#"_e32", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP2SI_m <vop2 op, string opName, VOPProfile p, list<dag> pattern,
- string revOp> {
-
- def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
-}
-
-multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
- string revOp> {
-
- def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
-
- def _vi : VOP2_Real_vi <opName, op, p.Outs32, p.Ins32, p.Asm32>;
-
-}
-
-class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
- VOP2_DPPe <op.VI>,
- VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP,
- AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "DPP";
- let DisableDecoder = DisableVIDecoder;
- let src0_modifiers = !if(p.HasModifiers, ?, 0);
- let src1_modifiers = !if(p.HasModifiers, ?, 0);
-}
-
-class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
- VOP2_SDWAe <op.VI>,
- VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
- SDWADisableFields <p> {
- let AsmMatchConverter = "cvtSdwaVOP2";
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
- AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
-
- bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
- bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
- bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
- bits<2> omod = !if(HasModifiers, ?, 0);
- bits<1> clamp = !if(HasModifiers, ?, 0);
- bits<9> src1 = !if(HasSrc1, ?, 0);
- bits<9> src2 = !if(HasSrc2, ?, 0);
-}
-
-class VOP3DisableModFields <bit HasSrc0Mods,
- bit HasSrc1Mods = 0,
- bit HasSrc2Mods = 0,
- bit HasOutputMods = 0> {
- bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
- bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
- bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
- bits<2> omod = !if(HasOutputMods, ?, 0);
- bits<1> clamp = !if(HasOutputMods, ?, 0);
-}
-
-class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, "", pattern, HasMods, VOP3Only>,
- VOP <opName>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e64", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-
- field bit vdst;
- field bit src0;
-}
-
-class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e <op>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3be <op>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class VOP3e_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e <op>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP3e_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<0, 0, HasMods>;
-
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<0, 0, HasMods>;
-}
-
-multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<0, 0, HasMods>;
- // No VI instruction. This class is for SI only.
-}
-
-multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods>;
-
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods>;
-}
-
-multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods>;
-
- // No VI instruction. This class is for SI only.
-}
-
-// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
-// instead of an implicit VCC as in the VOP2b format.
-multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>;
-
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-}
-
-// Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32.
-multiclass VOP3e_2_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>;
-
- def _si : VOP3e_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-
- def _vi : VOP3e_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-}
-
-
-// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
-multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern = []> {
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : VOPAnyCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE>;
- }
-
- def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : VOP3Common <outs, ins, asm, []>,
- VOP3e_vi <op.VI3>,
- VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
- }
-}
-
-multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
- list<dag> pat64> {
-
- defm _e32 : VOP1_m <op, opName, p, pat32>;
-
- defm _e64 : VOP3_1_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
- p.HasModifiers>;
-
- def _dpp : VOP1_DPP <op, opName, p>;
-
- def _sdwa : VOP1_SDWA <op, opName, p>;
-}
-
-multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> : VOP1_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))])
->;
-
-multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> {
-
- defm _e32 : VOP1SI_m <op, opName, P, []>;
-
- defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]),
- opName, P.HasModifiers>;
-}
-
-multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
- list<dag> pat64, string revOp> {
-
- defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
-
- defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
- revOp, p.HasModifiers>;
-
- def _dpp : VOP2_DPP <op, opName, p>;
-
- def _sdwa : VOP2_SDWA <op, opName, p>;
-}
-
-multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp
->;
-
-multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> {
-
- defm _e32 : VOP2SI_m <op, opName, P, [], revOp>;
-
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- opName, revOp, P.HasModifiers>;
-}
-
-multiclass VOP2e_Helper <vop2 op, string opName, VOPProfile p,
- list<dag> pat32, list<dag> pat64,
- string revOp, bit useSGPRInput> {
-
- let SchedRW = [Write32Bit] in {
- let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
- defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
- }
-
- defm _e64 : VOP3e_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
- opName, revOp, p.HasModifiers, useSGPRInput>;
- }
-}
-
-multiclass VOP2eInst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2e_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, !eq(P.NumSrcArgs, 3)
->;
-
-multiclass VOP2b_Helper <vop2 op, string opName, VOPProfile p,
- list<dag> pat32, list<dag> pat64,
- string revOp, bit useSGPRInput> {
-
- let SchedRW = [Write32Bit, WriteSALU] in {
- let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
- defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
- }
-
- defm _e64 : VOP3b_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
- opName, revOp, p.HasModifiers, useSGPRInput>;
- }
-}
-
-multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2b_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, !eq(P.NumSrcArgs, 3)
->;
-
-// A VOP2 instruction that is VOP3-only on VI.
-multiclass VOP2_VI3_Helper <vop23 op, string opName, VOPProfile p,
- list<dag> pat32, list<dag> pat64, string revOp> {
-
- defm _e32 : VOP2SI_m <op, opName, p, pat32, revOp>;
-
- defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
- revOp, p.HasModifiers>;
-}
-
-multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName>
- : VOP2_VI3_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp
->;
-
-multiclass VOP2MADK <vop2 op, string opName, VOPProfile P, list<dag> pattern = []> {
-
- def "" : VOP2_Pseudo <P.Outs, P.Ins32, pattern, opName>;
-
-let isCodeGenOnly = 0 in {
- def _si : VOP2Common <P.Outs, P.Ins32,
- !strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.SI>,
- VOP2_MADKe <op.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : VOP2Common <P.Outs, P.Ins32,
- !strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.VI>,
- VOP2_MADKe <op.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
- }
-} // End isCodeGenOnly = 0
-}
-
-class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
- (Inst i32:$src0_modifiers, P.Src0VT:$src0,
- i32:$src1_modifiers, P.Src1VT:$src1,
- i32:$src2_modifiers, P.Src2VT:$src2,
- i1:$clamp,
- i32:$omod)>;
-
//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Fri Sep 23 04:08:07 2016
@@ -36,236 +36,6 @@ let SubtargetPredicate = isGCN in {
defm EXP : EXP_m;
//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
-defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
-}
-
-let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
-} // End isMoveImm = 1
-
-let Uses = [EXEC] in {
-
-// FIXME: Specify SchedRW for READFIRSTLANE_B32
-
-def V_READFIRSTLANE_B32 : VOP1 <
- 0x00000002,
- (outs SReg_32:$vdst),
- (ins VGPR_32:$src0),
- "v_readfirstlane_b32 $vdst, $src0",
- [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]
-> {
- let isConvergent = 1;
-}
-
-}
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
- VOP_I32_F64, fp_to_sint
->;
-defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
- VOP_F64_I32, sint_to_fp
->;
-defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
- VOP_F32_I32, sint_to_fp
->;
-defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
- VOP_F32_I32, uint_to_fp
->;
-defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
- VOP_I32_F32, fp_to_uint
->;
-defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
- VOP_I32_F32, fp_to_sint
->;
-defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
- VOP_I32_F32, fp_to_f16
->;
-defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
- VOP_F32_I32, f16_to_fp
->;
-defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
- VOP_I32_F32, cvt_rpi_i32_f32>;
-defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
- VOP_I32_F32, cvt_flr_i32_f32>;
-defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>;
-defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
- VOP_F32_F64, fpround
->;
-defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
- VOP_F64_F32, fpextend
->;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte0
->;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte1
->;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte2
->;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte3
->;
-defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
- VOP_I32_F64, fp_to_uint
->;
-defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
- VOP_F64_I32, uint_to_fp
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
- VOP_F32_F32, AMDGPUfract
->;
-defm V_TRUNC_F32 : VOP1Inst <vop1<0x21, 0x1c>, "v_trunc_f32",
- VOP_F32_F32, ftrunc
->;
-defm V_CEIL_F32 : VOP1Inst <vop1<0x22, 0x1d>, "v_ceil_f32",
- VOP_F32_F32, fceil
->;
-defm V_RNDNE_F32 : VOP1Inst <vop1<0x23, 0x1e>, "v_rndne_f32",
- VOP_F32_F32, frint
->;
-defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32",
- VOP_F32_F32, ffloor
->;
-defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32",
- VOP_F32_F32, fexp2
->;
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32",
- VOP_F32_F32, flog2
->;
-defm V_RCP_F32 : VOP1Inst <vop1<0x2a, 0x22>, "v_rcp_f32",
- VOP_F32_F32, AMDGPUrcp
->;
-defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, "v_rcp_iflag_f32",
- VOP_F32_F32
->;
-defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
- VOP_F32_F32, AMDGPUrsq
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-
-defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64",
- VOP_F64_F64, AMDGPUrcp
->;
-defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
- VOP_F64_F64, AMDGPUrsq
->;
-
-} // End SchedRW = [WriteDouble];
-
-defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
- VOP_F32_F32, fsqrt
->;
-
-let SchedRW = [WriteDouble] in {
-
-defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
- VOP_F64_F64, fsqrt
->;
-
-} // End SchedRW = [WriteDouble]
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
- VOP_F32_F32, AMDGPUsin
->;
-defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
- VOP_F32_F32, AMDGPUcos
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
-defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
-defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
-defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
-defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
-defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
- VOP_I32_F64, int_amdgcn_frexp_exp
->;
-
-let SchedRW = [WriteDoubleAdd] in {
-defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
- VOP_F64_F64, int_amdgcn_frexp_mant
->;
-
-defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
- VOP_F64_F64, AMDGPUfract
->;
-} // End SchedRW = [WriteDoubleAdd]
-
-
-defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
- VOP_I32_F32, int_amdgcn_frexp_exp
->;
-defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
- VOP_F32_F32, int_amdgcn_frexp_mant
->;
-let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
-defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
-}
-
-let Uses = [M0, EXEC] in {
-// v_movreld_b32 is a special case because the destination output
- // register is really a source. It isn't actually read (but may be
- // written), and is only to provide the base register to start
- // indexing from. Tablegen seems to not let you define an implicit
- // virtual register output for the super register being written into,
- // so this must have an implicit def of the register added to it.
-defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_MOVRELD>;
-defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_VI32_NO_EXT>;
-defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
-
-} // End Uses = [M0, EXEC]
-
-// These instruction only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
-defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32",
- VOP_F32_F32, int_amdgcn_log_clamp>;
-defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
-defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32",
- VOP_F32_F32, AMDGPUrcp_legacy>;
-defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
- VOP_F32_F32, AMDGPUrsq_clamp
->;
-defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
- VOP_F32_F32, AMDGPUrsq_legacy
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-
-defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
-defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
- VOP_F64_F64, AMDGPUrsq_clamp
->;
-
-} // End SchedRW = [WriteDouble]
-
-} // End SubtargetPredicate = isSICI
-
-//===----------------------------------------------------------------------===//
// VINTRP Instructions
//===----------------------------------------------------------------------===//
@@ -317,198 +87,6 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
} // End Uses = [M0, EXEC]
//===----------------------------------------------------------------------===//
-// VOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-defm V_CNDMASK_B32 : VOP2eInst <vop2<0x0, 0x0>, "v_cndmask_b32",
- VOP2e_I32_I32_I32_I1
->;
-
-let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
- VOP_F32_F32_F32, fadd
->;
-
-defm V_SUB_F32 : VOP2Inst <vop2<0x4, 0x2>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
-defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
- VOP_F32_F32_F32, null_frag, "v_sub_f32"
->;
-} // End isCommutable = 1
-
-let isCommutable = 1 in {
-
-defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmul_legacy
->;
-
-defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
- VOP_F32_F32_F32, fmul
->;
-
-defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
- VOP_I32_I32_I32, AMDGPUmul_i24
->;
-
-defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24",
- VOP_I32_I32_I32, AMDGPUmulhi_i24
->;
-
-defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
- VOP_I32_I32_I32, AMDGPUmul_u24
->;
-
-defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24",
- VOP_I32_I32_I32, AMDGPUmulhi_u24
->;
-
-defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
- fminnum>;
-defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
- fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
-defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
-defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
-defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
-
-defm V_LSHRREV_B32 : VOP2Inst <
- vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshr_b32"
->;
-
-defm V_ASHRREV_I32 : VOP2Inst <
- vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
- "v_ashr_i32"
->;
-
-defm V_LSHLREV_B32 : VOP2Inst <
- vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshl_b32"
->;
-
-defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
-defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
-defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
-
-let Constraints = "$vdst = $src2", DisableEncoding="$src2",
- isConvertibleToThreeAddress = 1 in {
-defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>;
-}
-} // End isCommutable = 1
-
-defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32", VOP_MADMK>;
-
-let isCommutable = 1 in {
-defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32", VOP_MADAK>;
-} // End isCommutable = 1
-
-let isCommutable = 1 in {
-// No patterns so that the scalar instructions are always selected.
-// The scalar versions will be replaced with vector when needed later.
-
-// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
-// but the VI instructions behave the same as the SI versions.
-defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
- VOP2b_I32_I1_I32_I32
->;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
-
-defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
- VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
->;
-
-defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP2b_I32_I1_I32_I32_I1
->;
-defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP2b_I32_I1_I32_I32_I1
->;
-defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
- VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
->;
-
-} // End isCommutable = 1
-
-// These are special and do not read the exec mask.
-let isConvergent = 1, Uses = []<Register> in {
-
-defm V_READLANE_B32 : VOP2SI_3VI_m <
- vop3 <0x001, 0x289>,
- "v_readlane_b32",
- (outs SReg_32:$vdst),
- (ins VGPR_32:$src0, SCSrc_b32:$src1),
- "v_readlane_b32 $vdst, $src0, $src1",
- [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]
->;
-
-defm V_WRITELANE_B32 : VOP2SI_3VI_m <
- vop3 <0x002, 0x28a>,
- "v_writelane_b32",
- (outs VGPR_32:$vdst),
- (ins SReg_32:$src0, SCSrc_b32:$src1),
- "v_writelane_b32 $vdst, $src0, $src1"
->;
-
-} // End isConvergent = 1
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32",
- VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
-defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmin_legacy
->;
-defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmax_legacy
->;
-
-let isCommutable = 1 in {
-defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
-defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
-defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
-} // End isCommutable = 1
-} // End let SubtargetPredicate = SICI
-
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
- VOP_I32_I32_I32
->;
-defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
- VOP_I32_I32_I32
->;
-defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
- VOP_I32_I32_I32, int_amdgcn_mbcnt_lo
->;
-defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
- VOP_I32_I32_I32, int_amdgcn_mbcnt_hi
->;
-defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
- VOP_F32_F32_I32, AMDGPUldexp
->;
-
-defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
- VOP_I32_F32_I32>; // TODO: set "Uses = dst"
-
-defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
- VOP_I32_F32_F32
->;
-defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
- VOP_I32_F32_F32
->;
-defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
- VOP_I32_F32_F32, int_SI_packf16
->;
-defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
- VOP_I32_I32_I32
->;
-defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
- VOP_I32_I32_I32
->;
-
-//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td Fri Sep 23 04:08:07 2016
@@ -11,163 +11,6 @@
//
//===----------------------------------------------------------------------===//
-class VOP3a_vi <bits<10> op> : Enc64 {
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<1> clamp;
- bits<2> omod;
-
- let Inst{8} = src0_modifiers{1};
- let Inst{9} = src1_modifiers{1};
- let Inst{10} = src2_modifiers{1};
- let Inst{15} = clamp;
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP3e_vi <bits<10> op> : VOP3a_vi <op> {
- bits<8> vdst;
-
- let Inst{7-0} = vdst;
-}
-
-class VOP3be_vi <bits<10> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<7> sdst;
- bits<2> omod;
- bits<1> clamp;
-
- let Inst{7-0} = vdst;
- let Inst{14-8} = sdst;
- let Inst{15} = clamp;
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP_DPP <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
- VOPAnyCommon <outs, ins, asm, pattern> {
- let DPP = 1;
- let Size = 8;
-
- let AsmMatchConverter = !if(!eq(HasMods,1), "cvtDPP", "");
- let AsmVariantName = AMDGPUAsmVariants.DPP;
-}
-
-class VOP_DPPe : Enc64 {
- bits<2> src0_modifiers;
- bits<8> src0;
- bits<2> src1_modifiers;
- bits<9> dpp_ctrl;
- bits<1> bound_ctrl;
- bits<4> bank_mask;
- bits<4> row_mask;
-
- let Inst{39-32} = src0;
- let Inst{48-40} = dpp_ctrl;
- let Inst{51} = bound_ctrl;
- let Inst{52} = src0_modifiers{0}; // src0_neg
- let Inst{53} = src0_modifiers{1}; // src0_abs
- let Inst{54} = src1_modifiers{0}; // src1_neg
- let Inst{55} = src1_modifiers{1}; // src1_abs
- let Inst{59-56} = bank_mask;
- let Inst{63-60} = row_mask;
-}
-
-class VOP1_DPPe <bits<8> op> : VOP_DPPe {
- bits<8> vdst;
-
- let Inst{8-0} = 0xfa; // dpp
- let Inst{16-9} = op;
- let Inst{24-17} = vdst;
- let Inst{31-25} = 0x3f; //encoding
-}
-
-class VOP2_DPPe <bits<6> op> : VOP_DPPe {
- bits<8> vdst;
- bits<8> src1;
-
- let Inst{8-0} = 0xfa; //dpp
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; //encoding
-}
-
-class VOP_SDWA <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
- VOPAnyCommon <outs, ins, asm, pattern> {
- let SDWA = 1;
- let Size = 8;
- let AsmVariantName = AMDGPUAsmVariants.SDWA;
-}
-
-class VOP_SDWAe : Enc64 {
- bits<8> src0;
- bits<3> src0_sel;
- bits<2> src0_fmodifiers; // {abs,neg}
- bits<1> src0_imodifiers; // sext
- bits<3> src1_sel;
- bits<2> src1_fmodifiers;
- bits<1> src1_imodifiers;
- bits<3> dst_sel;
- bits<2> dst_unused;
- bits<1> clamp;
-
- let Inst{39-32} = src0;
- let Inst{42-40} = dst_sel;
- let Inst{44-43} = dst_unused;
- let Inst{45} = clamp;
- let Inst{50-48} = src0_sel;
- let Inst{53-52} = src0_fmodifiers;
- let Inst{51} = src0_imodifiers;
- let Inst{58-56} = src1_sel;
- let Inst{61-60} = src1_fmodifiers;
- let Inst{59} = src1_imodifiers;
-}
-
-class VOP1_SDWAe <bits<8> op> : VOP_SDWAe {
- bits<8> vdst;
-
- let Inst{8-0} = 0xf9; // sdwa
- let Inst{16-9} = op;
- let Inst{24-17} = vdst;
- let Inst{31-25} = 0x3f; // encoding
-}
-
-class VOP2_SDWAe <bits<6> op> : VOP_SDWAe {
- bits<8> vdst;
- bits<8> src1;
-
- let Inst{8-0} = 0xf9; // sdwa
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; // encoding
-}
-
class EXPe_vi : EXPe {
let Inst{31-26} = 0x31; //encoding
}
Modified: llvm/trunk/lib/Target/AMDGPU/VIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VIInstructions.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VIInstructions.td Fri Sep 23 04:08:07 2016
@@ -8,109 +8,3 @@
//===----------------------------------------------------------------------===//
// Instruction definitions for VI and newer.
//===----------------------------------------------------------------------===//
-
-let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
-
-let DisableSIDecoder = 1 in {
-
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-defm V_CVT_F16_U16 : VOP1Inst <vop1<0, 0x39>, "v_cvt_f16_u16", VOP_F16_I16>;
-defm V_CVT_F16_I16 : VOP1Inst <vop1<0, 0x3a>, "v_cvt_f16_i16", VOP_F16_I16>;
-defm V_CVT_U16_F16 : VOP1Inst <vop1<0, 0x3b>, "v_cvt_u16_f16", VOP_I16_F16>;
-defm V_CVT_I16_F16 : VOP1Inst <vop1<0, 0x3c>, "v_cvt_i16_f16", VOP_I16_F16>;
-defm V_RCP_F16 : VOP1Inst <vop1<0, 0x3d>, "v_rcp_f16", VOP_F16_F16>;
-defm V_SQRT_F16 : VOP1Inst <vop1<0, 0x3e>, "v_sqrt_f16", VOP_F16_F16>;
-defm V_RSQ_F16 : VOP1Inst <vop1<0, 0x3f>, "v_rsq_f16", VOP_F16_F16>;
-defm V_LOG_F16 : VOP1Inst <vop1<0, 0x40>, "v_log_f16", VOP_F16_F16>;
-defm V_EXP_F16 : VOP1Inst <vop1<0, 0x41>, "v_exp_f16", VOP_F16_F16>;
-defm V_FREXP_MANT_F16 : VOP1Inst <vop1<0, 0x42>, "v_frexp_mant_f16",
- VOP_F16_F16
->;
-defm V_FREXP_EXP_I16_F16 : VOP1Inst <vop1<0, 0x43>, "v_frexp_exp_i16_f16",
- VOP_I16_F16
->;
-defm V_FLOOR_F16 : VOP1Inst <vop1<0, 0x44>, "v_floor_f16", VOP_F16_F16>;
-defm V_CEIL_F16 : VOP1Inst <vop1<0, 0x45>, "v_ceil_f16", VOP_F16_F16>;
-defm V_TRUNC_F16 : VOP1Inst <vop1<0, 0x46>, "v_trunc_f16", VOP_F16_F16>;
-defm V_RNDNE_F16 : VOP1Inst <vop1<0, 0x47>, "v_rndne_f16", VOP_F16_F16>;
-defm V_FRACT_F16 : VOP1Inst <vop1<0, 0x48>, "v_fract_f16", VOP_F16_F16>;
-defm V_SIN_F16 : VOP1Inst <vop1<0, 0x49>, "v_sin_f16", VOP_F16_F16>;
-defm V_COS_F16 : VOP1Inst <vop1<0, 0x4a>, "v_cos_f16", VOP_F16_F16>;
-
-//===----------------------------------------------------------------------===//
-// VOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-let isCommutable = 1 in {
-
-defm V_ADD_F16 : VOP2Inst <vop2<0, 0x1f>, "v_add_f16", VOP_F16_F16_F16>;
-defm V_SUB_F16 : VOP2Inst <vop2<0, 0x20>, "v_sub_f16", VOP_F16_F16_F16>;
-defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16,
- null_frag, "v_sub_f16"
->;
-defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>;
-defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>;
-} // End isCommutable = 1
-defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16", VOP_MADMK>;
-let isCommutable = 1 in {
-defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16", VOP_MADAK>;
-defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>;
-defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16" , VOP_I16_I16_I16>;
-defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>;
-defm V_MUL_LO_U16 : VOP2Inst <vop2<0,0x29>, "v_mul_lo_u16", VOP_I16_I16_I16>;
-} // End isCommutable = 1
-defm V_LSHLREV_B16 : VOP2Inst <vop2<0,0x2a>, "v_lshlrev_b16", VOP_I16_I16_I16>;
-defm V_LSHRREV_B16 : VOP2Inst <vop2<0,0x2b>, "v_lshrrev_b16", VOP_I16_I16_I16>;
-defm V_ASHRREV_B16 : VOP2Inst <vop2<0,0x2c>, "v_ashrrev_b16", VOP_I16_I16_I16>;
-let isCommutable = 1 in {
-defm V_MAX_F16 : VOP2Inst <vop2<0,0x2d>, "v_max_f16", VOP_F16_F16_F16>;
-defm V_MIN_F16 : VOP2Inst <vop2<0,0x2e>, "v_min_f16", VOP_F16_F16_F16>;
-defm V_MAX_U16 : VOP2Inst <vop2<0,0x2f>, "v_max_u16", VOP_I16_I16_I16>;
-defm V_MAX_I16 : VOP2Inst <vop2<0,0x30>, "v_max_i16", VOP_I16_I16_I16>;
-defm V_MIN_U16 : VOP2Inst <vop2<0,0x31>, "v_min_u16", VOP_I16_I16_I16>;
-defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
-} // End isCommutable = 1
-defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
-
-} // let DisableSIDecoder = 1
-
-// Aliases to simplify matching of floating-point instructions that
-// are VOP2 on SI and VOP3 on VI.
-
-class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
- name#" $dst, $src0, $src1",
- (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
->, PredicateControl {
- let UseInstAsmMatchConverter = 0;
- let AsmVariantName = AMDGPUAsmVariants.VOP3;
-}
-
-def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
-
-} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
-
-let Predicates = [isVI] in {
-
-//===----------------------------------------------------------------------===//
-// DPP Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
- imm:$bound_ctrl),
- (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
- (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
->;
-
-//===----------------------------------------------------------------------===//
-// Misc Patterns
-//===----------------------------------------------------------------------===//
-
-} // End Predicates = [isVI]
Added: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td?rev=282234&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td (added)
+++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td Fri Sep 23 04:08:07 2016
@@ -0,0 +1,539 @@
+//===-- VOP1Instructions.td - Vector Instruction Defintions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP1 Classes
+//===----------------------------------------------------------------------===//
+
+class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, 0);
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; //encoding
+}
+
+class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.Outs32, P.Ins32, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.Asm32;
+
+ let Size = 4;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SubtargetPredicate = isGCN;
+
+ let VOP1 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+
+ let AsmVariantName = AMDGPUAsmVariants.Default;
+
+ VOPProfile Pfl = P;
+}
+
+class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
+ list<dag> ret = !if(P.HasModifiers,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
+}
+
+multiclass VOP1Inst <string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> {
+ def _e32 : VOP1_Pseudo <opName, P>;
+ def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
+}
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+let VOPAsmPrefer32Bit = 1 in {
+defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
+}
+
+let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
+} // End isMoveImm = 1
+
+// FIXME: Specify SchedRW for READFIRSTLANE_B32
+// TODO: Make profile for this, there is VOP3 encoding also
+def V_READFIRSTLANE_B32 :
+ InstSI <(outs SReg_32:$vdst),
+ (ins VGPR_32:$src0),
+ "v_readfirstlane_b32 $vdst, $src0",
+ [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
+ Enc32 {
+
+ let isCodeGenOnly = 0;
+ let UseNamedOperandTable = 1;
+
+ let Size = 4;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SubtargetPredicate = isGCN;
+
+ let VOP1 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+ let isConvergent = 1;
+
+ bits<8> vdst;
+ bits<9> src0;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = 0x2;
+ let Inst{24-17} = vdst;
+ let Inst{31-25} = 0x3f; //encoding
+}
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
+defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
+defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
+defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
+defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
+defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
+defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
+defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
+defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
+defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
+defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
+defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
+defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
+defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
+defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
+defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
+} // End SchedRW = [WriteQuarterRate32]
+
+defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
+defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
+defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
+defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
+defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
+defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
+defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
+defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>;
+defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
+} // End SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteDouble] in {
+defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
+defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
+} // End SchedRW = [WriteDouble];
+
+defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
+
+let SchedRW = [WriteDouble] in {
+defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
+} // End SchedRW = [WriteDouble]
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
+defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
+} // End SchedRW = [WriteQuarterRate32]
+
+defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
+defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
+defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
+defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
+defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
+defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
+
+let SchedRW = [WriteDoubleAdd] in {
+defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
+defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
+} // End SchedRW = [WriteDoubleAdd]
+
+defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
+defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
+
+let VOPAsmPrefer32Bit = 1 in {
+defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
+}
+
+// Restrict src0 to be VGPR
+def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
+ let Src0RC32 = VRegSrc_32;
+ let Src0RC64 = VRegSrc_32;
+
+ let HasExt = 0;
+}
+
+// Special case because there are no true output operands. Hack vdst
+// to be a src operand. The custom inserter must add a tied implicit
+// def and use of the super register since there seems to be no way to
+// add an implicit def of a virtual register in tablegen.
+def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
+ let Src0RC32 = VOPDstOperand<VGPR_32>;
+ let Src0RC64 = VOPDstOperand<VGPR_32>;
+
+ let Outs = (outs);
+ let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
+ let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
+
+ let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_modifiers, VCSrc_b32:$src0,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel);
+
+ let Asm32 = getAsm32<1, 1>.ret;
+ let Asm64 = getAsm64<1, 1, 0>.ret;
+ let AsmDPP = getAsmDPP<1, 1, 0>.ret;
+ let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
+
+ let HasExt = 0;
+ let HasDst = 0;
+ let EmitDst = 1; // force vdst emission
+}
+
+let Uses = [M0, EXEC] in {
+// v_movreld_b32 is a special case because the destination output
+ // register is really a source. It isn't actually read (but may be
+ // written), and is only to provide the base register to start
+ // indexing from. Tablegen seems to not let you define an implicit
+ // virtual register output for the super register being written into,
+ // so this must have an implicit def of the register added to it.
+defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
+defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
+defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
+} // End Uses = [M0, EXEC]
+
+// These instruction only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
+defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
+defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
+defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
+defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
+defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
+} // End SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteDouble] in {
+defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
+defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
+} // End SchedRW = [WriteDouble]
+
+} // End SubtargetPredicate = isSICI
+
+
+let SubtargetPredicate = isCIVI in {
+
+let SchedRW = [WriteDoubleAdd] in {
+defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
+defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
+defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
+defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
+} // End SchedRW = [WriteDoubleAdd]
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
+defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
+} // End SchedRW = [WriteQuarterRate32]
+
+} // End SubtargetPredicate = isCIVI
+
+
+let SubtargetPredicate = isVI in {
+
+defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16>;
+defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16>;
+defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16>;
+defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16>;
+defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16>;
+defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16>;
+defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16>;
+defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16>;
+defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16>;
+defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16>;
+defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16>;
+defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16>;
+defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16>;
+defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16>;
+defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16>;
+defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16>;
+defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16>;
+defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Target
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+multiclass VOP1_Real_si <bits<9> op> {
+ let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+ def _e32_si :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+ def _e64_si :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+}
+
+defm V_NOP : VOP1_Real_si <0x0>;
+defm V_MOV_B32 : VOP1_Real_si <0x1>;
+defm V_CVT_I32_F64 : VOP1_Real_si <0x3>;
+defm V_CVT_F64_I32 : VOP1_Real_si <0x4>;
+defm V_CVT_F32_I32 : VOP1_Real_si <0x5>;
+defm V_CVT_F32_U32 : VOP1_Real_si <0x6>;
+defm V_CVT_U32_F32 : VOP1_Real_si <0x7>;
+defm V_CVT_I32_F32 : VOP1_Real_si <0x8>;
+defm V_MOV_FED_B32 : VOP1_Real_si <0x9>;
+defm V_CVT_F16_F32 : VOP1_Real_si <0xa>;
+defm V_CVT_F32_F16 : VOP1_Real_si <0xb>;
+defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>;
+defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>;
+defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>;
+defm V_CVT_F32_F64 : VOP1_Real_si <0xf>;
+defm V_CVT_F64_F32 : VOP1_Real_si <0x10>;
+defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>;
+defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>;
+defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>;
+defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>;
+defm V_CVT_U32_F64 : VOP1_Real_si <0x15>;
+defm V_CVT_F64_U32 : VOP1_Real_si <0x16>;
+defm V_FRACT_F32 : VOP1_Real_si <0x20>;
+defm V_TRUNC_F32 : VOP1_Real_si <0x21>;
+defm V_CEIL_F32 : VOP1_Real_si <0x22>;
+defm V_RNDNE_F32 : VOP1_Real_si <0x23>;
+defm V_FLOOR_F32 : VOP1_Real_si <0x24>;
+defm V_EXP_F32 : VOP1_Real_si <0x25>;
+defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>;
+defm V_LOG_F32 : VOP1_Real_si <0x27>;
+defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>;
+defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>;
+defm V_RCP_F32 : VOP1_Real_si <0x2a>;
+defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>;
+defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>;
+defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>;
+defm V_RSQ_F32 : VOP1_Real_si <0x2e>;
+defm V_RCP_F64 : VOP1_Real_si <0x2f>;
+defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>;
+defm V_RSQ_F64 : VOP1_Real_si <0x31>;
+defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>;
+defm V_SQRT_F32 : VOP1_Real_si <0x33>;
+defm V_SQRT_F64 : VOP1_Real_si <0x34>;
+defm V_SIN_F32 : VOP1_Real_si <0x35>;
+defm V_COS_F32 : VOP1_Real_si <0x36>;
+defm V_NOT_B32 : VOP1_Real_si <0x37>;
+defm V_BFREV_B32 : VOP1_Real_si <0x38>;
+defm V_FFBH_U32 : VOP1_Real_si <0x39>;
+defm V_FFBL_B32 : VOP1_Real_si <0x3a>;
+defm V_FFBH_I32 : VOP1_Real_si <0x3b>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
+defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>;
+defm V_FRACT_F64 : VOP1_Real_si <0x3e>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
+defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>;
+defm V_CLREXCP : VOP1_Real_si <0x41>;
+defm V_MOVRELD_B32 : VOP1_Real_si <0x42>;
+defm V_MOVRELS_B32 : VOP1_Real_si <0x43>;
+defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>;
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+multiclass VOP1_Real_ci <bits<9> op> {
+ let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
+ def _e32_ci :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+ def _e64_ci :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+}
+
+defm V_TRUNC_F64 : VOP1_Real_ci <0x17>;
+defm V_CEIL_F64 : VOP1_Real_ci <0x18>;
+defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>;
+defm V_RNDNE_F64 : VOP1_Real_ci <0x19>;
+defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>;
+defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class VOP1_SDWA <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_SDWA <ps.OpName, P> {
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+ let AsmMatchConverter = "cvtSdwaVOP1";
+
+ bits<8> vdst;
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; // encoding
+}
+
+class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPP <ps.OpName, P> {
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ bits<8> vdst;
+ let Inst{8-0} = 0xfa; // dpp
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; //encoding
+}
+
+multiclass VOP1_Real_vi <bits<10> op> {
+ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ def _e32_vi :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+ def _e64_vi :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+
+ // for now left sdwa/dpp only for asm/dasm
+ // TODO: add corresponding pseudo
+ def _sdwa : VOP1_SDWA<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+}
+
+defm V_NOP : VOP1_Real_vi <0x0>;
+defm V_MOV_B32 : VOP1_Real_vi <0x1>;
+defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
+defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
+defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
+defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
+defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
+defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
+defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
+defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
+defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
+defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
+defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
+defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
+defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
+defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
+defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
+defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
+defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
+defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
+defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
+defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
+defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
+defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
+defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
+defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
+defm V_EXP_F32 : VOP1_Real_vi <0x20>;
+defm V_LOG_F32 : VOP1_Real_vi <0x21>;
+defm V_RCP_F32 : VOP1_Real_vi <0x22>;
+defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
+defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
+defm V_RCP_F64 : VOP1_Real_vi <0x25>;
+defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
+defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
+defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
+defm V_SIN_F32 : VOP1_Real_vi <0x29>;
+defm V_COS_F32 : VOP1_Real_vi <0x2a>;
+defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
+defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
+defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
+defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
+defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
+defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
+defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
+defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
+defm V_CLREXCP : VOP1_Real_vi <0x35>;
+defm V_MOVRELD_B32 : VOP1_Real_vi <0x36>;
+defm V_MOVRELS_B32 : VOP1_Real_vi <0x37>;
+defm V_MOVRELSD_B32 : VOP1_Real_vi <0x38>;
+defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
+defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
+defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
+defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
+defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
+defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
+defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
+defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
+defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
+defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
+defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
+defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
+defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
+defm V_LOG_F16 : VOP1_Real_vi <0x40>;
+defm V_EXP_F16 : VOP1_Real_vi <0x41>;
+defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
+defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
+defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
+defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
+defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
+defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
+defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
+defm V_SIN_F16 : VOP1_Real_vi <0x49>;
+defm V_COS_F16 : VOP1_Real_vi <0x4a>;
+
+let Predicates = [isVI] in {
+
+def : Pat <
+ (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
+ imm:$bound_ctrl),
+ (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
+ (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
+>;
+
+} // End Predicates = [isVI]
Added: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=282234&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (added)
+++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Fri Sep 23 04:08:07 2016
@@ -0,0 +1,608 @@
+//===-- VOP2Instructions.td - Vector Instruction Defintions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP2 Classes
+//===----------------------------------------------------------------------===//
+
+class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0, 0);
+ let Inst{16-9} = !if(P.HasSrc1, src1, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+}
+
+class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
+ bits<32> imm;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0, 0);
+ let Inst{16-9} = !if(P.HasSrc1, src1, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{63-32} = imm;
+}
+
+class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
+ InstSI <P.Outs32, P.Ins32, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#suffix, SIEncodingFamily.NONE>,
+ MnemonicAlias<opName#suffix, opName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.Asm32;
+
+ let Size = 4;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SubtargetPredicate = isGCN;
+
+ let VOP2 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+
+ let AsmVariantName = AMDGPUAsmVariants.Default;
+
+ VOPProfile Pfl = P;
+}
+
+class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
+ list<dag> ret = !if(P.HasModifiers,
+ [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
+}
+
+multiclass VOP2Inst <string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> {
+
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+}
+
+multiclass VOP2bInst <string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
+
+ let SchedRW = [Write32Bit, WriteSALU] in {
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ }
+ def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+ }
+}
+
+multiclass VOP2eInst <string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
+
+ let SchedRW = [Write32Bit] in {
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ }
+ def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+ }
+}
+
+def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
+ field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
+ field string Asm32 = "$vdst, $src0, $src1, $imm";
+ field bit HasExt = 0;
+}
+
+def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
+ field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
+ field string Asm32 = "$vdst, $src0, $imm, $src1";
+ field bit HasExt = 0;
+}
+
+def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
+ let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC32:$src1,
+ VGPR_32:$src2, // stub argument
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsSDWA = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC32:$src1,
+ VGPR_32:$src2, // stub argument
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let Asm32 = getAsm32<1, 2, f32>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
+ let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
+ let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
+ let HasSrc2 = 0;
+ let HasSrc2Mods = 0;
+}
+
+// Write out to vcc or arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+ let Asm32 = "$vdst, vcc, $src0, $src1";
+ let Asm64 = "$vdst, $sdst, $src0, $src1";
+ let Outs32 = (outs DstRC:$vdst);
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+}
+
+// Write out to vcc or arbitrary SGPR and read in from vcc or
+// arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+ // We use VCSrc_b32 to exclude literal constants, even though the
+ // encoding normally allows them since the implicit VCC use means
+ // using one would always violate the constant bus
+ // restriction. SGPRs are still allowed because it should
+ // technically be possible to use VCC again as src0.
+ let Src0RC32 = VCSrc_b32;
+ let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
+ let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
+ let Outs32 = (outs DstRC:$vdst);
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+
+ // Suppress src2 implied by type since the 32-bit encoding uses an
+ // implicit VCC use.
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+}
+
+// Read in from vcc or arbitrary SGPR
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+ let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
+ let Asm32 = "$vdst, $src0, $src1, vcc";
+ let Asm64 = "$vdst, $src0, $src1, $src2";
+ let Outs32 = (outs DstRC:$vdst);
+ let Outs64 = (outs DstRC:$vdst);
+
+ // Suppress src2 implied by type since the 32-bit encoding uses an
+ // implicit VCC use.
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+}
+
+def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
+ let Outs32 = (outs SReg_32:$vdst);
+ let Outs64 = Outs32;
+ let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
+ let Ins64 = Ins32;
+ let Asm32 = " $vdst, $src0, $src1";
+ let Asm64 = Asm32;
+}
+
+def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
+ let Outs32 = (outs VGPR_32:$vdst);
+ let Outs64 = Outs32;
+ let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1);
+ let Ins64 = Ins32;
+ let Asm32 = " $vdst, $src0, $src1";
+ let Asm64 = Asm32;
+}
+
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGCN in {
+
+defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
+def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK>;
+
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
+defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
+defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
+defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
+defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
+defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
+defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
+defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
+defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
+defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
+defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
+defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
+defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
+defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
+defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
+defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
+defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
+defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
+defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;
+
+let Constraints = "$vdst = $src2", DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1 in {
+defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC>;
+}
+
+def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK>;
+
+// No patterns so that the scalar instructions are always selected.
+// The scalar versions will be replaced with vector when needed later.
+
+// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
+// but the VI instructions behave the same as the SI versions.
+defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
+defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
+defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
+defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
+defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
+defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
+} // End isCommutable = 1
+
+// These are special and do not read the exec mask.
+let isConvergent = 1, Uses = []<Register> in {
+def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
+ [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;
+
+def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">;
+} // End isConvergent = 1
+
+defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
+defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>;
+defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
+defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
+defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;
+
+} // End SubtargetPredicate = isGCN
+
+
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
+defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
+
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
+} // End isCommutable = 1
+
+} // End let SubtargetPredicate = SICI
+
+let SubtargetPredicate = isVI in {
+
+def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK>;
+defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
+defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
+defm V_ASHRREV_B16 : VOP2Inst <"v_ashrrev_b16", VOP_I16_I16_I16>;
+defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16>;
+
+let isCommutable = 1 in {
+defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16>;
+defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16>;
+defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
+defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16>;
+defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_F16_F16_F16>;
+def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK>;
+defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
+defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
+defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16>;
+defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
+defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16>;
+defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16>;
+defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
+defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
+defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
+defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>;
+} // End isCommutable = 1
+
+} // End SubtargetPredicate = isVI
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+
+multiclass VOP2_Real_si <bits<6> op> {
+ def _si :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_MADK_si <bits<6> op> {
+ def _si : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_e32_si <bits<6> op> {
+ def _e32_si :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+}
+
+multiclass VOP2_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
+ def _e64_si :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass VOP2be_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
+ def _e64_si :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3be_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
+
+defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>;
+defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>;
+defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>;
+defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>;
+defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>;
+defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>;
+defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>;
+defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>;
+defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>;
+defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>;
+defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>;
+defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>;
+defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>;
+defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>;
+defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>;
+defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>;
+defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>;
+defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>;
+defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>;
+defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>;
+defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>;
+defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>;
+defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>;
+defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>;
+defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>;
+defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>;
+defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>;
+defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>;
+defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>;
+defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
+defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
+
+defm V_READLANE_B32 : VOP2_Real_si <0x01>;
+defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
+
+defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
+defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
+defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>;
+defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>;
+defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>;
+defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>;
+
+defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>;
+defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>;
+defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>;
+defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>;
+defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>;
+defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>;
+defm V_CVT_PK_U16_U32 : VOP2_Real_e32e64_si <0x30>;
+defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
+
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class VOP2_SDWA <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_SDWA <ps.OpName, P> {
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+ let AsmMatchConverter = "cvtSdwaVOP2";
+
+ bits<8> vdst;
+ bits<8> src1;
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+}
+
+class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPP <ps.OpName, P> {
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ bits<8> vdst;
+ bits<8> src1;
+ let Inst{8-0} = 0xfa; //dpp
+ let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+}
+
+let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+
+multiclass VOP32_Real_vi <bits<10> op> {
+ def _vi :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_MADK_vi <bits<6> op> {
+ def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_e32_vi <bits<6> op> {
+ def _e32_vi :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+}
+
+multiclass VOP2_Real_e64_vi <bits<10> op> {
+ def _e64_vi :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
+ def _e64_vi :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
+ VOP2_Real_e32_vi<op>,
+ VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
+
+} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
+
+multiclass VOP2_Real_e32e64_vi <bits<6> op> :
+ Base_VOP2_Real_e32e64_vi<op> {
+ // for now left sdwa/dpp only for asm/dasm
+ // TODO: add corresponding pseudo
+ def _sdwa : VOP2_SDWA<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+ def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+}
+
+defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>;
+defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
+defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
+defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
+defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
+defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
+defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
+defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
+defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>;
+defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
+defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>;
+defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>;
+defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>;
+defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>;
+defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>;
+defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>;
+defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>;
+defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>;
+defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>;
+defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>;
+defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>;
+defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
+defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
+defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
+defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
+defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
+defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
+defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
+defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
+defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
+defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;
+
+defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
+defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
+
+defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>;
+defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>;
+defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>;
+defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>;
+defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>;
+defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>;
+defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>;
+defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>;
+
+defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
+defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
+defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>;
+defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>;
+defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>;
+defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>;
+defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>;
+defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>;
+defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>;
+defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>;
+defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>;
+defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>;
+defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>;
+defm V_ASHRREV_B16 : VOP2_Real_e32e64_vi <0x2c>;
+defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>;
+defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>;
+defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>;
+defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>;
+defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>;
+defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>;
+defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>;
+
+let SubtargetPredicate = isVI in {
+
+// Aliases to simplify matching of floating-point instructions that
+// are VOP2 on SI and VOP3 on VI.
+class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
+ name#" $dst, $src0, $src1",
+ (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
+>, PredicateControl {
+ let UseInstAsmMatchConverter = 0;
+ let AsmVariantName = AMDGPUAsmVariants.VOP3;
+}
+
+def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
+
+} // End SubtargetPredicate = isVI
Modified: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td Fri Sep 23 04:08:07 2016
@@ -39,7 +39,7 @@ class getVOP3Pat<VOPProfile P, SDPattern
}
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
- VOP3_PseudoNew<OpName, P,
+ VOP3_Pseudo<OpName, P,
!if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret),
VOP3Only>;
@@ -118,7 +118,7 @@ let Uses = [VCC, EXEC] in {
// if (vcc)
// result *= 2^32
//
-def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
+def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
getVOP3VCC<VOP_F32_F32_F32_F32_VCC, AMDGPUdiv_fmas>.ret> {
let SchedRW = [WriteFloatFMA];
}
@@ -127,7 +127,7 @@ def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_
// if (vcc)
// result *= 2^64
//
-def V_DIV_FMAS_F64 : VOP3_PseudoNew <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
+def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
let SchedRW = [WriteDouble];
}
@@ -165,12 +165,12 @@ def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_f
def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
} // End SchedRW = [WriteDoubleAdd]
-def V_DIV_SCALE_F32 : VOP3_PseudoNew <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
+def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
}
// Double precision division pre-scale.
-def V_DIV_SCALE_F64 : VOP3_PseudoNew <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
+def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
}
@@ -234,13 +234,13 @@ let isCommutable = 1 in {
let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
multiclass VOP3_Real_si<bits<9> op> {
- def _si : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.SI>,
- VOP3e_siNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
+ def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_si<bits<9> op> {
- def _si : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.SI>,
- VOP3be_siNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
+ def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
@@ -303,8 +303,8 @@ defm V_TRIG_PREOP_F64 : VOP3_Real_si <
//===----------------------------------------------------------------------===//
multiclass VOP3_Real_ci<bits<9> op> {
- def _ci : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.SI>,
- VOP3e_siNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl> {
+ def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicates = [isCIOnly];
let DecoderNamespace = "CI";
}
@@ -323,13 +323,13 @@ defm V_MAD_I64_I32 : VOP3_Real_ci <
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
multiclass VOP3_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.VI>,
- VOP3e_viNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
multiclass VOP3be_Real_vi<bits<10> op> {
- def _vi : VOP3_Real<!cast<VOP3_PseudoNew>(NAME), SIEncodingFamily.VI>,
- VOP3be_viNew <op, !cast<VOP3_PseudoNew>(NAME).Pfl>;
+ def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
Modified: llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td Fri Sep 23 04:08:07 2016
@@ -80,7 +80,7 @@ class VOPC_Real <VOPC_Pseudo ps, int Enc
}
// This class is used only with VOPC instructions. Use $sdst for out operand
-class VOPCInstAlias <VOP3_PseudoNew ps, Instruction inst, VOPProfile p = ps.Pfl> :
+class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
field bit isCompare;
@@ -128,7 +128,7 @@ multiclass VOPC_Pseudos <string opName,
let isCompare = 1;
let isCommutable = 1;
}
- def _e64 : VOP3_PseudoNew<opName, P,
+ def _e64 : VOP3_Pseudo<opName, P,
!if(P.HasModifiers,
[(set i1:$sdst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -398,10 +398,11 @@ class VOPC_Class_Profile<list<SchedReadW
VOPC_Profile<sched, vt, i32> {
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
let Asm64 = "$sdst, $src0_modifiers, $src1";
- let InsSDWA = (ins Src0Mod:$src0_fmodifiers, Src0RC64:$src0,
- Int32InputMods:$src1_imodifiers, Src1RC64:$src1,
+ let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
+ Int32InputMods:$src1_modifiers, Src1RC64:$src1,
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
- let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel";
+ let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
+ let HasSrc1Mods = 0;
let HasClamp = 0;
let HasOMod = 0;
}
@@ -422,7 +423,7 @@ multiclass VOPC_Class_Pseudos <string op
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
- def _e64 : VOP3_PseudoNew<opName, p, getVOPCClassPat64<p>.ret> {
+ def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = p.Schedule;
}
@@ -533,15 +534,15 @@ multiclass VOPC_Real_si <bits<9> op> {
VOPCe<op{7-0}>;
def _e64_si :
- VOP3_Real<!cast<VOP3_PseudoNew>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3a_siNew <op, !cast<VOP3_PseudoNew>(NAME#"_e64").Pfl> {
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3a_si <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
// Encoding used for VOPC instructions encoded as VOP3
// Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
bits<8> sdst;
let Inst{7-0} = sdst;
}
}
- def : VOPCInstAlias <!cast<VOP3_PseudoNew>(NAME#"_e64"),
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
!cast<Instruction>(NAME#"_e32_si")> {
let AssemblerPredicate = isSICI;
}
@@ -764,9 +765,15 @@ defm V_CMPX_CLASS_F64 : VOPC_Real_si <0x
// VI
//===----------------------------------------------------------------------===//
-class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAeNew<P> {
- bits<8> src1;
+class VOPC_SDWA<bits<8> op, VOPC_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_SDWA <ps.OpName, P> {
+ let Defs = ps.Defs;
+ let hasSideEffects = ps.hasSideEffects;
+ let AsmMatchConverter = "cvtSdwaVOPC";
+ let isCompare = ps.isCompare;
+ let isCommutable = ps.isCommutable;
+ bits<8> src1;
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
let Inst{24-17} = op;
@@ -777,21 +784,6 @@ class VOPC_SDWAe <bits<8> op, VOPProfile
let Inst{44-43} = SDWA_UNUSED_PRESERVE;
}
-class VOPC_SDWA<bits<8> op, VOPC_Pseudo ps, VOPProfile p = ps.Pfl> :
- VOP_SDWA <p.OutsSDWA, p.InsSDWA, ps.OpName#p.AsmSDWA, [], p.HasModifiers>,
- VOPC_SDWAe <op, p> {
- let Defs = ps.Defs;
- let hasSideEffects = ps.hasSideEffects;
- let AsmMatchConverter = "cvtSdwaVOPC";
- let SubtargetPredicate = isVI;
- let AssemblerPredicate = !if(p.HasExt, isVI, DisableInst);
- let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA,
- AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA";
- let isCompare = ps.isCompare;
- let isCommutable = ps.isCommutable;
-}
-
multiclass VOPC_Real_vi <bits<10> op> {
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
def _e32_vi :
@@ -799,8 +791,8 @@ multiclass VOPC_Real_vi <bits<10> op> {
VOPCe<op{7-0}>;
def _e64_vi :
- VOP3_Real<!cast<VOP3_PseudoNew>(NAME#"_e64"), SIEncodingFamily.VI>,
- VOP3a_viNew <op, !cast<VOP3_PseudoNew>(NAME#"_e64").Pfl> {
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3a_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
// Encoding used for VOPC instructions encoded as VOP3
// Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
bits<8> sdst;
@@ -812,7 +804,7 @@ multiclass VOPC_Real_vi <bits<10> op> {
// TODO: add corresponding pseudo
def _sdwa : VOPC_SDWA<op{7-0}, !cast<VOPC_Pseudo>(NAME#"_e32")>;
- def : VOPCInstAlias <!cast<VOP3_PseudoNew>(NAME#"_e64"),
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
!cast<Instruction>(NAME#"_e32_vi")> {
let AssemblerPredicate = isVI;
}
Modified: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td?rev=282234&r1=282233&r2=282234&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td Fri Sep 23 04:08:07 2016
@@ -7,7 +7,70 @@
//
//===----------------------------------------------------------------------===//
-class VOP3_PseudoNew <string opName, VOPProfile P, list<dag> pattern, bit VOP3Only = 0> :
+// dummies for outer let
+class LetDummies {
+ bit isCommutable;
+ bit isConvertibleToThreeAddress;
+ bit isMoveImm;
+ bit isReMaterializable;
+ bit isAsCheapAsAMove;
+ bit VOPAsmPrefer32Bit;
+ Predicate SubtargetPredicate;
+ string Constraints;
+ string DisableEncoding;
+ list<SchedReadWrite> SchedRW;
+ list<Register> Uses;
+ list<Register> Defs;
+}
+
+class VOP <string opName> {
+ string OpName = opName;
+}
+
+class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VALU = 1;
+}
+
+class VOP3Common <dag outs, dag ins, string asm = "",
+ list<dag> pattern = [], bit HasMods = 0,
+ bit VOP3Only = 0> :
+ VOPAnyCommon <outs, ins, asm, pattern> {
+
+ // Using complex patterns gives VOP3 patterns a very high complexity rating,
+ // but standalone patterns are almost always prefered, so we need to adjust the
+ // priority lower. The goal is to use a high number to reduce complexity to
+ // zero (or less than zero).
+ let AddedComplexity = -1000;
+
+ let VOP3 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+
+ let AsmMatchConverter =
+ !if(!eq(VOP3Only,1),
+ "cvtVOP3",
+ !if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
+
+ let AsmVariantName = AMDGPUAsmVariants.VOP3;
+
+ let isCodeGenOnly = 0;
+
+ int Size = 8;
+
+ // Because SGPRs may be allowed if there are multiple operands, we
+ // need a post-isel hook to insert copies in order to avoid
+ // violating constant bus requirements.
+ let hasPostISelHook = 1;
+}
+
+
+class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
InstSI <P.Outs64, P.Ins64, "", pattern>,
VOP <opName>,
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
@@ -50,7 +113,7 @@ class VOP3_PseudoNew <string opName, VOP
VOPProfile Pfl = P;
}
-class VOP3_Real <VOP3_PseudoNew ps, int EncodingFamily> :
+class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
@@ -66,7 +129,7 @@ class VOP3_Real <VOP3_PseudoNew ps, int
let TSFlags = ps.TSFlags;
}
-class VOP3aNew<VOPProfile P> : Enc64 {
+class VOP3a<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
@@ -81,7 +144,7 @@ class VOP3aNew<VOPProfile P> : Enc64 {
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
+ let Inst{40-32} = !if(P.HasSrc0, src0, 0);
let Inst{49-41} = !if(P.HasSrc1, src1, 0);
let Inst{58-50} = !if(P.HasSrc2, src2, 0);
let Inst{60-59} = !if(P.HasOMod, omod, 0);
@@ -90,27 +153,27 @@ class VOP3aNew<VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
-class VOP3a_siNew <bits<9> op, VOPProfile P> : VOP3aNew<P> {
+class VOP3a_si <bits<9> op, VOPProfile P> : VOP3a<P> {
let Inst{25-17} = op;
- let Inst{11} = !if(P.HasClamp, clamp, 0);
+ let Inst{11} = !if(P.HasClamp, clamp{0}, 0);
}
-class VOP3a_viNew <bits<10> op, VOPProfile P> : VOP3aNew<P> {
+class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
let Inst{25-16} = op;
- let Inst{15} = !if(P.HasClamp, clamp, 0);
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
}
-class VOP3e_siNew <bits<9> op, VOPProfile P> : VOP3a_siNew <op, P> {
+class VOP3e_si <bits<9> op, VOPProfile P> : VOP3a_si <op, P> {
bits<8> vdst;
- let Inst{7-0} = vdst;
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
}
-class VOP3e_viNew <bits<10> op, VOPProfile P> : VOP3a_viNew <op, P> {
+class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
bits<8> vdst;
- let Inst{7-0} = vdst;
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
}
-class VOP3beNew <VOPProfile P> : Enc64 {
+class VOP3be <VOPProfile P> : Enc64 {
bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
@@ -133,24 +196,22 @@ class VOP3beNew <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
-class VOP3be_siNew <bits<9> op, VOPProfile P> : VOP3beNew<P> {
+class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
let Inst{25-17} = op;
}
-class VOP3be_viNew <bits<10> op, VOPProfile P> : VOP3beNew<P> {
+class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> {
bits<1> clamp;
let Inst{25-16} = op;
- let Inst{15} = !if(P.HasClamp, clamp, 0);
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
}
-class VOP_SDWAeNew<VOPProfile P> : Enc64 {
+class VOP_SDWAe<VOPProfile P> : Enc64 {
bits<8> src0;
bits<3> src0_sel;
- bits<2> src0_fmodifiers; // {abs,neg}
- bits<1> src0_imodifiers; // sext
+ bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
bits<3> src1_sel;
- bits<2> src1_fmodifiers;
- bits<1> src1_imodifiers;
+ bits<2> src1_modifiers;
bits<3> dst_sel;
bits<2> dst_unused;
bits<1> clamp;
@@ -159,16 +220,77 @@ class VOP_SDWAeNew<VOPProfile P> : Enc64
bits<2> SDWA_UNUSED_PRESERVE = 2;
let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
- let Inst{42-40} = !if(P.HasDst, dst_sel{2-0}, SDWA_DWORD{2-0});
- let Inst{44-43} = !if(P.HasDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0});
+ let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA_DWORD{2-0});
+ let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0});
let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA_DWORD{2-0});
- let Inst{53-52} = !if(P.HasSrc0Mods, src0_fmodifiers{1-0}, 0);
- let Inst{51} = !if(P.HasSrc0IntMods, src0_imodifiers{0}, 0);
+ let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
+ let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA_DWORD{2-0});
- let Inst{61-60} = !if(P.HasSrc1Mods, src1_fmodifiers{1-0}, 0);
- let Inst{59} = !if(P.HasSrc1IntMods, src1_imodifiers{0}, 0);
+ let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
+ let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
+}
+
+class VOP_SDWA <string OpName, VOPProfile P> :
+ InstSI <P.OutsSDWA, P.InsSDWA, OpName#P.AsmSDWA, []>,
+ VOP_SDWAe<P> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VALU = 1;
+ let SDWA = 1;
+ let Size = 8;
+
+ let SubtargetPredicate = isVI;
+ let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+ let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
+ AMDGPUAsmVariants.Disable);
+ let DecoderNamespace = "SDWA";
+}
+
+class VOP_DPPe<VOPProfile P> : Enc64 {
+ bits<2> src0_modifiers;
+ bits<8> src0;
+ bits<2> src1_modifiers;
+ bits<9> dpp_ctrl;
+ bits<1> bound_ctrl;
+ bits<4> bank_mask;
+ bits<4> row_mask;
+
+ let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{48-40} = dpp_ctrl;
+ let Inst{51} = bound_ctrl;
+ let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
+ let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
+ let Inst{54} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg
+ let Inst{55} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs
+ let Inst{59-56} = bank_mask;
+ let Inst{63-60} = row_mask;
+}
+
+class VOP_DPP <string OpName, VOPProfile P> :
+ InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
+ VOP_DPPe<P> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+
+ let VALU = 1;
+ let DPP = 1;
+ let Size = 8;
+
+ let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
+ let SubtargetPredicate = isVI;
+ let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+ let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
+ let DecoderNamespace = "DPP";
}
include "VOPCInstructions.td"
+include "VOP1Instructions.td"
+include "VOP2Instructions.td"
include "VOP3Instructions.td"
More information about the llvm-commits
mailing list