[llvm] r311006 - [AMDGPU][MC][GFX9] Added integer clamping support for VOP3 opcodes
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 16 06:51:56 PDT 2017
Author: dpreobra
Date: Wed Aug 16 06:51:56 2017
New Revision: 311006
URL: http://llvm.org/viewvc/llvm-project?rev=311006&view=rev
Log:
[AMDGPU][MC][GFX9] Added integer clamping support for VOP3 opcodes
See Bug 34152: https://bugs.llvm.org/show_bug.cgi?id=34152
Reviewers: SamWot, artem.tamazov, arsenm
Differential Revision: https://reviews.llvm.org/D36674
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/SIDefines.h
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s
llvm/trunk/test/MC/AMDGPU/vop3.s
llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt
llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Wed Aug 16 06:51:56 2017
@@ -274,6 +274,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
+def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
+ "HasIntClamp",
+ "true",
+ "Support clamp for integer destination"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -452,7 +458,8 @@ def FeatureVolcanicIslands : SubtargetFe
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
- FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP
+ FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
+ FeatureIntClamp
]
>;
@@ -462,7 +469,7 @@ def FeatureGFX9 : SubtargetFeatureGenera
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
- FeatureFastFMAF32, FeatureDPP,
+ FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts
@@ -704,6 +711,9 @@ def HasSDWA9 : Predicate<"Subtarget->has
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
+def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
+ AssemblerPredicate<"FeatureIntClamp">;
+
class PredicateControl {
Predicate SubtargetPredicate;
Predicate SIAssemblerPredicate = isSICI;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Wed Aug 16 06:51:56 2017
@@ -702,14 +702,16 @@ def cvt_flr_i32_f32 : PatFrag <
[{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
-class IMad24Pat<Instruction Inst> : Pat <
+class IMad24Pat<Instruction Inst, bit HasClamp = 0> : Pat <
(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
- (Inst $src0, $src1, $src2)
+ !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
+ (Inst $src0, $src1, $src2))
>;
-class UMad24Pat<Instruction Inst> : Pat <
+class UMad24Pat<Instruction Inst, bit HasClamp = 0> : Pat <
(add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
- (Inst $src0, $src1, $src2)
+ !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
+ (Inst $src0, $src1, $src2))
>;
class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed Aug 16 06:51:56 2017
@@ -135,6 +135,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
+ HasIntClamp(false),
HasVOP3PInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed Aug 16 06:51:56 2017
@@ -145,6 +145,7 @@ protected:
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool HasIntClamp;
bool HasVOP3PInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
@@ -241,6 +242,10 @@ public:
return Has16BitInsts;
}
+ bool hasIntClamp() const {
+ return HasIntClamp;
+ }
+
bool hasVOP3PInsts() const {
return HasVOP3PInsts;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Aug 16 06:51:56 2017
@@ -911,6 +911,10 @@ public:
return !isVI();
}
+ bool hasIntClamp() const {
+ return getFeatureBits()[AMDGPU::FeatureIntClamp];
+ }
+
AMDGPUTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -1011,6 +1015,7 @@ private:
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
bool validateConstantBusLimitations(const MCInst &Inst);
bool validateEarlyClobberLimitations(const MCInst &Inst);
+ bool validateIntClampSupported(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -2199,6 +2204,20 @@ bool AMDGPUAsmParser::validateEarlyClobb
return true;
}
+bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
+
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
+ int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
+ assert(ClampIdx != -1);
+ return Inst.getOperand(ClampIdx).getImm() == 0;
+ }
+
+ return true;
+}
+
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc) {
if (!validateConstantBusLimitations(Inst)) {
@@ -2211,6 +2230,11 @@ bool AMDGPUAsmParser::validateInstructio
"destination must be different than all sources");
return false;
}
+ if (!validateIntClampSupported(Inst)) {
+ Error(IDLoc,
+ "integer clamping is not supported on this GPU");
+ return false;
+ }
return true;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Wed Aug 16 06:51:56 2017
@@ -70,7 +70,8 @@ enum : uint64_t {
HasFPClamp = UINT64_C(1) << 42,
VOP3_OPSEL = UINT64_C(1) << 43,
maybeAtomic = UINT64_C(1) << 44,
- F16_ZFILL = UINT64_C(1) << 45
+ F16_ZFILL = UINT64_C(1) << 45,
+ IntClamp = UINT64_C(1) << 46
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Wed Aug 16 06:51:56 2017
@@ -94,6 +94,10 @@ class InstSI <dag outs, dag ins, string
// unused bits in dst. Note that new GFX9 opcodes preserve unused bits.
field bit F16_ZFILL = 0;
+ // This bit indicates that instruction may support integer clamping
+ // which depends on GPU features.
+ field bit IntClamp = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -142,6 +146,7 @@ class InstSI <dag outs, dag ins, string
let TSFlags{44} = maybeAtomic;
let TSFlags{45} = F16_ZFILL;
+ let TSFlags{46} = IntClamp;
let SchedRW = [Write32Bit];
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Aug 16 06:51:56 2017
@@ -1072,7 +1072,7 @@ class getIns32 <RegisterOperand Src0RC,
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasModifiers, bit HasOMod,
+ bit HasIntClamp, bit HasModifiers, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
@@ -1087,7 +1087,9 @@ class getIns64 <RegisterOperand Src0RC,
clampmod:$clamp, omod:$omod)
/* else */,
// VOP1 without modifiers
- (ins Src0RC:$src0)
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0RC:$src0, clampmod:$clamp),
+ (ins Src0RC:$src0))
/* endif */ ),
!if (!eq(NumSrcArgs, 2),
!if (!eq(HasModifiers, 1),
@@ -1101,7 +1103,10 @@ class getIns64 <RegisterOperand Src0RC,
clampmod:$clamp))
/* else */,
// VOP2 without modifiers
- (ins Src0RC:$src0, Src1RC:$src1)
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0RC:$src0, Src1RC:$src1, clampmod:$clamp),
+ (ins Src0RC:$src0, Src1RC:$src1))
+
/* endif */ )
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
@@ -1117,7 +1122,9 @@ class getIns64 <RegisterOperand Src0RC,
clampmod:$clamp))
/* else */,
// VOP3 without modifiers
- (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod:$clamp),
+ (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
/* endif */ ))));
}
@@ -1305,7 +1312,7 @@ class getAsm32 <bit HasDst, int NumSrcAr
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
-class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers,
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
bit HasOMod, ValueType DstVT = i32> {
string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
@@ -1313,9 +1320,10 @@ class getAsm64 <bit HasDst, int NumSrcAr
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
" $src1_modifiers,"));
string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+ string iclamp = !if(HasIntClamp, "$clamp", "");
string ret =
!if(!eq(HasModifiers, 0),
- getAsm32<HasDst, NumSrcArgs, DstVT>.ret,
+ getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
}
@@ -1520,6 +1528,7 @@ class VOPProfile <list<ValueType> _ArgVT
field bit HasClamp = HasModifiers;
field bit HasSDWAClamp = EmitDst;
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
+ field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
field bit HasHigh = 0;
field bit IsPacked = isPackedType<Src0VT>.ret;
@@ -1545,7 +1554,7 @@ class VOPProfile <list<ValueType> _ArgVT
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasModifiers, HasOMod, Src0Mod, Src1Mod,
+ HasIntClamp, HasModifiers, HasOMod, Src0Mod, Src1Mod,
Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp,
@@ -1564,7 +1573,7 @@ class VOPProfile <list<ValueType> _ArgVT
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
- field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
+ field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
HasClamp,
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Wed Aug 16 06:51:56 2017
@@ -1033,8 +1033,8 @@ def : Pat <
// VOP3 Patterns
//===----------------------------------------------------------------------===//
-def : IMad24Pat<V_MAD_I32_I24>;
-def : UMad24Pat<V_MAD_U32_U24>;
+def : IMad24Pat<V_MAD_I32_I24, 1>;
+def : UMad24Pat<V_MAD_U32_U24, 1>;
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
@@ -1083,7 +1083,7 @@ def : Pat <
(add (sub_oneuse (umax i32:$src0, i32:$src1),
(umin i32:$src0, i32:$src1)),
i32:$src2),
- (V_SAD_U32 $src0, $src1, $src2)
+ (V_SAD_U32 $src0, $src1, $src2, (i1 0))
>;
def : Pat <
@@ -1091,7 +1091,7 @@ def : Pat <
(sub i32:$src0, i32:$src1),
(sub i32:$src1, i32:$src0)),
i32:$src2),
- (V_SAD_U32 $src0, $src1, $src2)
+ (V_SAD_U32 $src0, $src1, $src2, (i1 0))
>;
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td Wed Aug 16 06:51:56 2017
@@ -275,7 +275,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i
src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
- let Asm64 = getAsm64<1, 1, 0, 1>.ret;
+ let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
let AsmSDWA = getAsmSDWA<1, 1>.ret;
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
Modified: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Wed Aug 16 06:51:56 2017
@@ -208,7 +208,7 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+ 0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins DstRCDPP:$old,
Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
@@ -222,7 +222,7 @@ class VOP_MAC <ValueType vt> : VOPProfil
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
- let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
+ let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
let AsmSDWA = getAsmSDWA<1, 2, vt>.ret;
let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret;
@@ -235,13 +235,13 @@ class VOP_MAC <ValueType vt> : VOPProfil
def VOP_MAC_F16 : VOP_MAC <f16> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f16>.ret;
+ let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f16>.ret;
}
def VOP_MAC_F32 : VOP_MAC <f32> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f32>.ret;
+ let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, f32>.ret;
}
// Write out to vcc or arbitrary SGPR.
Modified: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td Wed Aug 16 06:51:56 2017
@@ -102,10 +102,25 @@ class getVOP3Pat<VOPProfile P, SDPattern
ret1));
}
+class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))];
+ list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))];
+ list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))];
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
VOP3_Pseudo<OpName, P,
- !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret),
- VOP3Only>;
+ !if(P.HasModifiers,
+ getVOP3ModPat<P, node>.ret,
+ !if(P.HasIntClamp,
+ getVOP3ClampPat<P, node>.ret,
+ getVOP3Pat<P, node>.ret)),
+ VOP3Only> {
+ let IntClamp = P.HasIntClamp;
+}
class VOP3OpSelInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
VOP3_Pseudo<OpName, P,
@@ -143,6 +158,14 @@ class VOP3_Profile<VOPProfile P> : VOPPr
let Asm64 = " " # P.Asm64;
}
+class VOP3Clamp_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
+ let HasClamp = 1;
+
+ // FIXME: Hack to stop printing _e64
+ let Outs64 = (outs DstRC.RegClass:$vdst);
+ let Asm64 = " " # getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
+}
+
class VOP3OpSel_Profile<VOPProfile P> : VOP3_Profile<P> {
let HasClamp = 1;
let HasOpSel = 1;
@@ -167,11 +190,13 @@ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Pro
}
def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
+ let HasClamp = 1;
+
// FIXME: Hack to stop printing _e64
let DstRC = RegisterOperand<VReg_64>;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
- let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
+ let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp";
}
//===----------------------------------------------------------------------===//
@@ -244,8 +269,8 @@ let isCommutable = 1 in {
def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
-def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUmad_i24>;
-def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUmad_u24>;
+def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>;
+def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>;
def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>;
def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>;
def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
@@ -305,10 +330,10 @@ def V_MAX3_U32 : VOP3Inst <"v_max3_u32",
def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
-def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_u8>;
-def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_hi_u8>;
-def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_u16>;
-def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>;
+def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>;
+def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>;
+def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>;
def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
@@ -330,10 +355,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_di
let AsmMatchConverter = "";
}
-def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>;
let Constraints = "@earlyclobber $vdst" in {
-def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3Clamp_Profile<VOP_I64_I64_I32_I64>>;
} // End Constraints = "@earlyclobber $vdst"
def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
@@ -358,8 +383,8 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev
let SubtargetPredicate = isCIVI in {
let Constraints = "@earlyclobber $vdst" in {
-def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
-def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3Clamp_Profile<VOP_I64_I64_I32_I64>>;
+def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3Clamp_Profile<VOP_V4I32_I64_I32_V4I32>>;
} // End Constraints = "@earlyclobber $vdst"
let isCommutable = 1 in {
@@ -383,15 +408,15 @@ let isCommutable = 1 in {
let F16_ZFILL = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
-def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
-def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
+def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>;
+def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>;
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
}
let SubtargetPredicate = isGFX9 in {
def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16>>;
-def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16>>;
-def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16>>;
+def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>;
+def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>;
def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16>>;
} // End SubtargetPredicate = isGFX9
@@ -416,18 +441,18 @@ multiclass Ternary_i16_Pats <SDPatternOp
Instruction inst, SDPatternOperator op3> {
def : Pat<
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
- (inst i16:$src0, i16:$src1, i16:$src2)
+ (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
>;
def : Pat<
(i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
- (inst i16:$src0, i16:$src1, i16:$src2)
+ (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
>;
def : Pat<
(i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
(REG_SEQUENCE VReg_64,
- (inst i16:$src0, i16:$src1, i16:$src2), sub0,
+ (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0,
(V_MOV_B32_e32 (i32 0)), sub1)
>;
}
@@ -470,6 +495,45 @@ def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst
def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
} // End SubtargetPredicate = isGFX9
+//===----------------------------------------------------------------------===//
+// Integer Clamp Patterns
+//===----------------------------------------------------------------------===//
+
+class getClampPat<VOPProfile P, SDPatternOperator node> {
+ dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2));
+ dag ret2 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1));
+ dag ret1 = (P.DstVT (node P.Src0VT:$src0));
+ dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class getClampRes<VOPProfile P, Instruction inst> {
+ dag ret3 = (inst P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, (i1 0));
+ dag ret2 = (inst P.Src0VT:$src0, P.Src1VT:$src1, (i1 0));
+ dag ret1 = (inst P.Src0VT:$src0, (i1 0));
+ dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class IntClampPat<VOP3Inst inst, SDPatternOperator node> : Pat<
+ getClampPat<inst.Pfl, node>.ret,
+ getClampRes<inst.Pfl, inst>.ret
+>;
+
+def : IntClampPat<V_MAD_I32_I24, AMDGPUmad_i24>;
+def : IntClampPat<V_MAD_U32_U24, AMDGPUmad_u24>;
+
+def : IntClampPat<V_SAD_U8, int_amdgcn_sad_u8>;
+def : IntClampPat<V_SAD_HI_U8, int_amdgcn_sad_hi_u8>;
+def : IntClampPat<V_SAD_U16, int_amdgcn_sad_u16>;
+
+def : IntClampPat<V_MSAD_U8, int_amdgcn_msad_u8>;
+def : IntClampPat<V_MQSAD_PK_U16_U8, int_amdgcn_mqsad_pk_u16_u8>;
+
+def : IntClampPat<V_QSAD_PK_U16_U8, int_amdgcn_qsad_pk_u16_u8>;
+def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>;
//===----------------------------------------------------------------------===//
// Target
Modified: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td Wed Aug 16 06:51:56 2017
@@ -112,7 +112,7 @@ class VOP3_Pseudo <string opName, VOPPro
let AsmMatchConverter =
!if(!and(P.IsPacked, isVOP3P),
"cvtVOP3P",
- !if(!or(P.HasModifiers, P.HasOMod),
+ !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)),
"cvtVOP3",
""));
Modified: llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s Wed Aug 16 06:51:56 2017
@@ -278,6 +278,9 @@ v_mad_i16 v5, v1, -1, v3
v_mad_i16 v5, v1, v2, -4.0
// GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03]
+v_mad_i16 v5, v1, v2, v3 clamp
+// GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04]
+
v_mad_legacy_f16_e64 v5, 0.5, v2, v3
// GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04]
@@ -305,6 +308,9 @@ v_mad_legacy_i16 v5, v1, -1, v3
v_mad_legacy_i16 v5, v1, v2, -4.0
// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03]
+v_mad_legacy_i16 v5, v1, v2, -4.0 clamp
+// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03]
+
v_mad_legacy_u16_e64 v5, 0, v2, v3
// GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
@@ -314,6 +320,9 @@ v_mad_legacy_u16 v5, v1, -1, v3
v_mad_legacy_u16 v5, v1, v2, -4.0
// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
+v_mad_legacy_u16 v5, v1, v2, -4.0 clamp
+// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03]
+
v_mad_u16_e64 v5, 0, v2, v3
// GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04]
@@ -322,3 +331,6 @@ v_mad_u16 v5, v1, -1, v3
v_mad_u16 v5, v1, v2, -4.0
// GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03]
+
+v_mad_u16 v5, v1, v2, v3 clamp
+// GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04]
Modified: llvm/trunk/test/MC/AMDGPU/vop3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3.s?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop3.s Wed Aug 16 06:51:56 2017
@@ -518,6 +518,58 @@ v_mad_u16 v5, v1, 0, v3
v_mad_u16 v5, v1, v2, -4.0
// VI: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
+///===---------------------------------------------------------------------===//
+// VOP3 with Integer Clamp
+///===---------------------------------------------------------------------===//
+
+v_mad_i32_i24 v5, v1, v2, v3 clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_mad_i32_i24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc2,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u24 v5, v1, v2, v3 clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_mad_u32_u24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc3,0xd1,0x01,0x05,0x0e,0x04]
+
+v_sad_u8 v5, v1, v2, v3 clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_sad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xd9,0xd1,0x01,0x05,0x0e,0x04]
+
+v_sad_hi_u8 v5, v1, v2, v3 clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_sad_hi_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xda,0xd1,0x01,0x05,0x0e,0x04]
+
+v_sad_u16 v5, v1, v2, v3 clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_sad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdb,0xd1,0x01,0x05,0x0e,0x04]
+
+v_sad_u32 v5, v1, v2, v3 clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_sad_u32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdc,0xd1,0x01,0x05,0x0e,0x04]
+
+v_msad_u8 v5, v1, v2, v3 clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_msad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xe4,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp
+// NOSICI: error: integer clamping is not supported on this GPU
+// VI: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04]
+
+v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp
+// NOSICI: error:
+// VI: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp
+// NOSICI: error:
+// VI: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp ; encoding: [0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_u16 v5, v1, v2, v3 clamp
+// NOSICI: error:
+// VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04]
+
+v_mad_i16 v5, v1, v2, v3 clamp
+// NOSICI: error:
+// VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04]
+
//
// v_interp*
//
Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt Wed Aug 16 06:51:56 2017
@@ -12,122 +12,134 @@
# GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04
-# CHECK: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
+# GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04
-# CHECK: v_fma_legacy_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84]
+# GFX9: v_fma_legacy_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84]
0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84
-# CHECK: v_fma_legacy_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04]
+# GFX9: v_fma_legacy_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04
-# CHECK: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04]
+# GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04
-# CHECK: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04]
+# GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04]
0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04
-# CHECK: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04]
+# GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04]
0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04
-# CHECK: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03]
+# GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03]
0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03
-# CHECK: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4]
+# GFX9: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4]
0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4
-# CHECK: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04]
+# GFX9: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04
-# CHECK: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04]
+# GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04
-# CHECK: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04]
+# GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04
-# CHECK: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04]
+# GFX9: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04]
0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04
-# CHECK: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03]
+# GFX9: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03]
0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03
-# CHECK: v_div_fixup_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4]
+# GFX9: v_div_fixup_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4]
0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4
-# CHECK: v_div_fixup_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04]
+# GFX9: v_div_fixup_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04
-# CHECK: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04]
+# GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04
-# CHECK: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04]
+# GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04]
0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04
-# CHECK: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04]
+# GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04]
0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04
-# CHECK: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03]
+# GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03]
0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03
-# CHECK: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4]
+# GFX9: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4]
0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4
-# CHECK: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04]
+# GFX9: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04
-# CHECK: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04]
+# GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04]
0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04
-# CHECK: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04]
+# GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04]
0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04
-# CHECK: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04]
+# GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04]
0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04
-# CHECK: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03]
0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03
-# CHECK: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04]
+# GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04]
+0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04
+
+# GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04]
0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04
-# CHECK: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04]
+# GFX9: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04]
0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04
-# CHECK: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03]
+# GFX9: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03]
0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03
-# CHECK: v_mad_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4]
+# GFX9: v_mad_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4]
0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4
-# CHECK: v_mad_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04]
+# GFX9: v_mad_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04
-# CHECK: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04]
+# GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04]
0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04
-# CHECK: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04]
+# GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04]
0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04
-# CHECK: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04]
+# GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04]
0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04
-# CHECK: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03]
0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03
-# CHECK: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
+# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03]
+0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03
+
+# GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04]
0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04
-# CHECK: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04]
+# GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04]
0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04
-# CHECK: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03]
0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03
-# CHECK: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04]
+# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03]
+0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03
+
+# GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04]
0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04
-# CHECK: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04]
+# GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04]
0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04
-# CHECK: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03]
+# GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03]
0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03
+
+# GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04]
+0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04
Modified: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt?rev=311006&r1=311005&r2=311006&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt (original)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt Wed Aug 16 06:51:56 2017
@@ -446,3 +446,39 @@
# VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04]
0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04
+
+# VI: v_mad_i32_i24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc2,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xc2,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_mad_u32_u24 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xc3,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xc3,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_sad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xd9,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xd9,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_sad_hi_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xda,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xda,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_sad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdb,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xdb,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_sad_u32 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xdc,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xdc,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_msad_u8 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xe4,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xe4,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xe6,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, v[3:4] clamp ; encoding: [0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xe5,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_mqsad_u32_u8 v[252:255], v[1:2], v2, v[3:6] clamp ; encoding: [0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04]
+0xfc,0x80,0xe7,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xeb,0xd1,0x01,0x05,0x0e,0x04
+
+# VI: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04]
+0x05,0x80,0xec,0xd1,0x01,0x05,0x0e,0x04
More information about the llvm-commits mailing list