[llvm] r296368 - AMDGPU: Add VOP3P instruction format
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 27 10:49:12 PST 2017
Author: arsenm
Date: Mon Feb 27 12:49:11 2017
New Revision: 296368
URL: http://llvm.org/viewvc/llvm-project?rev=296368&view=rev
Log:
AMDGPU: Add VOP3P instruction format
Also add a few non-VOP3P instructions related to packed operands.
Includes a hack with dummy operands for the benefit of the assembler:
the new op_sel, op_sel_hi, neg_lo and neg_hi operands (written as 0/1
arrays, e.g. op_sel:[0,1]) are parsed and printed on their own, but
their values are folded into the existing srcN_modifiers operands.
Added:
llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/trunk/test/MC/AMDGPU/literalv216-err.s
llvm/trunk/test/MC/AMDGPU/literalv216.s
llvm/trunk/test/MC/AMDGPU/vop3p-err.s
llvm/trunk/test/MC/AMDGPU/vop3p.s
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
llvm/trunk/lib/Target/AMDGPU/SIDefines.h
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Mon Feb 27 12:49:11 2017
@@ -190,6 +190,12 @@ def Feature16BitInsts : SubtargetFeature
"Has i16/f16 instructions"
>;
+def FeatureVOP3P : SubtargetFeature<"vop3p",
+ "HasVOP3PInsts",
+ "true",
+ "Has VOP3P packed instructions"
+>;
+
def FeatureMovrel : SubtargetFeature<"movrel",
"HasMovrel",
"true",
@@ -400,7 +406,7 @@ def FeatureGFX9 : SubtargetFeatureGenera
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
- FeatureApertureRegs, FeatureGFX9Insts
+ FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P
]
>;
@@ -575,7 +581,10 @@ def isCIVI : Predicate <
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
-def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
+def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
+ AssemblerPredicate<"Feature16BitInsts">;
+def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
+ AssemblerPredicate<"FeatureVOP3P">;
def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureSDWA">;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Feb 27 12:49:11 2017
@@ -117,6 +117,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
+ HasVOP3PInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Feb 27 12:49:11 2017
@@ -136,6 +136,7 @@ protected:
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool HasVOP3PInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
@@ -216,6 +217,10 @@ public:
return Has16BitInsts;
}
+ bool hasVOP3PInsts() const {
+ return HasVOP3PInsts;
+ }
+
bool hasHWFP64() const {
return FP64;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Mon Feb 27 12:49:11 2017
@@ -157,7 +157,11 @@ public:
ImmTySendMsg,
ImmTyInterpSlot,
ImmTyInterpAttr,
- ImmTyAttrChan
+ ImmTyAttrChan,
+ ImmTyOpSel,
+ ImmTyOpSelHi,
+ ImmTyNegLo,
+ ImmTyNegHi
};
struct TokOp {
@@ -294,6 +298,10 @@ public:
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
+ bool isOpSel() const { return isImmTy(ImmTyOpSel); }
+ bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
+ bool isNegLo() const { return isImmTy(ImmTyNegLo); }
+ bool isNegHi() const { return isImmTy(ImmTyNegHi); }
bool isMod() const {
return isClampSI() || isOModSI();
@@ -313,6 +321,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}
+ bool isSCSrcV2B16() const {
+ return isSCSrcB16();
+ }
+
bool isSCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
}
@@ -325,6 +337,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}
+ bool isSCSrcV2F16() const {
+ return isSCSrcF16();
+ }
+
bool isSCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
}
@@ -341,6 +357,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::i16);
}
+ bool isSSrcV2B16() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcB16();
+ }
+
bool isSSrcB64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
@@ -359,6 +380,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::f16);
}
+ bool isSSrcV2F16() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcF16();
+ }
+
bool isVCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -371,6 +397,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
+ bool isVCSrcV2B16() const {
+ return isVCSrcB16();
+ }
+
bool isVCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
@@ -383,6 +413,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
+ bool isVCSrcV2F16() const {
+ return isVCSrcF16();
+ }
+
bool isVSrcB32() const {
return isVCSrcF32() || isLiteralImm(MVT::i32);
}
@@ -395,6 +429,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::i16);
}
+ bool isVSrcV2B16() const {
+ llvm_unreachable("cannot happen");
+ return isVSrcB16();
+ }
+
bool isVSrcF32() const {
return isVCSrcF32() || isLiteralImm(MVT::f32);
}
@@ -407,6 +446,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::f16);
}
+ bool isVSrcV2F16() const {
+ llvm_unreachable("cannot happen");
+ return isVSrcF16();
+ }
+
bool isKImmFP32() const {
return isLiteralImm(MVT::f32);
}
@@ -607,6 +651,10 @@ public:
case ImmTyInterpSlot: OS << "InterpSlot"; break;
case ImmTyInterpAttr: OS << "InterpAttr"; break;
case ImmTyAttrChan: OS << "AttrChan"; break;
+ case ImmTyOpSel: OS << "OpSel"; break;
+ case ImmTyOpSelHi: OS << "OpSelHi"; break;
+ case ImmTyNegLo: OS << "NegLo"; break;
+ case ImmTyNegHi: OS << "NegHi"; break;
}
}
@@ -783,6 +831,8 @@ public:
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
};
+ typedef std::map<AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
+
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
@@ -881,10 +931,18 @@ public:
//bool ProcessInstruction(MCInst &Inst);
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
+
OperandMatchResultTy
parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t &) = nullptr);
+
+ OperandMatchResultTy parseOperandArrayWithPrefix(
+ const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ bool (*ConvertResult)(int64_t&) = nullptr);
+
OperandMatchResultTy
parseNamedBit(const char *Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
@@ -951,7 +1009,12 @@ public:
void cvtId(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
+
+ void cvtVOP3Impl(MCInst &Inst,
+ const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
@@ -999,6 +1062,30 @@ static const fltSemantics *getFltSemanti
return getFltSemantics(VT.getSizeInBits() / 8);
}
+static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
+ switch (OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ return &APFloat::IEEEsingle();
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return &APFloat::IEEEdouble();
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ return &APFloat::IEEEhalf();
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
@@ -1044,7 +1131,7 @@ bool AMDGPUOperand::isInlinableImm(MVT t
if (type.getScalarSizeInBits() == 16) {
return AMDGPU::isInlinableLiteral16(
- static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+ static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
AsmParser->hasInv2PiInlineImm());
}
@@ -1136,13 +1223,15 @@ void AMDGPUOperand::addLiteralImmOperand
// Check that this operand accepts literals
assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
- auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
+ APInt Literal(64, Val);
+ uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
if (Imm.IsFPImm) { // We got fp literal token
- APInt Literal(64, Val);
-
- switch (OpSize) {
- case 8:
+ switch (OpTy) {
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1166,17 +1255,32 @@ void AMDGPUOperand::addLiteralImmOperand
// unclear how we should encode them. This case should be checked earlier
// in predicate methods (isLiteralImm())
llvm_unreachable("fp literal in 64-bit integer instruction.");
-
- case 4:
- case 2: {
+ }
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
- FPLiteral.convert(*getFltSemantics(OpSize),
+ FPLiteral.convert(*getOpFltSemantics(OpTy),
APFloat::rmNearestTiesToEven, &lost);
// We allow precision lost but not overflow or underflow. This should be
// checked earlier in isLiteralImm()
- Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+
+ uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+ if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
+ OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ ImmVal |= (ImmVal << 16);
+ }
+
+ Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
default:
@@ -1189,8 +1293,11 @@ void AMDGPUOperand::addLiteralImmOperand
// We got int literal token.
// Only sign extend inline immediates.
// FIXME: No errors on truncation
- switch (OpSize) {
- case 4:
+ switch (OpTy) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
if (isInt<32>(Val) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -1200,18 +1307,23 @@ void AMDGPUOperand::addLiteralImmOperand
Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
return;
-
- case 8:
- if (AMDGPU::isInlinableLiteral64(Val,
- AsmParser->hasInv2PiInlineImm())) {
+ }
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
+ if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
return;
-
- case 2:
+ }
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
if (isInt<16>(Val) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -1221,7 +1333,18 @@ void AMDGPUOperand::addLiteralImmOperand
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
return;
-
+ }
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
+ assert(AMDGPU::isInlinableLiteral16(LiteralVal,
+ AsmParser->hasInv2PiInlineImm()));
+
+ uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
+ static_cast<uint32_t>(LiteralVal);
+ Inst.addOperand(MCOperand::createImm(ImmVal));
+ return;
+ }
default:
llvm_unreachable("invalid operand size");
}
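
For both the fp-literal and int-literal paths, the new V2INT16/V2FP16
cases above broadcast a single 16-bit inline constant into both halves
of the 32-bit immediate. A minimal standalone sketch of that step
(helper name hypothetical):

#include <cstdint>

// Broadcast one 16-bit inline constant into both halves of a packed
// v2i16/v2f16 immediate, as the V2INT16/V2FP16 cases do.
static uint32_t broadcastV216(uint16_t Lo16) {
  return (static_cast<uint32_t>(Lo16) << 16) | Lo16;
}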
@@ -2268,6 +2391,56 @@ AMDGPUAsmParser::parseIntWithPrefix(cons
return MatchOperand_Success;
}
+OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
+ const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy,
+ bool (*ConvertResult)(int64_t&)) {
+ StringRef Name = Parser.getTok().getString();
+ if (!Name.equals(Prefix))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+
+ unsigned Val = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ // FIXME: How to verify the number of elements matches the number of src
+ // operands?
+ for (int I = 0; I < 3; ++I) {
+ if (I != 0) {
+ if (getLexer().is(AsmToken::RBrac))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ }
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return MatchOperand_ParseFail;
+
+ int64_t Op;
+ if (getParser().parseAbsoluteExpression(Op))
+ return MatchOperand_ParseFail;
+
+ if (Op != 0 && Op != 1)
+ return MatchOperand_ParseFail;
+ Val |= (Op << I);
+ }
+
+ Parser.Lex();
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy) {
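
parseOperandArrayWithPrefix above accepts up to three 0/1 elements and
packs element I into bit I of the resulting immediate. A self-contained
model of that parse, assuming a plain string instead of the MC lexer
(function name hypothetical):

#include <optional>
#include <string>

// Model of parseOperandArrayWithPrefix: turn text such as
// "op_sel:[0,1,0]" into a bitmask with bit I set when element I is 1.
// Fewer than three elements are allowed; malformed input yields nullopt.
static std::optional<unsigned> parseBitArray(const std::string &Text,
                                             const std::string &Prefix) {
  const std::string Head = Prefix + ":[";
  if (Text.compare(0, Head.size(), Head) != 0)
    return std::nullopt;
  size_t Pos = Head.size();
  unsigned Val = 0;
  for (int I = 0; I < 3; ++I) {
    if (I != 0) {
      if (Pos < Text.size() && Text[Pos] == ']')
        break;                      // short array, e.g. op_sel:[1]
      if (Pos >= Text.size() || Text[Pos] != ',')
        return std::nullopt;
      ++Pos;
    }
    if (Pos >= Text.size() || (Text[Pos] != '0' && Text[Pos] != '1'))
      return std::nullopt;
    Val |= static_cast<unsigned>(Text[Pos] - '0') << I;
    ++Pos;
  }
  if (Pos >= Text.size() || Text[Pos] != ']')
    return std::nullopt;
  return Val;
}

For example, parseBitArray("op_sel:[0,1]", "op_sel") yields 2.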
@@ -2300,12 +2473,11 @@ AMDGPUAsmParser::parseNamedBit(const cha
return MatchOperand_Success;
}
-typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
-
-static void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
- OptionalImmIndexMap& OptionalIdx,
- AMDGPUOperand::ImmTy ImmT,
- int64_t Default = 0) {
+static void addOptionalImmOperand(
+ MCInst& Inst, const OperandVector& Operands,
+ AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
+ AMDGPUOperand::ImmTy ImmT,
+ int64_t Default = 0) {
auto i = OptionalIdx.find(ImmT);
if (i != OptionalIdx.end()) {
unsigned Idx = i->second;
@@ -3214,6 +3386,10 @@ static const OptionalOperand AMDGPUOptio
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
{"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
+ {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
+ {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
+ {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
+ {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -3230,6 +3406,12 @@ OperandMatchResultTy AMDGPUAsmParser::pa
res = parseSDWASel(Operands, Op.Name, Op.Type);
} else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
res = parseSDWADstUnused(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
+ Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
+ Op.Type == AMDGPUOperand::ImmTyNegLo ||
+ Op.Type == AMDGPUOperand::ImmTyNegHi) {
+ res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
+ Op.ConvertResult);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -3285,8 +3467,8 @@ static bool isRegOrImmWithInputMods(cons
&& Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
-void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
+void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx) {
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@@ -3303,6 +3485,12 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &In
llvm_unreachable("unhandled operand type");
}
}
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ cvtVOP3Impl(Inst, Operands, OptionalIdx);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
@@ -3327,6 +3515,74 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &In
}
}
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptIdx;
+
+ cvtVOP3Impl(Inst, Operands, OptIdx);
+
+ // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
+ // instruction, and then figure out where to actually put the modifiers
+ int Opc = Inst.getOpcode();
+
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+
+ int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
+ if (NegLoIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
+ }
+
+ const int Ops[] = { AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2 };
+ const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers };
+
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+ unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+ unsigned NegLo = 0;
+ unsigned NegHi = 0;
+
+ if (NegLoIdx != -1) {
+ int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
+ NegLo = Inst.getOperand(NegLoIdx).getImm();
+ NegHi = Inst.getOperand(NegHiIdx).getImm();
+ }
+
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+ if (OpIdx == -1)
+ break;
+
+ uint32_t ModVal = 0;
+
+ if ((OpSel & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_0;
+
+ if ((OpSelHi & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_1;
+
+ if ((NegLo & (1 << J)) != 0)
+ ModVal |= SISrcMods::NEG;
+
+ if ((NegHi & (1 << J)) != 0)
+ ModVal |= SISrcMods::NEG_HI;
+
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+}
+
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
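
cvtVOP3P above first converts the instruction as if it were ordinary
VOP3, then re-encodes the parsed op_sel/op_sel_hi/neg_lo/neg_hi
bitmasks into each source's srcN_modifiers operand. A sketch of that
per-source folding, with the SISrcMods values mirrored from the
SIDefines.h change below (function name hypothetical):

#include <cstdint>

// SISrcMods bits per SIDefines.h; NEG_HI reuses the ABS bit.
enum : uint32_t {
  NEG      = 1u << 0,
  NEG_HI   = 1u << 1,
  OP_SEL_0 = 1u << 2,
  OP_SEL_1 = 1u << 3,
};

// Fold the instruction-wide bitmasks into the modifier value for
// source J, matching the loop at the end of cvtVOP3P.
static uint32_t packedSrcMods(unsigned J, unsigned OpSel, unsigned OpSelHi,
                              unsigned NegLo, unsigned NegHi) {
  uint32_t ModVal = 0;
  if (OpSel & (1u << J))   ModVal |= OP_SEL_0;
  if (OpSelHi & (1u << J)) ModVal |= OP_SEL_1;
  if (NegLo & (1u << J))   ModVal |= NEG;
  if (NegHi & (1u << J))   ModVal |= NEG_HI;
  return ModVal;
}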
Modified: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp Mon Feb 27 12:49:11 2017
@@ -97,6 +97,14 @@ static DecodeStatus decodeOperand_VSrc16
return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}
+static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
+}
+
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
@@ -264,6 +272,10 @@ MCOperand AMDGPUDisassembler::decodeOper
return decodeSrcOp(OPW16, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
+ return decodeSrcOp(OPWV216, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
// Some instructions have operand restrictions beyond what the encoding
// allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
@@ -424,6 +436,7 @@ MCOperand AMDGPUDisassembler::decodeFPIm
case OPW64:
return MCOperand::createImm(getInlineImmVal64(Imm));
case OPW16:
+ case OPWV216:
return MCOperand::createImm(getInlineImmVal16(Imm));
default:
llvm_unreachable("implement me");
@@ -437,6 +450,7 @@ unsigned AMDGPUDisassembler::getVgprClas
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return VGPR_32RegClassID;
case OPW64: return VReg_64RegClassID;
case OPW128: return VReg_128RegClassID;
@@ -450,6 +464,7 @@ unsigned AMDGPUDisassembler::getSgprClas
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return SGPR_32RegClassID;
case OPW64: return SGPR_64RegClassID;
case OPW128: return SGPR_128RegClassID;
@@ -463,6 +478,7 @@ unsigned AMDGPUDisassembler::getTtmpClas
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return TTMP_32RegClassID;
case OPW64: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
@@ -498,6 +514,7 @@ MCOperand AMDGPUDisassembler::decodeSrcO
switch (Width) {
case OPW32:
case OPW16:
+ case OPWV216:
return decodeSpecialReg32(Val);
case OPW64:
return decodeSpecialReg64(Val);
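
The new OPWV216 width is routed like OPW32 for register classes, since
packed 16-bit values live in full 32-bit registers, but like OPW16 for
inline immediates. That routing can be summarized as (enum mirrored
from AMDGPUDisassembler.h; helper names hypothetical):

enum OpWidthTy { OPW32, OPW64, OPW128, OPW16, OPWV216 };

// Packed v2i16/v2f16 operands decode to 32-bit register classes...
static bool uses32BitRegClass(OpWidthTy W) {
  return W == OPW32 || W == OPW16 || W == OPWV216;
}

// ...but their inline immediates are interpreted as 16-bit constants.
static bool decodesAsImm16(OpWidthTy W) {
  return W == OPW16 || W == OPWV216;
}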
Modified: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h Mon Feb 27 12:49:11 2017
@@ -67,6 +67,7 @@ public:
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
MCOperand decodeOperand_VSrc16(unsigned Val) const;
+ MCOperand decodeOperand_VSrcV216(unsigned Val) const;
MCOperand decodeOperand_VReg_64(unsigned Val) const;
MCOperand decodeOperand_VReg_96(unsigned Val) const;
@@ -85,6 +86,7 @@ public:
OPW64,
OPW128,
OPW16,
+ OPWV216,
OPW_LAST_,
OPW_FIRST_ = OPW32
};
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Mon Feb 27 12:49:11 2017
@@ -375,6 +375,14 @@ void AMDGPUInstPrinter::printImmediate16
O << formatHex(static_cast<uint64_t>(Imm));
}
+void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint16_t Lo16 = static_cast<uint16_t>(Imm);
+ assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+ printImmediate16(Lo16, STI, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -489,6 +497,10 @@ void AMDGPUInstPrinter::printOperand(con
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ printImmediateV216(Op.getImm(), STI, O);
+ break;
case MCOI::OPERAND_UNKNOWN:
case MCOI::OPERAND_PCREL:
O << formatDec(Op.getImm());
@@ -738,6 +750,71 @@ void AMDGPUInstPrinter::printExpTgt(cons
}
}
+static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
+ int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
+
+ for (int I = 0; I < NumOps; ++I) {
+ if (!!(Ops[I] & Mod) != DefaultValue)
+ return false;
+ }
+
+ return true;
+}
+
+static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
+ raw_ostream &O) {
+ unsigned Opc = MI->getOpcode();
+ int NumOps = 0;
+ int Ops[3];
+
+ for (int OpName : { AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers }) {
+ int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
+ if (Idx == -1)
+ break;
+
+ Ops[NumOps++] = MI->getOperand(Idx).getImm();
+ }
+
+ if (allOpsDefaultValue(Ops, NumOps, Mod))
+ return;
+
+ O << Name;
+ for (int I = 0; I < NumOps; ++I) {
+ if (I != 0)
+ O << ',';
+
+ O << !!(Ops[I] & Mod);
+ }
+
+ O << ']';
+}
+
+void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
+}
+
+void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
+}
+
+void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
+}
+
+void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
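
printPackedModifier above stays silent when every source carries the
default value for a given bit (1 for op_sel_hi, 0 for the rest), which
keeps round-tripped assembly minimal. A standalone model of the
suppress-or-print logic, assuming printf-style output instead of
raw_ostream:

#include <cstdio>

// Print " name:[b0,b1,...]" only if some source deviates from the
// default; op_sel_hi defaults to all ones, the other modifiers to zero.
static void printPackedMod(const char *Name, unsigned ModBit,
                           bool DefaultOn, const int *Ops, int NumOps) {
  bool AllDefault = true;
  for (int I = 0; I < NumOps; ++I)
    if (((Ops[I] & ModBit) != 0) != DefaultOn)
      AllDefault = false;
  if (AllDefault)
    return;
  std::printf(" %s:[", Name);
  for (int I = 0; I < NumOps; ++I)
    std::printf(I == 0 ? "%d" : ",%d", (Ops[I] & ModBit) != 0 ? 1 : 0);
  std::printf("]");
}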
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Mon Feb 27 12:49:11 2017
@@ -90,6 +90,8 @@ private:
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
@@ -117,6 +119,14 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printOpSel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printOpSelHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNegLo(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNegHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpAttr(const MCInst *MI, unsigned OpNo,
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp Mon Feb 27 12:49:11 2017
@@ -220,15 +220,35 @@ uint32_t SIMCCodeEmitter::getLitEncoding
Imm = MO.getImm();
}
- switch (AMDGPU::getOperandSize(OpInfo)) {
- case 4:
+ switch (OpInfo.OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
- case 8:
+
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
- case 2:
+
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ uint16_t Lo16 = static_cast<uint16_t>(Imm);
+ assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+ uint32_t Encoding = getLit16Encoding(Lo16, STI);
+ assert(Encoding != 255 && "packed constants can only be inline immediates");
+ return Encoding;
+ }
default:
llvm_unreachable("invalid operand size");
}
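
Packed operands cannot fall back to the 32-bit literal encoding (value
255), so the new V2INT16/V2FP16 case requires both halves to match and
the shared half to be a valid inline constant. A minimal model of that
case, with getLit16Encoding passed in as an assumed callback:

#include <cassert>
#include <cstdint>

// Encode a packed v2i16/v2f16 immediate: both halves must be equal and
// the 16-bit value must map to an inline constant; an encoding of 255
// would mean "use a literal", which packed operands do not support.
static uint32_t encodePackedImm(uint32_t Imm,
                                uint32_t (*getLit16Encoding)(uint16_t)) {
  uint16_t Lo16 = static_cast<uint16_t>(Imm);
  assert(Lo16 == static_cast<uint16_t>(Imm >> 16) && "halves must match");
  uint32_t Encoding = getLit16Encoding(Lo16);
  assert(Encoding != 255 && "packed constants can only be inline immediates");
  return Encoding;
}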
Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Mon Feb 27 12:49:11 2017
@@ -36,6 +36,7 @@ enum : uint64_t {
// TODO: Should this be spilt into VOP3 a and b?
VOP3 = 1 << 10,
+ VOP3P = 1 << 12,
VINTRP = 1 << 13,
SDWA = 1 << 14,
@@ -102,12 +103,14 @@ namespace AMDGPU {
OPERAND_REG_INLINE_C_FP16,
OPERAND_REG_INLINE_C_FP32,
OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_V2FP16,
+ OPERAND_REG_INLINE_C_V2INT16,
OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
@@ -125,9 +128,12 @@ namespace AMDGPU {
// NEG and SEXT share same bit-mask because they can't be set simultaneously.
namespace SISrcMods {
enum {
- NEG = 1 << 0, // Floating-point negate modifier
- ABS = 1 << 1, // Floating-point absolute modifier
- SEXT = 1 << 0 // Integer sign-extend modifier
+ NEG = 1 << 0, // Floating-point negate modifier
+ ABS = 1 << 1, // Floating-point absolute modifier
+ SEXT = 1 << 0, // Integer sign-extend modifier
+ NEG_HI = ABS, // Floating-point negate high packed component modifier.
+ OP_SEL_0 = 1 << 2,
+ OP_SEL_1 = 1 << 3
};
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Mon Feb 27 12:49:11 2017
@@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string
field bit VOP2 = 0;
field bit VOPC = 0;
field bit VOP3 = 0;
+ field bit VOP3P = 0;
field bit VINTRP = 0;
field bit SDWA = 0;
field bit DPP = 0;
@@ -96,6 +97,7 @@ class InstSI <dag outs, dag ins, string
let TSFlags{8} = VOP2;
let TSFlags{9} = VOPC;
let TSFlags{10} = VOP3;
+ let TSFlags{12} = VOP3P;
let TSFlags{13} = VINTRP;
let TSFlags{14} = SDWA;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Mon Feb 27 12:49:11 2017
@@ -440,6 +440,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DPP;
}
+ static bool isVOP3P(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
+ }
+
+ bool isVOP3P(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
+ }
+
static bool isScalarUnit(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Feb 27 12:49:11 2017
@@ -458,6 +458,12 @@ class NamedOperandU32<string Name, AsmOp
let ParserMatchClass = MatchClass;
}
+class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
+ OperandWithDefaultOps<i32, (ops (i32 0))> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@@ -495,6 +501,11 @@ def src0_sel : NamedOperandU32<"SDWASrc0
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
+def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
+def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
+def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
+def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
@@ -534,6 +545,7 @@ class FPInputModsMatchClass <int opSize>
let ParserMethod = "parseRegOrImmWithFPInputMods";
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
+
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -586,6 +598,33 @@ def IntVRegInputMods : InputMods <IntVRe
let PrintMethod = "printOperandAndIntInputMods";
}
+class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "PackedFP"#opSize#"InputMods";
+ let ParserMethod = "parseRegOrImm";
+ let PredicateMethod = "isRegOrImm";
+// let PredicateMethod = "isPackedFP"#opSize#"InputMods";
+}
+
+class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "PackedInt"#opSize#"InputMods";
+ let ParserMethod = "parseRegOrImm";
+ let PredicateMethod = "isRegOrImm";
+// let PredicateMethod = "isPackedInt"#opSize#"InputMods";
+}
+
+def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
+def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
+
+class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
+// let PrintMethod = "printPackedFPInputMods";
+}
+
+class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
+ //let PrintMethod = "printPackedIntInputMods";
+}
+
+def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
+def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
//===----------------------------------------------------------------------===//
// Complex patterns
@@ -602,10 +641,13 @@ def VOP3Mods0Clamp : ComplexPattern<unty
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
-
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
+def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
+def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
+
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -729,12 +771,34 @@ class getVALUDstForVT<ValueType VT> {
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
- 0)));
- RegisterOperand ret = !if(isFP,
- !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Size, 16), VSrc_f16, VSrc_f32)),
- !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Size, 16), VSrc_b16, VSrc_b32)));
+ 0))));
+
+ RegisterOperand ret =
+ !if(isFP,
+ !if(!eq(VT.Size, 64),
+ VSrc_f64,
+ !if(!eq(VT.Value, f16.Value),
+ VSrc_f16,
+ !if(!eq(VT.Value, v2f16.Value),
+ VCSrc_v2f16,
+ VSrc_f32
+ )
+ )
+ ),
+ !if(!eq(VT.Size, 64),
+ VSrc_b64,
+ !if(!eq(VT.Value, i16.Value),
+ VSrc_b16,
+ !if(!eq(VT.Value, v2i16.Value),
+ VCSrc_v2b16,
+ VSrc_b32
+ )
+ )
+ )
+ );
}
// Returns the vreg register class to use for source operand given VT
@@ -748,25 +812,38 @@ class getVregSrcForVT<ValueType VT> {
// given VT.
class getVOP3SrcForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
- 0)));
+ 0))));
RegisterOperand ret =
!if(!eq(VT.Size, 128),
- VSrc_128,
- !if(!eq(VT.Size, 64),
+ VSrc_128,
+ !if(!eq(VT.Size, 64),
!if(isFP,
- VCSrc_f64,
- VCSrc_b64),
+ VCSrc_f64,
+ VCSrc_b64),
!if(!eq(VT.Value, i1.Value),
- SCSrc_b64,
- !if(isFP,
- !if(!eq(VT.Size, 16), VCSrc_f16, VCSrc_f32),
- !if(!eq(VT.Size, 16), VCSrc_b16, VCSrc_b32)
- )
- )
- )
- );
+ SCSrc_b64,
+ !if(isFP,
+ !if(!eq(VT.Value, f16.Value),
+ VCSrc_f16,
+ !if(!eq(VT.Value, v2f16.Value),
+ VCSrc_v2f16,
+ VCSrc_f32
+ )
+ ),
+ !if(!eq(VT.Value, i16.Value),
+ VCSrc_b16,
+ !if(!eq(VT.Value, v2i16.Value),
+ VCSrc_v2b16,
+ VCSrc_b32
+ )
+ )
+ )
+ )
+ )
+ );
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
@@ -776,7 +853,8 @@ class isFloatType<ValueType SrcVT> {
!if(!eq(SrcVT.Value, f16.Value), 1,
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
- 0)));
+ !if(!eq(SrcVT.Value, v2f16.Value), 1,
+ 0))));
}
class isIntType<ValueType SrcVT> {
@@ -787,6 +865,23 @@ class isIntType<ValueType SrcVT> {
0)));
}
+class isPackedType<ValueType SrcVT> {
+ bit ret =
+ !if(!eq(SrcVT.Value, v2i16.Value), 1,
+ !if(!eq(SrcVT.Value, v2f16.Value), 1, 0)
+ );
+}
+
+// Float or packed int
+class isModifierType<ValueType SrcVT> {
+ bit ret =
+ !if(!eq(SrcVT.Value, f16.Value), 1,
+ !if(!eq(SrcVT.Value, f32.Value), 1,
+ !if(!eq(SrcVT.Value, f64.Value), 1,
+ !if(!eq(SrcVT.Value, v2f16.Value), 1,
+ !if(!eq(SrcVT.Value, v2i16.Value), 1,
+ 0)))));
+}
// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT> {
@@ -794,6 +889,7 @@ class getSrcMod <ValueType VT> {
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
+ bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
!if(isFP,
@@ -824,8 +920,8 @@ class getIns32 <RegisterOperand Src0RC,
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasModifiers, Operand Src0Mod, Operand Src1Mod,
- Operand Src2Mod> {
+ bit HasModifiers, bit HasOMod,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
!if (!eq(NumSrcArgs, 0),
@@ -844,9 +940,13 @@ class getIns64 <RegisterOperand Src0RC,
!if (!eq(NumSrcArgs, 2),
!if (!eq(HasModifiers, 1),
// VOP 2 with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, omod:$omod)
+ !if( !eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, omod:$omod),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp))
/* else */,
// VOP2 without modifiers
(ins Src0RC:$src0, Src1RC:$src1)
@@ -854,16 +954,57 @@ class getIns64 <RegisterOperand Src0RC,
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
// VOP3 with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp, omod:$omod)
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp, omod:$omod),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp))
/* else */,
// VOP3 without modifiers
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
/* endif */ ))));
}
+/// XXX - src1 may only allow VGPRs?
+
+// The modifiers (except clamp) are dummy operands for the benefit of
+// printing and parsing. They defer their values to looking at the
+// srcN_modifiers for what to print.
+class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs,
+ bit HasClamp,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !if (!eq(NumSrcArgs, 2),
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi)),
+ // else NumSrcArgs == 3
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi))
+ );
+}
+
class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
@@ -947,7 +1088,8 @@ class getAsm32 <bit HasDst, int NumSrcAr
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
-class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers,
+ bit HasOMod, ValueType DstVT = i32> {
string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -957,7 +1099,26 @@ class getAsm64 <bit HasDst, int NumSrcAr
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<HasDst, NumSrcArgs, DstVT>.ret,
- dst#", "#src0#src1#src2#"$clamp"#"$omod");
+ dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP3P
+// instruction.
+class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
+ bit HasClamp, ValueType DstVT = i32> {
+ string dst = " $vdst";
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1",
+ " $src1,"));
+ string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+ string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
+ string clamp = !if(HasClamp, "$clamp", "");
+
+ // Each modifier is printed as an array of bits for each operand, so
+ // all operands are printed as part of src0_modifiers.
+ string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
}
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@@ -1069,7 +1230,7 @@ class VOPProfile <list<ValueType> _ArgVT
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
// TODO: Modifiers logic is somewhat adhoc here, to be refined later
- field bit HasModifiers = isFloatType<Src0VT>.ret;
+ field bit HasModifiers = isModifierType<Src0VT>.ret;
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
@@ -1083,13 +1244,20 @@ class VOPProfile <list<ValueType> _ArgVT
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
- field bit HasOMod = HasModifiers;
field bit HasClamp = HasModifiers;
field bit HasSDWAClamp = HasSrc0;
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
+ field bit IsPacked = isPackedType<Src0VT>.ret;
+ field bit HasOpSel = IsPacked;
+ field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
+
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
+ field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
+ field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
+
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
// VOP3b instructions are a special case with a second explicit
@@ -1101,7 +1269,12 @@ class VOPProfile <list<ValueType> _ArgVT
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ HasModifiers, HasOMod, Src0Mod, Src1Mod,
+ Src2Mod>.ret;
+ field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
+ NumSrcArgs, HasClamp,
+ Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
+
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
@@ -1109,7 +1282,8 @@ class VOPProfile <list<ValueType> _ArgVT
DstVT>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
- field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
+ field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
}
@@ -1130,6 +1304,13 @@ def VOP_I16_I16_I16 : VOPProfile <[i32,
def VOP_I16_I16_I16_I16 : VOPProfile <[i32, i32, i32, i32, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
+def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
+def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
+def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
+
+def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
+def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
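
Putting getInsVOP3P and getAsmVOP3P together: for a two-source packed
profile such as VOP_V2F16_V2F16_V2F16 (which has clamp and modifiers),
the VOP3P operand list expands to

  (src0_modifiers, src0, src1_modifiers, src1, clamp,
   op_sel, op_sel_hi, neg_lo, neg_hi)

and the assembly string to

  " $vdst, $src0, $src1$op_sel$op_sel_hi$neg_lo$neg_hi$clamp"

so the array modifiers print after the sources, and the dummy
op_sel/op_sel_hi/neg_lo/neg_hi operands exist only for parsing and
printing, as noted in the getInsVOP3P comment above.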
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon Feb 27 12:49:11 2017
@@ -616,6 +616,12 @@ def : BitConvert <i32, f32, VGPR_32>;
def : BitConvert <f32, i32, VGPR_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <v2i16, i32, SReg_32>;
+def : BitConvert <i32, v2i16, SReg_32>;
+def : BitConvert <v2f16, i32, SReg_32>;
+def : BitConvert <i32, v2f16, SReg_32>;
+def : BitConvert <v2i16, v2f16, SReg_32>;
+def : BitConvert <v2f16, v2i16, SReg_32>;
// 64-bit bitcast
def : BitConvert <i64, f64, VReg_64>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Mon Feb 27 12:49:11 2017
@@ -133,7 +133,7 @@ def M0_CLASS : RegisterClass<"AMDGPU", [
// TODO: Do we need to set DwarfRegAlias on register tuples?
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 103))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
@@ -184,7 +184,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub
(add (decimate (shl SGPR_32, 15), 4))]>;
// Trap handler TMP 32-bit registers
-def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
(add (sequence "TTMP%u", 0, 11))> {
let isAllocatable = 0;
}
@@ -202,7 +202,8 @@ def TTMP_128Regs : RegisterTuples<[sub0,
(add (decimate (shl TTMP_32, 3), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+// i16/f16 only on VI+
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
@@ -263,7 +264,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
@@ -276,7 +277,7 @@ def SReg_32_XM0 : RegisterClass<"AMDGPU"
}
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 7;
}
@@ -372,7 +373,7 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1
let Size = 32;
}
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32)> {
let isAllocatable = 0;
}
@@ -423,6 +424,18 @@ multiclass SIRegOperand <string rc, stri
let OperandType = opType#"_FP64";
let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
}
+
+ def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_V2INT16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
+ let DecoderMethod = "decodeOperand_VSrcV216";
+ }
+
+ def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_V2FP16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
+ let DecoderMethod = "decodeOperand_VSrcV216";
+ }
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td Mon Feb 27 12:49:11 2017
@@ -438,6 +438,11 @@ let Defs = [SCC] in {
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
} // End Defs = [SCC]
+let SubtargetPredicate = isGFX9 in {
+ def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
+ def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
+ def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
+}
//===----------------------------------------------------------------------===//
// SOPK Instructions
@@ -1207,6 +1212,9 @@ def S_BFE_U64_vi : SOP2_Real_v
def S_BFE_I64_vi : SOP2_Real_vi <0x28, S_BFE_I64>;
def S_CBRANCH_G_FORK_vi : SOP2_Real_vi <0x29, S_CBRANCH_G_FORK>;
def S_ABSDIFF_I32_vi : SOP2_Real_vi <0x2a, S_ABSDIFF_I32>;
+def S_PACK_LL_B32_B16_vi : SOP2_Real_vi <0x32, S_PACK_LL_B32_B16>;
+def S_PACK_LH_B32_B16_vi : SOP2_Real_vi <0x33, S_PACK_LH_B32_B16>;
+def S_PACK_HH_B32_B16_vi : SOP2_Real_vi <0x34, S_PACK_HH_B32_B16>;
def S_MOVK_I32_vi : SOPK_Real_vi <0x00, S_MOVK_I32>;
def S_CMOVK_I32_vi : SOPK_Real_vi <0x01, S_CMOVK_I32>;
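
This patch only adds the s_pack_* definitions and their VI-format
encodings; the semantics are not spelled out here. As a hedged sketch,
assuming the GFX9 ISA behavior of selecting 16-bit halves of the two
scalar sources:

#include <cstdint>

// s_pack_ll_b32_b16: D = { S1[15:0],  S0[15:0]  }
static uint32_t s_pack_ll(uint32_t S0, uint32_t S1) {
  return (S1 << 16) | (S0 & 0xffffu);
}
// s_pack_lh_b32_b16: D = { S1[31:16], S0[15:0]  }
static uint32_t s_pack_lh(uint32_t S0, uint32_t S1) {
  return (S1 & 0xffff0000u) | (S0 & 0xffffu);
}
// s_pack_hh_b32_b16: D = { S1[31:16], S0[31:16] }
static uint32_t s_pack_hh(uint32_t S0, uint32_t S1) {
  return (S1 & 0xffff0000u) | (S0 >> 16);
}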
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Mon Feb 27 12:49:11 2017
@@ -564,6 +564,7 @@ bool isSISrcFPOperand(const MCInstrDesc
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return true;
default:
return false;
@@ -682,6 +683,14 @@ bool isInlinableLiteral16(int16_t Litera
Val == 0x3118; // 1/2pi
}
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
+}
+
bool isUniformMMO(const MachineMemOperand *MMO) {
const Value *Ptr = MMO->getValue();
// UndefValue means this is a load of a kernel input. These are uniform.
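
A packed 32-bit value is inlinable exactly when its two 16-bit halves
are equal and the shared half is itself an inline constant. A usage
sketch for the new helper (0x3C00 is 1.0 in half precision; the include
path is an assumption):

#include <cassert>
#include "Utils/AMDGPUBaseInfo.h"

int main() {
  // (1.0h, 1.0h): halves match and 1.0 is an inline constant.
  assert(AMDGPU::isInlinableLiteralV216(0x3C003C00, /*HasInv2Pi=*/true));
  // (2.0h, 1.0h): halves differ, so this would need a literal.
  assert(!AMDGPU::isInlinableLiteralV216(0x40003C00, /*HasInv2Pi=*/true));
  return 0;
}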
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Mon Feb 27 12:49:11 2017
@@ -301,6 +301,8 @@ inline unsigned getOperandSize(const MCO
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return 2;
default:
@@ -323,6 +325,9 @@ bool isInlinableLiteral32(int32_t Litera
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+
bool isUniformMMO(const MachineMemOperand *MMO);
/// \returns The encoding that will be used for \p ByteOffset in the SMRD
Modified: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td Mon Feb 27 12:49:11 2017
@@ -237,7 +237,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i
src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
- let Asm64 = getAsm64<1, 1, 0>.ret;
+ let Asm64 = getAsm64<1, 1, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
Modified: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Mon Feb 27 12:49:11 2017
@@ -182,7 +182,7 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
VGPR_32:$src2, // stub argument
@@ -194,6 +194,7 @@ class VOP_MAC <ValueType vt> : VOPProfil
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
let HasSrc2 = 0;
@@ -204,13 +205,13 @@ class VOP_MAC <ValueType vt> : VOPProfil
def VOP_MAC_F16 : VOP_MAC <f16> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, f16>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f16>.ret;
}
def VOP_MAC_F32 : VOP_MAC <f32> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f32>.ret;
}
// Write out to vcc or arbitrary SGPR.
Modified: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td Mon Feb 27 12:49:11 2017
@@ -29,6 +29,26 @@ class getVOP3ModPat<VOPProfile P, SDPatt
ret1));
}
+class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@@ -263,6 +283,10 @@ defm: Tenary_i16_Pats<mul, add, V_MAD_I1
} // End Predicates = [isVI]
+let SubtargetPredicate = isGFX9 in {
+def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
+}
+
//===----------------------------------------------------------------------===//
// Target
@@ -449,3 +473,5 @@ defm V_LSHLREV_B64 : VOP3_Real_vi <
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>;
+
+defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
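
Going by its VOP_B32_F16_F16 profile, the new V_PACK_B32_F16 packs two
f16 sources into one 32-bit result. A hedged C++ sketch of that
semantics, operating on the raw f16 bit patterns; the helper is
illustrative only:

    #include <cstdint>

    // Assumed semantics from the B32_F16_F16 profile: src0's f16 bits
    // form the low half of the result and src1's the high half.
    uint32_t v_pack_b32_f16(uint16_t src0Bits, uint16_t src1Bits) {
      return uint32_t(src0Bits) | (uint32_t(src1Bits) << 16);
    }
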
Added: llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td?rev=296368&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td (added)
+++ llvm/trunk/lib/Target/AMDGPU/VOP3PInstructions.td Mon Feb 27 12:49:11 2017
@@ -0,0 +1,82 @@
+//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP3P Classes
+//===----------------------------------------------------------------------===//
+
+class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+ VOP3P_Pseudo<OpName, P,
+ !if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
+>;
+
+// Non-packed instructions that use the VOP3P encoding, i.e. where
+// omod/abs are used.
+class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+ VOP3P_Pseudo<OpName, P,
+ !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
+>;
+
+let isCommutable = 1 in {
+def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
+def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
+def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
+def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>>;
+
+def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+
+def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+}
+
+def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+
+// XXX - Commutable?
+def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+
+
+multiclass VOP3P_Real_vi<bits<10> op> {
+ def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3Pe <op, !cast<VOP3P_Pseudo>(NAME).Pfl> {
+ let AssemblerPredicates = [HasVOP3PInsts];
+ let DecoderNamespace = "VI";
+ }
+}
+
+defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
+defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
+defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
+defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
+defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
+
+defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
+defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
+defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
+defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
+defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
+defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
+defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
+defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
+
+defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
+defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
+defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
Modified: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td?rev=296368&r1=296367&r2=296368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td Mon Feb 27 12:49:11 2017
@@ -68,8 +68,9 @@ class VOP3Common <dag outs, dag ins, str
let hasPostISelHook = 1;
}
-class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
- InstSI <P.Outs64, P.Ins64, "", pattern>,
+class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
+ bit VOP3Only = 0, bit isVOP3P = 0> :
+ InstSI <P.Outs64, !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64), "", pattern>,
VOP <opName>,
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e64", opName> {
@@ -79,7 +80,7 @@ class VOP3_Pseudo <string opName, VOPPro
let UseNamedOperandTable = 1;
string Mnemonic = opName;
- string AsmOperands = P.Asm64;
+ string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);
let Size = 8;
let mayLoad = 0;
@@ -106,18 +107,24 @@ class VOP3_Pseudo <string opName, VOPPro
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
!if(!eq(VOP3Only,1),
- "cvtVOP3",
+ !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
!if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
VOPProfile Pfl = P;
}
+class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
+ VOP3_Pseudo<opName, P, pattern, 1, 1> {
+ let VOP3P = 1;
+}
+
class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let isPseudo = 0;
let isCodeGenOnly = 0;
+ let UseNamedOperandTable = 1;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
@@ -131,6 +138,11 @@ class VOP3_Real <VOP3_Pseudo ps, int Enc
let TSFlags = ps.TSFlags;
}
+// XXX - Is there any reason to distinguish this from regular VOP3
+// here?
+class VOP3P_Real<VOP3P_Pseudo ps, int EncodingFamily> :
+ VOP3_Real<ps, EncodingFamily>;
+
class VOP3a<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<9> src0;
@@ -198,6 +210,42 @@ class VOP3be <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
+class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ // neg, neg_hi, op_sel, op_sel_hi are packed into srcN_modifiers
+ bits<4> src0_modifiers;
+ bits<9> src0;
+ bits<4> src1_modifiers;
+ bits<9> src1;
+ bits<4> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+
+ let Inst{7-0} = vdst;
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
+
+ let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
+ let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
+ let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
+
+ let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2)
+
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x34; // encoding
+ let Inst{40-32} = !if(P.HasSrc0, src0, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+ let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel_hi(0)
+ let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1)
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
+}
+
class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
let Inst{25-17} = op;
}
@@ -349,3 +397,4 @@ include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
+include "VOP3PInstructions.td"
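
Putting the VOP3Pe layout in one place: each 4-bit srcN_modifiers field
is scattered across the word as neg (lo) in bits 61-63, neg_hi in bits
8-10, op_sel in bits 11-13, and op_sel_hi in bits 14 and 59-60, next to
clamp at bit 15 and the fixed 0x34 encoding in bits 31-26. A hedged C++
packer that mirrors the let Inst{...} assignments above; it is an
illustration of the layout, not the emitter this patch uses:

    #include <cstdint>

    // Modifier bit layout per source, as noted in the encoding class:
    // bit0 = neg (lo), bit1 = neg_hi, bit2 = op_sel, bit3 = op_sel_hi.
    uint64_t encodeVOP3P(uint16_t op, uint8_t vdst, uint16_t src0,
                         uint16_t src1, uint16_t src2, uint8_t mods0,
                         uint8_t mods1, uint8_t mods2, bool clamp) {
      uint64_t Inst = 0;
      Inst |= uint64_t(vdst);                       // Inst{7-0}
      Inst |= uint64_t((mods0 >> 1) & 1) << 8;      // neg_hi src0
      Inst |= uint64_t((mods1 >> 1) & 1) << 9;      // neg_hi src1
      Inst |= uint64_t((mods2 >> 1) & 1) << 10;     // neg_hi src2
      Inst |= uint64_t((mods0 >> 2) & 1) << 11;     // op_sel(0)
      Inst |= uint64_t((mods1 >> 2) & 1) << 12;     // op_sel(1)
      Inst |= uint64_t((mods2 >> 2) & 1) << 13;     // op_sel(2)
      Inst |= uint64_t((mods2 >> 3) & 1) << 14;     // op_sel_hi(2)
      Inst |= uint64_t(clamp) << 15;                // clamp
      Inst |= uint64_t(op & 0x3ff) << 16;           // Inst{25-16}
      Inst |= uint64_t(0x34) << 26;                 // encoding
      Inst |= uint64_t(src0 & 0x1ff) << 32;         // Inst{40-32}
      Inst |= uint64_t(src1 & 0x1ff) << 41;         // Inst{49-41}
      Inst |= uint64_t(src2 & 0x1ff) << 50;         // Inst{58-50}
      Inst |= uint64_t((mods0 >> 3) & 1) << 59;     // op_sel_hi(0)
      Inst |= uint64_t((mods1 >> 3) & 1) << 60;     // op_sel_hi(1)
      Inst |= uint64_t(mods0 & 1) << 61;            // neg src0 (lo)
      Inst |= uint64_t(mods1 & 1) << 62;            // neg src1 (lo)
      Inst |= uint64_t(mods2 & 1) << 63;            // neg src2 (lo)
      return Inst;
    }

With op = 0x38f (v_pk_add_f16) the low word's upper bytes come out as
0x8f 0xd3, matching the encodings in the tests below.
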
Added: llvm/trunk/test/MC/AMDGPU/literalv216-err.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/literalv216-err.s?rev=296368&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/literalv216-err.s (added)
+++ llvm/trunk/test/MC/AMDGPU/literalv216-err.s Mon Feb 27 12:49:11 2017
@@ -0,0 +1,22 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s
+
+v_pk_add_f16 v1, -17, v2
+// GFX9: :19: error: invalid operand for instruction
+
+v_pk_add_f16 v1, 65, v2
+// GFX9: :18: error: invalid operand for instruction
+
+v_pk_add_f16 v1, 64.0, v2
+// GFX9: :18: error: invalid operand for instruction
+
+v_pk_add_f16 v1, -0.15915494, v2
+// GFX9: :19: error: invalid operand for instruction
+
+v_pk_add_f16 v1, -0.0, v2
+// GFX9: :19: error: invalid operand for instruction
+
+v_pk_add_f16 v1, -32768, v2
+// GFX9: :19: error: invalid operand for instruction
+
+v_pk_add_f16 v1, 32767, v2
+// GFX9: :18: error: invalid operand for instruction
Added: llvm/trunk/test/MC/AMDGPU/literalv216.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/literalv216.s?rev=296368&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/literalv216.s (added)
+++ llvm/trunk/test/MC/AMDGPU/literalv216.s Mon Feb 27 12:49:11 2017
@@ -0,0 +1,112 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
+
+v_pk_add_f16 v1, 0, v2
+// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0.0, v2
+// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, v2, 0
+// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
+
+v_pk_add_f16 v1, v2, 0.0
+// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
+
+v_pk_add_f16 v1, 1.0, v2
+// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -1.0, v2
+// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -0.5, v2
+// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0.5, v2
+// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 2.0, v2
+// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -2.0, v2
+// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 4.0, v2
+// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -4.0, v2
+// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0.15915494, v2
+// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -1, v2
+// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -2, v2
+// GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc2,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -3, v2
+// GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc3,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, -16, v2
+// GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xd0,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 1, v2
+// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 2, v2
+// GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x82,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 3, v2
+// GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x83,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 4, v2
+// GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x84,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 15, v2
+// GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x8f,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 16, v2
+// GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x90,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 63, v2
+// GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xbf,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 64, v2
+// GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc0,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0x0001, v2
+// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0xffff, v2
+// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0x3c00, v2
+// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0xbc00, v2
+// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0x3800, v2
+// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0xb800, v2
+// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0x4000, v2
+// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0xc000, v2
+// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0x4400, v2
+// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0xc400, v2
+// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 0x3118, v2
+// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
+
+v_pk_add_f16 v1, 65535, v2
+// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
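
The checks above pin down the operand byte the assembler picks for each
inline constant: 0 encodes as 0x80, 1..64 as 0x81..0xc0, -1..-16 as
0xc1..0xd0, and the fp16 constants 0.5, -0.5, 1.0, -1.0, 2.0, -2.0,
4.0, -4.0, and 1/2pi as 0xf0..0xf8. A sketch of the integer part of
that mapping, inferred from the FileCheck lines rather than taken from
the encoder:

    #include <cstdint>
    #include <optional>

    // Operand-field byte for an inline integer constant, matching the
    // encodings checked above; nullopt means the value is not inline.
    std::optional<uint8_t> inlineIntSrcByte(int v) {
      if (v == 0) return uint8_t(0x80);
      if (v >= 1 && v <= 64) return uint8_t(0x80 + v);
      if (v >= -16 && v <= -1) return uint8_t(0xc0 - v);
      return std::nullopt;
    }

For example, inlineIntSrcByte(-1) yields 0xc1, the byte FileCheck
expects for both the -1 and 0xffff spellings above.
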
Added: llvm/trunk/test/MC/AMDGPU/vop3p-err.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3p-err.s?rev=296368&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3p-err.s (added)
+++ llvm/trunk/test/MC/AMDGPU/vop3p-err.s Mon Feb 27 12:49:11 2017
@@ -0,0 +1,113 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s
+
+// GFX9: 31: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel
+
+// GFX9: 32: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:
+
+// GFX9: 33: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[
+
+// GFX9: 33: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[]
+
+// GFX9: 34: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[,]
+
+// XXGFX9: 34: error: failed parsing operand.
+// v_pk_add_u16 v1, v2, v3 op_sel:[0]
+
+// GFX9: 35: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[0,]
+
+// XXGFX9: 36: error: failed parsing operand.
+// v_pk_add_u16 v1, v2, v3 op_sel:[,0]
+
+// GFX9: 36: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[0,2]
+
+// GFX9: 35: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[2,0]
+
+// GFX9: 33: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[-1,0]
+
+// GFX9: 35: error: failed parsing operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[0,-1]
+
+// GFX9: 40: error: not a valid operand.
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0,0,0]
+
+// XXGFX9: invalid operand for instruction
+v_pk_add_u16 v1, v2, v3 neg_lo:[0,0]
+
+//
+// Regular modifiers on packed instructions
+//
+
+// FIXME: should be invalid operand for instruction
+// GFX9: :18: error: not a valid operand.
+v_pk_add_f16 v1, |v2|, v3
+
+// GFX9: :21: error: not a valid operand.
+v_pk_add_f16 v1, abs(v2), v3
+
+// GFX9: :22: error: not a valid operand.
+v_pk_add_f16 v1, v2, |v3|
+
+// GFX9: :25: error: not a valid operand.
+v_pk_add_f16 v1, v2, abs(v3)
+
+// GFX9: :19: error: invalid operand for instruction
+v_pk_add_f16 v1, -v2, v3
+
+// GFX9: :23: error: invalid operand for instruction
+v_pk_add_f16 v1, v2, -v3
+
+// GFX9: :21: error: not a valid operand.
+v_pk_add_u16 v1, abs(v2), v3
+
+// GFX9: :19: error: invalid operand for instruction
+v_pk_add_u16 v1, -v2, v3
+
+
+//
+// Packed operands on the non-packed VOP3P instructions
+//
+
+// GFX9: invalid operand for instruction
+v_mad_mix_f32 v1, v2, v3, v4 op_sel:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mix_f32 v1, v2, v3, v4 op_sel_hi:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mix_f32 v1, v2, v3, v4 neg_lo:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mix_f32 v1, v2, v3, v4 neg_hi:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixlo_f16 v1, v2, v3, v4 op_sel:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixlo_f16 v1, v2, v3, v4 op_sel_hi:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixlo_f16 v1, v2, v3, v4 neg_lo:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixlo_f16 v1, v2, v3, v4 neg_hi:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixhi_f16 v1, v2, v3, v4 op_sel:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixhi_f16 v1, v2, v3, v4 op_sel_hi:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixhi_f16 v1, v2, v3, v4 neg_lo:[0,0,0]
+
+// GFX9: invalid operand for instruction
+v_mad_mixhi_f16 v1, v2, v3, v4 neg_hi:[0,0,0]
Added: llvm/trunk/test/MC/AMDGPU/vop3p.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3p.s?rev=296368&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3p.s (added)
+++ llvm/trunk/test/MC/AMDGPU/vop3p.s Mon Feb 27 12:49:11 2017
@@ -0,0 +1,216 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
+
+//
+// Test op_sel/op_sel_hi
+//
+
+v_pk_add_u16 v1, v2, v3
+// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0]
+// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,0]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x10]
+
+v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x08]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x08]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x10]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x10]
+
+v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x08]
+
+//
+// Test src2 op_sel/op_sel_hi
+//
+
+v_pk_fma_f16 v8, v0, s0, v1
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x8e,0xd3,0x00,0x01,0x04,0x04]
+
+v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x8e,0xd3,0x00,0x01,0x04,0x04]
+
+//
+// Test neg_lo/neg_hi
+//
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0xfc]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0xfc]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x3c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x5c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x9c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1]
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+
+// Test clamp
+v_pk_fma_f16 v8, v0, s0, v1 clamp
+// GFX9: v_pk_fma_f16 v8, v0, s0, v1 clamp ; encoding: [0x08,0xc0,0x8e,0xd3,0x00,0x01,0x04,0x1c]
+
+v_pk_add_u16 v1, v2, v3 clamp
+// GFX9: v_pk_add_u16 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x8a,0xd3,0x02,0x07,0x02,0x18]
+
+v_pk_min_i16 v0, v1, v2 clamp
+// GFX9: v_pk_min_i16 v0, v1, v2 clamp ; encoding: [0x00,0x80,0x88,0xd3,0x01,0x05,0x02,0x18]
+
+//
+// Instruction tests:
+//
+
+v_pk_mul_lo_u16 v0, v1, v2
+// GFX9: v_pk_mul_lo_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x81,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_add_i16 v0, v1, v2
+// GFX9: v_pk_add_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x82,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_sub_i16 v0, v1, v2
+// GFX9: v_pk_sub_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x83,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_lshlrev_b16 v0, v1, v2
+// GFX9: v_pk_lshlrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x84,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_lshrrev_b16 v0, v1, v2
+// GFX9: v_pk_lshrrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x85,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_ashrrev_i16 v0, v1, v2
+// GFX9: v_pk_ashrrev_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x86,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_max_i16 v0, v1, v2
+// GFX9: v_pk_max_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x87,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_min_i16 v0, v1, v2
+// GFX9: v_pk_min_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x88,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_add_u16 v0, v1, v2
+// GFX9: v_pk_add_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_max_u16 v0, v1, v2
+// GFX9: v_pk_max_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_min_u16 v0, v1, v2
+// GFX9: v_pk_min_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_fma_f16 v0, v1, v2, v3
+// GFX9: v_pk_fma_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x8e,0xd3,0x01,0x05,0x0e,0x1c]
+
+v_pk_add_f16 v0, v1, v2
+// GFX9: v_pk_add_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_mul_f16 v0, v1, v2
+// GFX9: v_pk_mul_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x90,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_min_f16 v0, v1, v2
+// GFX9: v_pk_min_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x91,0xd3,0x01,0x05,0x02,0x18]
+
+v_pk_max_f16 v0, v1, v2
+// GFX9: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x92,0xd3,0x01,0x05,0x02,0x18]
+
+v_mad_mix_f32 v0, v1, v2, v3
+// GFX9: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
+
+v_mad_mixlo_f16 v0, v1, v2, v3
+// GFX9: v_mad_mixlo_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa1,0xd3,0x01,0x05,0x0e,0x04]
+
+v_mad_mixhi_f16 v0, v1, v2, v3
+// GFX9: v_mad_mixhi_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa2,0xd3,0x01,0x05,0x0e,0x04]
+
+
+//
+// Regular source modifiers on non-packed instructions
+//
+
+v_mad_mix_f32 v0, abs(v1), v2, v3
+// GFX9: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04]
+
+v_mad_mix_f32 v0, v1, abs(v2), v3
+// GFX9: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04]
+
+v_mad_mix_f32 v0, v1, v2, abs(v3)
+// GFX9: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04]
+
+v_mad_mix_f32 v0, -v1, v2, v3
+// GFX9: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24]
+
+v_mad_mix_f32 v0, v1, -v2, v3
+// GFX9: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44]
+
+v_mad_mix_f32 v0, v1, v2, -v3
+// GFX9: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84]
+
+v_mad_mix_f32 v0, -abs(v1), v2, v3
+// GFX9: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24]
+
+v_mad_mix_f32 v0, v1, -abs(v2), v3
+// GFX9: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44]
+
+v_mad_mix_f32 v0, v1, v2, -abs(v3)
+// GFX9: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84]
+
+v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
+// GFX9: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44]
+
+v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
+// GFX9: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4]
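
To cross-check the printed encodings, the modifier state can be read
back out of the eight little-endian bytes: op_sel sits in bits 11-13,
clamp in bit 15, and op_sel_hi(0,1) in bits 59-60, per the VOP3Pe class
earlier in the patch. A small decoder sketch over one of the byte lists
above:

    #include <cstdint>
    #include <cstdio>

    // Reassemble the 64-bit word from the little-endian encoding bytes
    // shown by FileCheck, then pull out a few VOP3P modifier fields.
    void dumpVOP3PMods(const uint8_t B[8]) {
      uint64_t Inst = 0;
      for (int i = 0; i < 8; ++i)
        Inst |= uint64_t(B[i]) << (8 * i);
      unsigned OpSel = (Inst >> 11) & 7;
      bool Clamp = (Inst >> 15) & 1;
      unsigned OpSelHi01 = (Inst >> 59) & 3;
      std::printf("op_sel=%u clamp=%d op_sel_hi(0,1)=%u\n",
                  OpSel, Clamp, OpSelHi01);
    }

    int main() {
      // v_pk_add_u16 v1, v2, v3 op_sel:[1,0] from the test above.
      const uint8_t B[8] = {0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x18};
      dumpVOP3PMods(B); // op_sel=1 clamp=0 op_sel_hi(0,1)=3
    }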