[llvm] r359756 - [AMDGPU] gfx1010 allows VOP3 to have a literal
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed May 1 21:01:40 PDT 2019
Author: rampitec
Date: Wed May 1 21:01:39 2019
New Revision: 359756
URL: http://llvm.org/viewvc/llvm-project?rev=359756&view=rev
Log:
[AMDGPU] gfx1010 allows VOP3 to have a literal
Differential Revision: https://reviews.llvm.org/D61413
Added:
llvm/trunk/test/MC/AMDGPU/gfx10-constant-bus.s
llvm/trunk/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt
llvm/trunk/test/MC/Disassembler/AMDGPU/vop3-literal.txt
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
llvm/trunk/test/MC/AMDGPU/expressions.s
llvm/trunk/test/MC/AMDGPU/literals.s
llvm/trunk/test/MC/AMDGPU/literalv216-err.s
llvm/trunk/test/MC/AMDGPU/literalv216.s
llvm/trunk/test/MC/AMDGPU/reloc.s
llvm/trunk/test/MC/AMDGPU/vop2-err.s
llvm/trunk/test/MC/AMDGPU/vop3-errs.s
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td Wed May 1 21:01:39 2019
@@ -65,10 +65,6 @@ def AMDGPUIfBreakOp : SDTypeProfile<1, 2
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
>;
-def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
- [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
->;
-
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -203,12 +199,6 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CAR
// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
-// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own
-// nodes in TargetSelectionDAG.td.
-def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>;
-
-def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>;
-
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
]>;
@@ -249,7 +239,8 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD:
// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
+ [SDNPOptInGlue]>;
// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed May 1 21:01:39 2019
@@ -236,7 +236,7 @@ public:
}
bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
- return isRegClass(RCID) || isInlinableImm(type);
+ return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
}
bool isRegOrImmWithInt16InputMods() const {
@@ -461,7 +461,7 @@ public:
}
bool isVSrcB32() const {
- return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
+ return isVCSrcF32() || isLiteralImm(MVT::i32);
}
bool isVSrcB64() const {
@@ -473,12 +473,11 @@ public:
}
bool isVSrcV2B16() const {
- llvm_unreachable("cannot happen");
- return isVSrcB16();
+ return isVSrcB16() || isLiteralImm(MVT::v2i16);
}
bool isVSrcF32() const {
- return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
+ return isVCSrcF32() || isLiteralImm(MVT::f32);
}
bool isVSrcF64() const {
@@ -490,8 +489,7 @@ public:
}
bool isVSrcV2F16() const {
- llvm_unreachable("cannot happen");
- return isVSrcF16();
+ return isVSrcF16() || isLiteralImm(MVT::v2f16);
}
bool isKImmFP32() const {
@@ -1145,6 +1143,7 @@ private:
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGDim(const MCInst &Inst);
bool validateLdsDirect(const MCInst &Inst);
+ bool validateVOP3Literal(const MCInst &Inst) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -1287,6 +1286,8 @@ static const fltSemantics *getOpFltSeman
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
return &APFloat::IEEEhalf();
default:
llvm_unreachable("unsupported fp type");
@@ -1419,8 +1420,14 @@ bool AMDGPUOperand::isLiteralImm(MVT typ
return false;
}
+ // We allow fp literals with f16x2 operands assuming that the specified
+ // literal goes into the lower half and the upper half is zero. We also
+ // require that the literal may be losslessly converted to f16.
+ MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
+ (type == MVT::v2i16)? MVT::i16 : type;
+
APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
- return canLosslesslyConvertToFPType(FPLiteral, type);
+ return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
@@ -1535,7 +1542,9 @@ void AMDGPUOperand::addLiteralImmOperand
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -1562,6 +1571,8 @@ void AMDGPUOperand::addLiteralImmOperand
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
if (isSafeTruncation(Val, 32) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -2419,7 +2430,9 @@ bool AMDGPUAsmParser::isInlineConstant(c
case 2: {
const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
- OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
} else {
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
@@ -2919,6 +2932,42 @@ bool AMDGPUAsmParser::validateSOPLiteral
return NumLiterals <= 1;
}
+// VOP3 literal is only allowed in GFX10+ (FeatureVOP3Literal) and only one can be used; returns true when Inst passes this check.
+bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
+ unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+ if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+ return true; // Neither VOP3 nor VOP3P: this check does not apply.
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
+ unsigned NumLiterals = 0;
+ uint32_t LiteralValue; // Only read once NumLiterals is non-zero.
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1) break; // Stop at the first source operand the opcode lacks.
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
+ continue; // Only immediate SI source operands are examined.
+
+ if (!isInlineConstant(Inst, OpIdx)) {
+ uint32_t Value = static_cast<uint32_t>(MO.getImm()); // Compared on the low 32 bits.
+ if (NumLiterals == 0 || LiteralValue != Value) { // Repeating the last recorded value is not counted again.
+ LiteralValue = Value;
+ ++NumLiterals;
+ }
+ }
+ }
+
+ return !NumLiterals || // No literal operand at all always passes.
+ (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
+}
+
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc) {
if (!validateLdsDirect(Inst)) {
@@ -2931,6 +2980,11 @@ bool AMDGPUAsmParser::validateInstructio
"only one literal operand is allowed");
return false;
}
+ if (!validateVOP3Literal(Inst)) {
+ Error(IDLoc,
+ "invalid literal operand");
+ return false;
+ }
if (!validateConstantBusLimitations(Inst)) {
Error(IDLoc,
"invalid operand (violates constant bus restrictions)");
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Wed May 1 21:01:39 2019
@@ -618,6 +618,14 @@ void AMDGPUInstPrinter::printOperand(con
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ if (!isUInt<16>(Op.getImm()) &&
+ STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+ printImmediate32(Op.getImm(), STI, O);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
printImmediateV216(Op.getImm(), STI, O);
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp Wed May 1 21:01:39 2019
@@ -249,6 +249,11 @@ uint32_t SIMCCodeEmitter::getLitEncoding
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
+ return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+ LLVM_FALLTHROUGH;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Wed May 1 21:01:39 2019
@@ -165,13 +165,16 @@ FunctionPass *llvm::createSIFoldOperands
static bool updateOperand(FoldCandidate &Fold,
const SIInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ const GCNSubtarget &ST) {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
assert(Old.isReg());
if (Fold.isImm()) {
- if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
+ if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
+ AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
+ ST.hasInv2PiInlineImm())) {
// Set op_sel/op_sel_hi on this operand or bail out if op_sel is
// already set.
unsigned Opcode = MI->getOpcode();
@@ -192,6 +195,8 @@ static bool updateOperand(FoldCandidate
// Only apply the following transformation if that operand requries
// a packed immediate.
switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
// If upper part is all zero we do not need op_sel_hi.
@@ -203,6 +208,8 @@ static bool updateOperand(FoldCandidate
return true;
}
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
+ return true;
}
break;
default:
@@ -891,7 +898,7 @@ void SIFoldOperands::foldInstOperand(Mac
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, *TII, *TRI)) {
+ if (updateOperand(Fold, *TII, *TRI, *ST)) {
// Clear kill flags.
if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed May 1 21:01:39 2019
@@ -2549,19 +2549,12 @@ bool SIInstrInfo::isInlineConstant(const
return false;
}
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
- if (isUInt<16>(Imm)) {
- int16_t Trunc = static_cast<int16_t>(Imm);
- return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
- }
- if (!(Imm & 0xffff)) {
- return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
- }
uint32_t Trunc = static_cast<uint32_t>(Imm);
- return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
+ return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
}
default:
llvm_unreachable("invalid bitwidth");
@@ -2603,7 +2596,8 @@ static bool compareMachineOp(const Machi
bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
const MachineOperand &MO) const {
- const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
+ const MCInstrDesc &InstDesc = MI.getDesc();
+ const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
@@ -2616,7 +2610,15 @@ bool SIInstrInfo::isImmOperandLegal(cons
if (MO.isImm() && isInlineConstant(MO, OpInfo))
return RI.opCanUseInlineConstant(OpInfo.OperandType);
- return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+ if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
+ return false;
+
+ if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
+ return true;
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ return ST.hasVOP3Literal();
}
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
@@ -3600,17 +3602,14 @@ void SIInstrInfo::legalizeOperandsVOP2(M
MachineOperand &Src1 = MI.getOperand(Src1Idx);
// If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
- // we need to only have one constant bus use.
- //
- // Note we do not need to worry about literal constants here. They are
- // disabled for the operand type for instructions because they will always
- // violate the one constant bus use rule.
+ // we need to only have one constant bus use before GFX10.
bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
MachineOperand &Src0 = MI.getOperand(Src0Idx);
- if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
+ if (Src0.isReg() && (RI.isSGPRReg(MRI, Src0.getReg()) ||
+ isLiteralConstantLike(Src0, InstrDesc.OpInfo[Src0Idx])))
legalizeOpWithMove(MI, Src0Idx);
}
@@ -3702,10 +3701,8 @@ void SIInstrInfo::legalizeOperandsVOP2(M
Src1.setSubReg(Src0SubReg);
}
-// Legalize VOP3 operands. Because all operand types are supported for any
-// operand, and since literal constants are not allowed and should never be
-// seen, we only need to worry about inserting copies if we use multiple SGPR
-// operands.
+// Legalize VOP3 operands. All operand types are supported for any operand
+// but only one literal constant and only starting from GFX10.
void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
@@ -5732,18 +5729,29 @@ int SIInstrInfo::pseudoToMCOpcode(int Op
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
- ST.getGeneration() >= AMDGPUSubtarget::GFX9)
+ ST.getGeneration() == AMDGPUSubtarget::GFX9)
Gen = SIEncodingFamily::GFX9;
- if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
- Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
- : SIEncodingFamily::SDWA;
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
// subtarget has UnpackedD16VMem feature.
// TODO: remove this when we discard GFX80 encoding.
if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
Gen = SIEncodingFamily::GFX80;
+ if (get(Opcode).TSFlags & SIInstrFlags::SDWA) {
+ switch (ST.getGeneration()) {
+ default:
+ Gen = SIEncodingFamily::SDWA;
+ break;
+ case AMDGPUSubtarget::GFX9:
+ Gen = SIEncodingFamily::SDWA9;
+ break;
+ case AMDGPUSubtarget::GFX10:
+ Gen = SIEncodingFamily::SDWA10;
+ break;
+ }
+ }
+
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed May 1 21:01:39 2019
@@ -1209,7 +1209,7 @@ class getVOPSrc0ForVT<ValueType VT> {
!if(!eq(VT.Value, f16.Value),
VSrc_f16,
!if(!eq(VT.Value, v2f16.Value),
- VCSrc_v2f16,
+ VSrc_v2f16,
VSrc_f32
)
)
@@ -1219,7 +1219,7 @@ class getVOPSrc0ForVT<ValueType VT> {
!if(!eq(VT.Value, i16.Value),
VSrc_b16,
!if(!eq(VT.Value, v2i16.Value),
- VCSrc_v2b16,
+ VSrc_v2b16,
VSrc_b32
)
)
@@ -1255,23 +1255,23 @@ class getVOP3SrcForVT<ValueType VT> {
VSrc_128,
!if(!eq(VT.Size, 64),
!if(isFP,
- VCSrc_f64,
- VCSrc_b64),
+ VSrc_f64,
+ VSrc_b64),
!if(!eq(VT.Value, i1.Value),
SCSrc_i1,
!if(isFP,
!if(!eq(VT.Value, f16.Value),
- VCSrc_f16,
+ VSrc_f16,
!if(!eq(VT.Value, v2f16.Value),
- VCSrc_v2f16,
- VCSrc_f32
+ VSrc_v2f16,
+ VSrc_f32
)
),
!if(!eq(VT.Value, i16.Value),
- VCSrc_b16,
+ VSrc_b16,
!if(!eq(VT.Value, v2i16.Value),
- VCSrc_v2b16,
- VCSrc_b32
+ VSrc_v2b16,
+ VSrc_b32
)
)
)
Modified: llvm/trunk/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/pk_max_f16_literal.ll?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/pk_max_f16_literal.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/pk_max_f16_literal.ll Wed May 1 21:01:39 2019
@@ -1,7 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1:
-; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
+; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -14,7 +15,7 @@ bb:
}
; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0:
-; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
+; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -27,7 +28,7 @@ bb:
}
; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1:
-; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
+; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -40,7 +41,7 @@ bb:
}
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
-; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
+; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -53,7 +54,7 @@ bb:
}
; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0:
-; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
+; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -66,7 +67,7 @@ bb:
}
; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1:
-; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
+; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -79,7 +80,7 @@ bb:
}
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0:
-; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
+; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -90,6 +91,51 @@ bb:
store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
ret void
}
+
+; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
+; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000
+; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
+; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
+define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(<2 x half> addrspace(1)* nocapture %arg) {
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %tmp1 = zext i32 %tmp to i64
+ %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
+ %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
+ %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH41C8>)
+ store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_pk_max_f16_literal_41c8_0:
+; GFX9: s_movk_i32 [[C:s[0-9]+]], 0x41c8
+; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
+; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_pk_max_f16_literal_41c8_0(<2 x half> addrspace(1)* nocapture %arg) {
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %tmp1 = zext i32 %tmp to i64
+ %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
+ %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
+ %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH41C8, half 0xH0>)
+ store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_pk_max_f16_literal_42ca_41c8:
+; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c842ca
+; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
+; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c842ca, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_pk_max_f16_literal_42ca_41c8(<2 x half> addrspace(1)* nocapture %arg) {
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %tmp1 = zext i32 %tmp to i64
+ %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
+ %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
+ %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH42CA, half 0xH41C8>)
+ store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
+ ret void
+}
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
declare i32 @llvm.amdgcn.workitem.id.x()
Modified: llvm/trunk/test/MC/AMDGPU/expressions.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/expressions.s?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/expressions.s (original)
+++ llvm/trunk/test/MC/AMDGPU/expressions.s Wed May 1 21:01:39 2019
@@ -41,9 +41,6 @@ s_mov_b32 s0, foo+2
s_mov_b32 s0, foo+2
// VI: s_mov_b32 s0, 514 ; encoding: [0xff,0x00,0x80,0xbe,0x02,0x02,0x00,0x00]
-v_mul_f32 v0, foo+2, v2
-// VI: v_mul_f32_e32 v0, 514, v2 ; encoding: [0xff,0x04,0x00,0x0a,0x02,0x02,0x00,0x00]
-
BB1:
v_nop_e64
BB2:
@@ -80,23 +77,24 @@ s_sub_u32 s0, s0, 1.0 + t
v=1
v_sin_f32 v0, -v
-// VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e]
+// NOVI: error: invalid operand for instruction
+v=1
v_sin_f32 v0, -v[0]
// VI: v_sin_f32_e64 v0, -v0 ; encoding: [0x00,0x00,0x69,0xd1,0x00,0x01,0x00,0x20]
s=1
-v_sin_f32 v0, -s
-// VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e]
+s_not_b32 s0, -s
+// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
s0=1
-v_sin_f32 v0, -s0
-// VI: v_sin_f32_e64 v0, -s0 ; encoding: [0x00,0x00,0x69,0xd1,0x00,0x00,0x00,0x20]
+s_not_b32 s0, -s0
+// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
ttmp=1
-v_sin_f32 v0, -ttmp
-// VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e]
+s_not_b32 s0, -ttmp
+// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
ttmp0=1
-v_sin_f32 v0, -[ttmp0]
-// VI: v_sin_f32_e64 v0, -ttmp0 ; encoding: [0x00,0x00,0x69,0xd1,0x70,0x00,0x00,0x20]
+s_not_b32 s0, -[ttmp0]
+// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe]
Added: llvm/trunk/test/MC/AMDGPU/gfx10-constant-bus.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/gfx10-constant-bus.s?rev=359756&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/gfx10-constant-bus.s (added)
+++ llvm/trunk/test/MC/AMDGPU/gfx10-constant-bus.s Wed May 1 21:01:39 2019
@@ -0,0 +1,35 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s
+
+//-----------------------------------------------------------------------------------------
+// On GFX10 we can use two scalar operands
+
+v_add_f32 v0, s0, s1
+// GFX10: v_add_f32_e64 v0, s0, s1 ; encoding: [0x00,0x00,0x03,0xd5,0x00,0x02,0x00,0x00]
+
+v_madak_f32 v0, s0, v1, 42.42
+// GFX10: v_madak_f32 v0, s0, v1, 0x4229ae14 ; encoding: [0x00,0x02,0x00,0x42,0x14,0xae,0x29,0x42]
+
+v_med3_f32 v0, s0, s0, s1
+// GFX10: v_med3_f32 v0, s0, s0, s1 ; encoding: [0x00,0x00,0x57,0xd5,0x00,0x00,0x04,0x00]
+
+//-----------------------------------------------------------------------------------------
+// v_div_fmas implicitly reads VCC, so only one scalar operand is possible
+
+v_div_fmas_f32 v5, s3, s3, s3
+// GFX10: v_div_fmas_f32 v5, s3, s3, s3 ; encoding: [0x05,0x00,0x6f,0xd5,0x03,0x06,0x0c,0x00]
+
+v_div_fmas_f32 v5, s3, s3, s2
+// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+
+v_div_fmas_f32 v5, s3, 0x123, v3
+// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+
+v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678
+// GFX10: v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678 ; encoding: [0x05,0x00,0x70,0xd5,0xff,0xfe,0xfd,0x03,0x78,0x56,0x34,0x12]
+
+v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4]
+// GFX10: v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4] ; encoding: [0x05,0x00,0x70,0xd5,0x01,0x05,0x0c,0x04]
+
+v_div_fmas_f64 v[5:6], v[1:2], s[2:3], 0x123456
+// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
Modified: llvm/trunk/test/MC/AMDGPU/literals.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/literals.s?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/literals.s (original)
+++ llvm/trunk/test/MC/AMDGPU/literals.s Wed May 1 21:01:39 2019
@@ -282,12 +282,12 @@ v_trunc_f32_e32 v0, 1234
// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00]
v_fract_f64_e32 v[0:1], 1234
-// NOSICI: error: invalid operand for instruction
-// NOGFX89: error: invalid operand for instruction
+// NOSICI: error: invalid literal operand
+// NOGFX89: error: invalid literal operand
v_trunc_f32_e64 v0, 1234
-// NOSICI: error: invalid operand for instruction
-// NOGFX89: error: invalid operand for instruction
+// NOSICI: error: invalid literal operand
+// NOGFX89: error: invalid literal operand
v_fract_f64_e64 v[0:1], 1234
// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
@@ -378,8 +378,8 @@ s_mov_b64_e32 s[0:1], 1234
// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00]
v_and_b32_e32 v0, 1234, v1
-// NOSICI: error: invalid operand for instruction
-// NOGFX89: error: invalid operand for instruction
+// NOSICI: error: invalid literal operand
+// NOGFX89: error: invalid literal operand
v_and_b32_e64 v0, 1234, v1
// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff]
@@ -450,12 +450,12 @@ v_trunc_f32_e64 v0, 0x3fc45f306dc9c882
// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: invalid literal operand
// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
v_trunc_f32_e64 v0, 0x3e22f983
-// NOSICI: error: invalid operand for instruction
-// NOGFX89: error: invalid operand for instruction
+// NOSICI: error: invalid literal operand
+// NOGFX89: error: invalid literal operand
v_fract_f64_e64 v[0:1], 0x3e22f983
// NOSICI: error: invalid operand for instruction
@@ -466,7 +466,7 @@ s_mov_b64_e32 s[0:1], 0.1591549430918953
// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26]
v_and_b32_e32 v0, 0.159154943091895317852646485335, v1
-// NOSICI: error: invalid operand for instruction
+// NOSICI: error: invalid literal operand
// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
v_and_b32_e64 v0, 0.159154943091895317852646485335, v1
Modified: llvm/trunk/test/MC/AMDGPU/literalv216-err.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/literalv216-err.s?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/literalv216-err.s (original)
+++ llvm/trunk/test/MC/AMDGPU/literalv216-err.s Wed May 1 21:01:39 2019
@@ -1,28 +1,28 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s
v_pk_add_f16 v1, -17, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, 65, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, 64.0, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, -0.15915494, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, -0.0, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, -32768, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, 32767, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, 0xffffffffffff000f, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
v_pk_add_f16 v1, 0x1000ffff, v2
-// GFX9: error: invalid operand for instruction
+// GFX9: error: invalid literal operand
Modified: llvm/trunk/test/MC/AMDGPU/literalv216.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/literalv216.s?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/literalv216.s (original)
+++ llvm/trunk/test/MC/AMDGPU/literalv216.s Wed May 1 21:01:39 2019
@@ -1,112 +1,286 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10
+
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX9
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX10
+
+//===----------------------------------------------------------------------===//
+// Inline constants
+//===----------------------------------------------------------------------===//
v_pk_add_f16 v1, 0, v2
// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18]
v_pk_add_f16 v1, 0.0, v2
// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18]
v_pk_add_f16 v1, v2, 0
// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
+// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18]
v_pk_add_f16 v1, v2, 0.0
// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18]
+// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18]
v_pk_add_f16 v1, 1.0, v2
// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18]
v_pk_add_f16 v1, -1.0, v2
// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18]
v_pk_add_f16 v1, -0.5, v2
// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18]
v_pk_add_f16 v1, 0.5, v2
// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18]
v_pk_add_f16 v1, 2.0, v2
// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18]
v_pk_add_f16 v1, -2.0, v2
// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18]
v_pk_add_f16 v1, 4.0, v2
// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18]
v_pk_add_f16 v1, -4.0, v2
// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18]
v_pk_add_f16 v1, 0.15915494, v2
// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18]
v_pk_add_f16 v1, -1, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
v_pk_add_f16 v1, -2, v2
// GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc2,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18]
v_pk_add_f16 v1, -3, v2
// GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc3,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18]
v_pk_add_f16 v1, -16, v2
// GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xd0,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18]
v_pk_add_f16 v1, 1, v2
// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18]
v_pk_add_f16 v1, 2, v2
// GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x82,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18]
v_pk_add_f16 v1, 3, v2
// GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x83,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18]
v_pk_add_f16 v1, 4, v2
// GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x84,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18]
v_pk_add_f16 v1, 15, v2
// GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x8f,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18]
v_pk_add_f16 v1, 16, v2
// GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x90,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18]
v_pk_add_f16 v1, 63, v2
// GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xbf,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18]
v_pk_add_f16 v1, 64, v2
// GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc0,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x0001, v2
// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xffff, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x3c00, v2
// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xbc00, v2
// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x3800, v2
// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xb800, v2
// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x4000, v2
// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xc000, v2
// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x4400, v2
// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xc400, v2
// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18]
v_pk_add_f16 v1, 0x3118, v2
// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18]
v_pk_add_f16 v1, 65535, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18]
+// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
+
+//===----------------------------------------------------------------------===//
+// Integer literals
+//===----------------------------------------------------------------------===//
+
+v_pk_add_f16 v5, v1, 0x12345678
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
+
+v_pk_add_f16 v5, 0x12345678, v2
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
+
+v_pk_add_f16 v5, -256, v2
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
+
+v_pk_add_f16 v5, v1, 256
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
+
+v_pk_add_u16 v5, v1, 0x12345678
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
+
+v_pk_add_u16 v5, 0x12345678, v2
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
+
+v_pk_add_u16 v5, -256, v2
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
+
+v_pk_add_u16 v5, v1, 256
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
+
+v_pk_add_f16 v5, v1, 0x123456780
+// NOGFX9: error: invalid operand for instruction
+// NOGFX10: error: invalid operand for instruction
+
+v_pk_add_u16 v5, v1, 0x123456780
+// NOGFX9: error: invalid operand for instruction
+// NOGFX10: error: invalid operand for instruction
+
+v_pk_fma_f16 v5, 0xaf123456, v2, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_fma_f16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
+
+v_pk_fma_f16 v5, v1, 0xaf123456, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_fma_f16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
+
+v_pk_fma_f16 v5, v1, v2, 0xaf123456
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_fma_f16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
+
+v_pk_mad_i16 v5, 0xaf123456, v2, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_mad_i16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
+
+v_pk_mad_i16 v5, v1, 0xaf123456, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_mad_i16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
+
+v_pk_mad_i16 v5, v1, v2, 0xaf123456
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_mad_i16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
+
+v_pk_ashrrev_i16 v5, 0x12345678, v2
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
+
+v_pk_ashrrev_i16 v5, v1, 0x12345678
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
+
+//===----------------------------------------------------------------------===//
+// Floating-point literals (allowed if lossless conversion to f16 is possible)
+//===----------------------------------------------------------------------===//
+
+v_pk_add_f16 v5, v1, 0.1234
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_f16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00]
+
+v_pk_add_u16 v5, v1, 0.1234
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_add_u16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00]
+
+v_pk_fma_f16 v5, 0.1234, v2, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_fma_f16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00]
+
+v_pk_fma_f16 v5, v1, 0.1234, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_fma_f16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00]
+
+v_pk_fma_f16 v5, v1, v2, 0.1234
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_fma_f16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00]
+
+v_pk_mad_i16 v5, 0.1234, v2, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_mad_i16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00]
+
+v_pk_mad_i16 v5, v1, 0.1234, v3
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_mad_i16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00]
+
+v_pk_mad_i16 v5, v1, v2, 0.1234
+// NOGFX9: error: invalid literal operand
+// GFX10: v_pk_mad_i16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00]
+
+v_pk_add_f16 v5, v1, 123456.0
+// NOGFX9: error: invalid operand for instruction
+// NOGFX10: error: invalid operand for instruction
+
+v_pk_add_u16 v5, v1, 123456.0
+// NOGFX9: error: invalid operand for instruction
+// NOGFX10: error: invalid operand for instruction
+
+//===----------------------------------------------------------------------===//
+// Packed VOP2
+//===----------------------------------------------------------------------===//
+
+// FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid
+v_pk_fmac_f16 v5, 0x12345678, v2
+// NOGFX9: error: instruction not supported on this GPU
+// GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]
Modified: llvm/trunk/test/MC/AMDGPU/reloc.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/reloc.s?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/reloc.s (original)
+++ llvm/trunk/test/MC/AMDGPU/reloc.s Wed May 1 21:01:39 2019
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu=kaveri -show-encoding %s | llvm-readobj -r | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu=kaveri -show-encoding %s | llvm-readobj -relocations | FileCheck %s
// CHECK: Relocations [
// CHECK: .rel.text {
@@ -9,13 +9,6 @@
// CHECK: R_AMDGPU_GOTPCREL32_HI global_var2 0x0
// CHECK: R_AMDGPU_REL32_LO global_var3 0x0
// CHECK: R_AMDGPU_REL32_HI global_var4 0x0
-// CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 0x0
-// CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 0x0
-// CHECK: R_AMDGPU_GOTPCREL global_var0 0x0
-// CHECK: R_AMDGPU_GOTPCREL32_LO global_var1 0x0
-// CHECK: R_AMDGPU_GOTPCREL32_HI global_var2 0x0
-// CHECK: R_AMDGPU_REL32_LO global_var3 0x0
-// CHECK: R_AMDGPU_REL32_HI global_var4 0x0
// CHECK: R_AMDGPU_ABS32 var 0x0
// CHECK: }
// CHECK: .rel.data {
@@ -33,14 +26,6 @@ kernel:
s_mov_b32 s5, global_var3@rel32@lo
s_mov_b32 s6, global_var4@rel32@hi
- v_mov_b32 v0, SCRATCH_RSRC_DWORD0
- v_mov_b32 v1, SCRATCH_RSRC_DWORD1
- v_mov_b32 v2, global_var0@GOTPCREL
- v_mov_b32 v3, global_var1@gotpcrel32@lo
- v_mov_b32 v4, global_var2@gotpcrel32@hi
- v_mov_b32 v5, global_var3@rel32@lo
- v_mov_b32 v6, global_var4@rel32@hi
-
.globl global_var0
.globl global_var1
.globl global_var2
Modified: llvm/trunk/test/MC/AMDGPU/vop2-err.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop2-err.s?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop2-err.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop2-err.s Wed May 1 21:01:39 2019
@@ -6,7 +6,7 @@
//===----------------------------------------------------------------------===//
v_mul_i32_i24 v1, v2, 100
-// CHECK: error: invalid operand for instruction
+// CHECK: error: invalid literal operand
//===----------------------------------------------------------------------===//
// _e32 checks
@@ -29,11 +29,11 @@ v_cndmask_b32_e32 v1, v2, v3, s[0:1]
// Immediate src0
v_mul_i32_i24_e64 v1, 100, v3
-// CHECK: error: invalid operand for instruction
+// CHECK: error: invalid literal operand
// Immediate src1
v_mul_i32_i24_e64 v1, v2, 100
-// CHECK: error: invalid operand for instruction
+// CHECK: error: invalid literal operand
v_add_i32_e32 v1, s[0:1], v2, v3
// CHECK: error: invalid operand for instruction
Modified: llvm/trunk/test/MC/AMDGPU/vop3-errs.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop3-errs.s?rev=359756&r1=359755&r2=359756&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop3-errs.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop3-errs.s Wed May 1 21:01:39 2019
@@ -7,7 +7,7 @@ v_add_f32_e64 v0, v1
// GCN: error: too few operands for instruction
v_div_scale_f32 v24, vcc, v22, 1.1, v22
-// GCN: error: invalid operand for instruction
+// GCN: error: invalid literal operand
v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3]
// GFX67: error: instruction not supported on this GPU
Added: llvm/trunk/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt?rev=359756&view=auto
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt (added)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt Wed May 1 21:01:39 2019
@@ -0,0 +1,149 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding %s | FileCheck -check-prefix=GFX10 %s
+
+#===----------------------------------------------------------------------===//
+# Inline constants
+#===----------------------------------------------------------------------===//
+
+# GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18]
+0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18
+
+# GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18
+
+# GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18]
+0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18
+
+# GFX10: v_pk_fma_f16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x0e,0xcc,0xf2,0xe8,0xd9,0x1b]
+0x05,0x40,0x0e,0xcc,0xf2,0xe8,0xd9,0x1b
+
+# GFX10: v_pk_fma_f16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b]
+0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b
+
+# GFX10: v_pk_mad_i16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b]
+0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b
+
+# GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b]
+0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b
+
+# GFX10: v_pk_ashrrev_i16 v5, 1, 16 ; encoding: [0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18]
+0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18
+
+#===----------------------------------------------------------------------===//
+# 32-bit literals
+#===----------------------------------------------------------------------===//
+
+# GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
+0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12
+
+# GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
+0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12
+
+# GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
+0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff
+
+# GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
+0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00
+
+# GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
+0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12
+
+# GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
+0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12
+
+# GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff]
+0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff
+
+# GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00]
+0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00
+
+# GFX10: v_pk_fma_f16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
+0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf
+
+# GFX10: v_pk_fma_f16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
+0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf
+
+# GFX10: v_pk_fma_f16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
+0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf
+
+# GFX10: v_pk_mad_i16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf]
+0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf
+
+# GFX10: v_pk_mad_i16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf]
+0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf
+
+# GFX10: v_pk_mad_i16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf]
+0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf
+
+# GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12]
+0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12
+
+# GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12]
+0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12
+
+#===----------------------------------------------------------------------===//
+# Packed VOP2
+#===----------------------------------------------------------------------===//
+
+# FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid
+# GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12]
+0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12
Added: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3-literal.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/vop3-literal.txt?rev=359756&view=auto
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3-literal.txt (added)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3-literal.txt Wed May 1 21:01:39 2019
@@ -0,0 +1,49 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s
+
+# GFX10: v_bfe_u32 v0, 0x3039, v1, s1 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00]
+0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00
+
+# GFX10: v_bfe_u32 v0, v1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xff,0x05,0x00,0x39,0x30,0x00,0x00]
+0x00,0x00,0x48,0xd5,0x01,0xff,0x05,0x00,0x39,0x30,0x00,0x00
+
+# GFX10: v_bfe_u32 v0, v1, s1, 0x3039 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0x03,0xfc,0x03,0x39,0x30,0x00,0x00]
+0x00,0x00,0x48,0xd5,0x01,0x03,0xfc,0x03,0x39,0x30,0x00,0x00
+
+# GFX10: v_bfe_u32 v0, 0x3039, v1, v2 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x0a,0x04,0x39,0x30,0x00,0x00]
+0x00,0x00,0x48,0xd5,0xff,0x02,0x0a,0x04,0x39,0x30,0x00,0x00
+
+# GFX10: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00]
+0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00
+
+# GFX10: v_bfm_b32_e64 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00]
+0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00
+
+# GFX10: v_bfm_b32_e64 v0, 0x3039, v1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00]
+0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00
+
+# GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00]
+0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00
+
+# GFX10: v_pk_add_f16 v1, 0x1e240, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00]
+0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00
+
+# GFX10: v_pk_add_f16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff]
+0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff
+
+# GFX10: v_pk_add_u16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff]
+0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff
+
+# GFX10: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18]
+0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18
+
+# GFX10: v_pk_add_u16 v1, 0x41, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00]
+0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00
+
+# GFX10: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18]
+0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18
+
+# GFX10: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18]
+0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18
+
+# GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff]
+0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff
More information about the llvm-commits
mailing list