[llvm] [AMDGPU][True16] Support source DPP operands. (PR #79025)
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 22 10:04:46 PST 2024
https://github.com/kosarev created https://github.com/llvm/llvm-project/pull/79025
None
>From 040f34fec6dad8757e86eb9d24daddf48617a609 Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev at amd.com>
Date: Fri, 19 Jan 2024 18:19:11 +0000
Subject: [PATCH] [AMDGPU][True16] Support source DPP operands.
---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 7 +-
llvm/lib/Target/AMDGPU/BUFInstructions.td | 17 +--
.../MCTargetDesc/AMDGPUMCCodeEmitter.cpp | 43 ++++++-
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 113 +++++++++---------
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 24 ++--
llvm/lib/Target/AMDGPU/VOPCInstructions.td | 38 +++---
llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s | 64 +++++-----
llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s | 24 ++--
.../AMDGPU/gfx11_dasm_vop1_dpp16.txt | 102 ++++++++++------
.../AMDGPU/gfx11_dasm_vop1_dpp8.txt | 35 ++++--
10 files changed, 277 insertions(+), 190 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 66267c9255f4195..489cf85693edb2c 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -346,7 +346,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
}
bool isVRegWithInputMods() const;
- bool isT16VRegWithInputMods() const;
+ template <bool IsFake16> bool isT16VRegWithInputMods() const;
bool isSDWAOperand(MVT type) const;
bool isSDWAFP16Operand() const;
@@ -2054,8 +2054,9 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}
-bool AMDGPUOperand::isT16VRegWithInputMods() const {
- return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
+template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
+ return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
+ : AMDGPU::VGPR_16_Lo128RegClassID);
}
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index c3e5be8334a69f7..87247a134848b59 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -485,8 +485,8 @@ class MUBUF_Load_Pseudo <string opName,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind,
- RegisterClass vdata_rc = getVregSrcForVT<vdata_vt>.ret,
- RegisterOperand vdata_op = getLdStVDataRegisterOperand<vdata_rc, isTFE>.ret>
+ RegisterOperand vdata_rc = getVregSrcForVT<vdata_vt>.ret,
+ RegisterOperand vdata_op = getLdStVDataRegisterOperand<vdata_rc.RegClass, isTFE>.ret>
: MUBUF_Pseudo<opName,
!if(!or(isLds, isLdsOpc), (outs), (outs vdata_op:$vdata)),
!con(getMUBUFIns<addrKindCopy, [], isTFE, hasGFX12Enc>.ret,
@@ -601,7 +601,7 @@ class MUBUF_Store_Pseudo <string opName,
int addrKindCopy = addrKind>
: MUBUF_Pseudo<opName,
(outs),
- getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret], isTFE, hasGFX12Enc>.ret,
+ getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret.RegClass], isTFE, hasGFX12Enc>.ret,
getMUBUFAsmOps<addrKindCopy, 0, 0, isTFE>.ret,
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
@@ -1569,27 +1569,28 @@ multiclass BufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, string
# !if(!eq(RtnMode, "ret"), "", "_noret")
# "_" # vt.Size);
defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+ defvar data_vt_RC = getVregSrcForVT<data_vt>.ret.RegClass;
let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
- getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset,
+ data_vt_RC:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset);
def : GCNPat<
(vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset), data_vt:$vdata_in)),
!if(!eq(RtnMode, "ret"),
- (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS OffsetResDag, getVregSrcForVT<data_vt>.ret)),
+ (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS OffsetResDag, data_vt_RC)),
!if(!eq(vt, i32), sub0, sub0_sub1)),
OffsetResDag)
>;
defvar Addr64ResDag = (!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix)
- getVregSrcForVT<data_vt>.ret:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
+ data_vt_RC:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset);
def : GCNPat<
(vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
data_vt:$vdata_in)),
!if(!eq(RtnMode, "ret"),
- (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS Addr64ResDag, getVregSrcForVT<data_vt>.ret)),
+ (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS Addr64ResDag, data_vt_RC)),
!if(!eq(vt, i32), sub0, sub0_sub1)),
Addr64ResDag)
>;
@@ -1820,7 +1821,7 @@ multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, stri
(extract_cpol_set_glc $auxiliary),
(extract_cpol $auxiliary));
defvar SrcRC = getVregSrcForVT<vt>.ret;
- defvar DataRC = getVregSrcForVT<data_vt>.ret;
+ defvar DataRC = getVregSrcForVT<data_vt>.ret.RegClass;
defvar SubLo = !if(!eq(vt, i32), sub0, sub0_sub1);
defvar SubHi = !if(!eq(vt, i32), sub1, sub2_sub3);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index de1abaf29c56b21..c3e87244c0c8d12 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -562,7 +562,48 @@ void AMDGPUMCCodeEmitter::getMachineOpValue(const MCInst &MI,
void AMDGPUMCCodeEmitter::getMachineOpValueT16(
const MCInst &MI, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
- llvm_unreachable("TODO: Implement getMachineOpValueT16().");
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) {
+ unsigned Enc = MRI.getEncodingValue(MO.getReg());
+ unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
+ bool IsVGPR = Enc & AMDGPU::HWEncoding::IS_VGPR_OR_AGPR;
+ Op = Idx | (IsVGPR << 8);
+ return;
+ }
+ getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+ // VGPRs include the suffix/op_sel bit in the register encoding, but
+ // immediates and SGPRs include it in src_modifiers. Therefore, copy the
+ // op_sel bit from the src operands into src_modifier operands if Op is
+ // src_modifiers and the corresponding src is a VGPR
+ int SrcMOIdx = -1;
+ assert(OpNo < INT_MAX);
+ if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src0_modifiers)) {
+ SrcMOIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ int VDstMOIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst);
+ if (VDstMOIdx != -1) {
+ auto DstReg = MI.getOperand(VDstMOIdx).getReg();
+ if (AMDGPU::isHi(DstReg, MRI))
+ Op |= SISrcMods::DST_OP_SEL;
+ }
+ } else if ((int)OpNo == AMDGPU::getNamedOperandIdx(
+ MI.getOpcode(), AMDGPU::OpName::src1_modifiers))
+ SrcMOIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
+ else if ((int)OpNo == AMDGPU::getNamedOperandIdx(
+ MI.getOpcode(), AMDGPU::OpName::src2_modifiers))
+ SrcMOIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src2);
+ if (SrcMOIdx == -1)
+ return;
+
+ const MCOperand &SrcMO = MI.getOperand(SrcMOIdx);
+ if (!SrcMO.isReg())
+ return;
+ auto SrcReg = SrcMO.getReg();
+ if (AMDGPU::isSGPR(SrcReg, &MRI))
+ return;
+ if (AMDGPU::isHi(SrcReg, MRI))
+ Op |= SISrcMods::OP_SEL_0;
}
void AMDGPUMCCodeEmitter::getMachineOpValueT16Lo128(
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 3aeed6aec3650e2..a6820544f4b4d22 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1223,17 +1223,20 @@ def FPVRegInputModsMatchClass : AsmOperandClass {
let PredicateMethod = "isVRegWithInputMods";
}
-def FPT16VRegInputModsMatchClass : AsmOperandClass {
- let Name = "T16VRegWithFPInputMods";
+class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+ let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
+ "T16VRegWithFPInputMods");
let ParserMethod = "parseRegWithFPInputMods";
- let PredicateMethod = "isT16VRegWithInputMods";
+ let PredicateMethod = "isT16VRegWithInputMods<" #
+ !if(IsFake16, "true", "false") # ">";
}
def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
let PrintMethod = "printOperandAndFPInputMods";
}
-def FPT16VRegInputMods : InputMods <FPT16VRegInputModsMatchClass> {
+class FPT16VRegInputMods<bit IsFake16>
+ : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndFPInputMods";
}
@@ -1265,13 +1268,16 @@ def IntVRegInputModsMatchClass : AsmOperandClass {
let PredicateMethod = "isVRegWithInputMods";
}
-def IntT16VRegInputModsMatchClass : AsmOperandClass {
- let Name = "T16VRegWithIntInputMods";
+class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+ let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
+ "T16VRegWithIntInputMods");
let ParserMethod = "parseRegWithIntInputMods";
- let PredicateMethod = "isT16VRegWithInputMods";
+ let PredicateMethod = "isT16VRegWithInputMods<" #
+ !if(IsFake16, "true", "false") # ">";
}
-def IntT16VRegInputMods : InputMods <IntT16VRegInputModsMatchClass> {
+class IntT16VRegInputMods<bit IsFake16>
+ : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndIntInputMods";
}
@@ -1510,25 +1516,17 @@ class getSOPSrcForVT<ValueType VT> {
}
// Returns the vreg register class to use for source operand given VT
-class getVregSrcForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
- !if(!eq(VT.Size, 96), VReg_96,
- !if(!eq(VT.Size, 64), VReg_64,
- !if(!eq(VT.Size, 48), VReg_64,
- VGPR_32))));
-}
-
-class getVregSrcForVT_t16<ValueType VT, bit IsFake16 = 1> {
- RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
- !if(!eq(VT.Size, 96), VReg_96,
- !if(!eq(VT.Size, 64), VReg_64,
- !if(!eq(VT.Size, 48), VReg_64,
- !if(!eq(VT.Size, 16),
- !if(IsFake16, VGPR_32_Lo128, VGPR_16_Lo128),
- VGPR_32)))));
-
- RegisterOperand op = !if (!and(!eq(VT.Size, 16), !not(IsFake16)),
- VGPRSrc_16_Lo128, RegisterOperand<ret>);
+class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
+ RegisterOperand ret =
+ !if (!eq(VT.Size, 128), RegisterOperand<VReg_128>,
+ !if (!eq(VT.Size, 96), RegisterOperand<VReg_96>,
+ !if (!eq(VT.Size, 64), RegisterOperand<VReg_64>,
+ !if (!eq(VT.Size, 48), RegisterOperand<VReg_64>,
+ !if (!eq(VT.Size, 16),
+ !if (IsTrue16,
+ !if (IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
+ RegisterOperand<VGPR_32>),
+ RegisterOperand<VGPR_32>)))));
}
class getSDWASrcForVT <ValueType VT> {
@@ -1635,13 +1633,13 @@ class getSrcModDPP <ValueType VT> {
Operand ret = !if(VT.isFP, FPVRegInputMods, IntVRegInputMods);
}
-class getSrcModDPP_t16 <ValueType VT> {
+class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
Operand ret =
!if (VT.isFP,
!if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
- FPT16VRegInputMods, FPVRegInputMods),
- !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods,
- IntVRegInputMods));
+ FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
+ !if (!eq(VT.Value, i16.Value),
+ IntT16VRegInputMods<IsFake16>, IntVRegInputMods));
}
// Return type of input modifiers operand for specified input operand for DPP
@@ -1784,10 +1782,9 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret;
}
-class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
-
+class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
dag ret = !if(!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
(ins ),
@@ -1827,8 +1824,8 @@ class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass
);
}
-class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
@@ -1836,17 +1833,17 @@ class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}
-class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
+class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
(ins FI:$fi));
}
-class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
+class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
(ins dpp8:$dpp8, FI:$fi));
@@ -2251,13 +2248,13 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field RegisterOperand DstRCVOP3DPP = DstRC64;
field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
- field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>;
+ field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
- field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
- field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
- field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
+ field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
+ field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
+ field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
@@ -2443,13 +2440,13 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
let DstRC64 = getVALUDstForVT<DstVT>.ret;
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
- let Src1RC32 = getVregSrcForVT_t16<Src1VT, 0 /*IsFake16*/>.op;
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
@@ -2465,10 +2462,10 @@ class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
// Most DstVT are 16-bit, but not all
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
let DstRC64 = getVALUDstForVT<DstVT>.ret;
- let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 932337269006509..27eec64f59a6355 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -418,15 +418,11 @@ def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;
-class getRegisterOperandForVT<ValueType VT> {
- RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>;
-}
-
// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
- let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3,
+ let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret, 3,
0, HasModifiers, HasModifiers, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
@@ -489,21 +485,21 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
let DstRC = VOPDstOperand<VGPR_32_Lo128>;
let DstRC64 = VOPDstOperand<VGPR_32>;
let Src1RC32 = VGPRSrc_32_Lo128;
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT_t16<Src2VT>.ret:$src2);
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2);
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
- getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
- getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
dpp8:$dpp8, FI:$fi);
let Src2Mod = FP32InputMods; // dummy unused modifiers
let Src2RC64 = VGPRSrc_32; // stub argument
@@ -535,7 +531,7 @@ def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
let Src0Mod = Int32InputMods;
let Src1Mod = Int32InputMods;
- let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret,
+ let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret,
3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
1 /*HasSrc2Mods*/, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
@@ -898,8 +894,8 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
}
def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
- let Src1DPP = VGPR_32_Lo128;
- let Src1ModDPP = IntT16VRegInputMods;
+ let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
+ let Src1ModDPP = IntT16VRegInputMods</* IsFake16= */ 1>;
}
let isReMaterializable = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index c3aa13a9b3c7d7d..3ca97f0291e01b4 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -61,13 +61,13 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
let AsmDPP16 = AsmDPP#"$fi";
// VOPC DPP Instructions do not need an old operand
let TieRegDPP = "";
- let InsDPP = getInsDPP<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
+ let InsDPP = getInsDPP<VOPDstOperand<Src0DPP.RegClass>, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
Src2ModDPP, 0/*HasOld*/>.ret;
- let InsDPP16 = getInsDPP16<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
+ let InsDPP16 = getInsDPP16<VOPDstOperand<Src0DPP.RegClass>, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
Src2ModDPP, 0/*HasOld*/>.ret;
- let InsDPP8 = getInsDPP8<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
+ let InsDPP8 = getInsDPP8<VOPDstOperand<Src0DPP.RegClass>, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
Src2ModDPP, 0/*HasOld*/>.ret;
@@ -88,10 +88,10 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
def NAME : VOPC_Profile<sched, vt0, vt1>;
def _t16 : VOPC_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
- let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
@@ -118,10 +118,10 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
def NAME : VOPC_NoSdst_Profile<sched, vt0, vt1>;
def _t16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
- let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
@@ -789,11 +789,11 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
def NAME : VOPC_Class_Profile<sched, f16>;
def _t16 : VOPC_Class_Profile<sched, f16, i16> {
let IsTrue16 = 1;
- let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC64 = VSrc_b32;
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
@@ -816,11 +816,11 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
def NAME : VOPC_Class_NoSdst_Profile<sched, f16>;
def _t16 : VOPC_Class_NoSdst_Profile<sched, f16, i16> {
let IsTrue16 = 1;
- let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC64 = VSrc_b32;
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
index fa6df6affeb1e71..d15de77f67c398d 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
@@ -43,47 +43,47 @@ v_bfrev_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
v_bfrev_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: encoding: [0xfa,0x70,0xfe,0x7f,0xff,0x6f,0x05,0x30]
-v_ceil_f16 v5.l, v1 quad_perm:[3,2,1,0]
+v_ceil_f16 v5.l, v1.l quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff]
-v_ceil_f16 v5.l, v1 quad_perm:[0,1,2,3]
+v_ceil_f16 v5.l, v1.l quad_perm:[0,1,2,3]
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff]
-v_ceil_f16 v5.l, v1 row_mirror
+v_ceil_f16 v5.l, v1.l row_mirror
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_half_mirror
+v_ceil_f16 v5.l, v1.l row_half_mirror
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_shl:1
+v_ceil_f16 v5.l, v1.l row_shl:1
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_shl:15
+v_ceil_f16 v5.l, v1.l row_shl:15
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_shr:1
+v_ceil_f16 v5.l, v1.l row_shr:1
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_shr:15
+v_ceil_f16 v5.l, v1.l row_shr:15
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_ror:1
+v_ceil_f16 v5.l, v1.l row_ror:1
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_ror:15
+v_ceil_f16 v5.l, v1.l row_ror:15
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff]
-v_ceil_f16 v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf
+v_ceil_f16 v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff]
-v_ceil_f16 v127.l, v127 row_share:15 row_mask:0x0 bank_mask:0x1
+v_ceil_f16 v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: encoding: [0xfa,0xb8,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
-v_ceil_f16 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: encoding: [0xfa,0xb8,0x0a,0x7f,0x01,0x60,0x09,0x13]
+v_ceil_f16 v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0xb8,0x0a,0x7f,0x81,0x60,0x09,0x13]
-v_ceil_f16 v127.h, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: encoding: [0xfa,0xb8,0xfe,0x7f,0x7f,0x6f,0x35,0x30]
+v_ceil_f16 v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xb8,0xfe,0x7f,0xff,0x6f,0x35,0x30]
v_ceil_f32 v5, v1 quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0xff]
@@ -1513,47 +1513,47 @@ v_ffbl_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
v_ffbl_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x05,0x30]
-v_floor_f16 v5.l, v1 quad_perm:[3,2,1,0]
+v_floor_f16 v5.l, v1.l quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff]
-v_floor_f16 v5.l, v1 quad_perm:[0,1,2,3]
+v_floor_f16 v5.l, v1.l quad_perm:[0,1,2,3]
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff]
-v_floor_f16 v5.l, v1 row_mirror
+v_floor_f16 v5.l, v1.l row_mirror
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff]
-v_floor_f16 v5.l, v1 row_half_mirror
+v_floor_f16 v5.l, v1.l row_half_mirror
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff]
-v_floor_f16 v5.l, v1 row_shl:1
+v_floor_f16 v5.l, v1.l row_shl:1
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff]
-v_floor_f16 v5.l, v1 row_shl:15
+v_floor_f16 v5.l, v1.l row_shl:15
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff]
-v_floor_f16 v5.l, v1 row_shr:1
+v_floor_f16 v5.l, v1.l row_shr:1
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff]
-v_floor_f16 v5.l, v1 row_shr:15
+v_floor_f16 v5.l, v1.l row_shr:15
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff]
-v_floor_f16 v5.l, v1 row_ror:1
+v_floor_f16 v5.l, v1.l row_ror:1
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff]
-v_floor_f16 v5.l, v1 row_ror:15
+v_floor_f16 v5.l, v1.l row_ror:15
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff]
-v_floor_f16 v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf
+v_floor_f16 v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff]
-v_floor_f16 v127.l, v127 row_share:15 row_mask:0x0 bank_mask:0x1
+v_floor_f16 v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
-v_floor_f16 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: encoding: [0xfa,0xb6,0x0a,0x7f,0x01,0x60,0x09,0x13]
+v_floor_f16 v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0xb6,0x0a,0x7f,0x81,0x60,0x09,0x13]
-v_floor_f16 v127.h, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: encoding: [0xfa,0xb6,0xfe,0x7f,0x7f,0x6f,0x35,0x30]
+v_floor_f16 v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xb6,0xfe,0x7f,0xff,0x6f,0x35,0x30]
v_floor_f32 v5, v1 quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s
index 4fe3aa121b5907c..bc1b0ff8d00b42e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s
@@ -10,17 +10,17 @@ v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_bfrev_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: encoding: [0xe9,0x70,0xfe,0x7f,0xff,0x00,0x00,0x00]
-v_ceil_f16 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0]
+v_ceil_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05]
-v_ceil_f16 v127.l, v127 dpp8:[7,6,5,4,3,2,1,0]
+v_ceil_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0xb8,0xfe,0x7e,0x7f,0x77,0x39,0x05]
-v_ceil_f16 v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: encoding: [0xea,0xb8,0x0a,0x7f,0x01,0x77,0x39,0x05]
+v_ceil_f16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0xb8,0x0a,0x7f,0x81,0x77,0x39,0x05]
-v_ceil_f16 v127.h, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: encoding: [0xe9,0xb8,0xfe,0x7f,0x7f,0x00,0x00,0x00]
+v_ceil_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xb8,0xfe,0x7f,0xff,0x00,0x00,0x00]
v_ceil_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0x44,0x0a,0x7e,0x01,0x77,0x39,0x05]
@@ -328,17 +328,17 @@ v_ffbl_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_ffbl_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: encoding: [0xe9,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00]
-v_floor_f16 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0]
+v_floor_f16 v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05]
-v_floor_f16 v127.l, v127 dpp8:[7,6,5,4,3,2,1,0]
+v_floor_f16 v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x77,0x39,0x05]
-v_floor_f16 v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: encoding: [0xea,0xb6,0x0a,0x7f,0x01,0x77,0x39,0x05]
+v_floor_f16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0xb6,0x0a,0x7f,0x81,0x77,0x39,0x05]
-v_floor_f16 v127.h, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: encoding: [0xe9,0xb6,0xfe,0x7f,0x7f,0x00,0x00,0x00]
+v_floor_f16 v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xb6,0xfe,0x7f,0xff,0x00,0x00,0x00]
v_floor_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index 8758305258387cc..3e1bcf2fd0345f5 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
# GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -43,47 +45,61 @@
# GFX11: v_bfrev_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x70,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x70,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_ceil_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x1b,0x00,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0xe4,0x00,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x40,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x41,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x01,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x0f,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x11,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x1f,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x21,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x2f,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff]
0xfa,0xb8,0x0a,0x7e,0x01,0x50,0x01,0xff
-# GFX11: v_ceil_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x5f,0x01,0x01]
-0xfa,0xb8,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX11-REAL16: v_ceil_f16_dpp v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xb8,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
+# GFX11-FAKE16: v_ceil_f16_dpp v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xb8,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
+0xfa,0xb8,0xfe,0x7e,0x7f,0x5f,0x01,0x01
-# GFX11: v_ceil_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xb8,0x0a,0x7e,0x01,0x60,0x01,0x13]
-0xfa,0xb8,0x0a,0x7e,0x01,0x60,0x01,0x13
+# GFX11-REAL16: v_ceil_f16_dpp v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xb8,0x0a,0x7f,0x81,0x60,0x01,0x13]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xfa,0xb8,0x0a,0x7f,0x81,0x60,0x01,0x13
-# GFX11: v_ceil_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xb8,0xfe,0x7e,0x7f,0x6f,0x3d,0x30]
-0xfa,0xb8,0xfe,0x7e,0x7f,0x6f,0x3d,0x30
+# GFX11-REAL16: v_ceil_f16_dpp v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xb8,0xfe,0x7f,0xff,0x6f,0x3d,0x30]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xfa,0xb8,0xfe,0x7f,0xff,0x6f,0x3d,0x30
# GFX11: v_ceil_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x44,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1303,47 +1319,61 @@
# GFX11: v_exp_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x4a,0xfe,0x7f,0xff,0x6f,0x3d,0x30]
0xfa,0x4a,0xfe,0x7f,0xff,0x6f,0x3d,0x30
-# GFX11: v_floor_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x1b,0x00,0xff
-# GFX11: v_floor_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0xe4,0x00,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x40,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x41,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x01,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x0f,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x11,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x1f,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x21,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x2f,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff]
0xfa,0xb6,0x0a,0x7e,0x01,0x50,0x01,0xff
-# GFX11: v_floor_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x5f,0x01,0x01]
-0xfa,0xb6,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX11-REAL16: v_floor_f16_dpp v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
+# GFX11-FAKE16: v_floor_f16_dpp v127, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x5f,0x01,0x01]
+0xfa,0xb6,0xfe,0x7e,0x7f,0x5f,0x01,0x01
-# GFX11: v_floor_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xb6,0x0a,0x7e,0x01,0x60,0x01,0x13]
-0xfa,0xb6,0x0a,0x7e,0x01,0x60,0x01,0x13
+# GFX11-REAL16: v_floor_f16_dpp v5.h, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xb6,0x0a,0x7f,0x81,0x60,0x01,0x13]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xfa,0xb6,0x0a,0x7f,0x81,0x60,0x01,0x13
-# GFX11: v_floor_f16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xb6,0xfe,0x7e,0x7f,0x6f,0x3d,0x30]
-0xfa,0xb6,0xfe,0x7e,0x7f,0x6f,0x3d,0x30
+# GFX11-REAL16: v_floor_f16_dpp v127.h, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xb6,0xfe,0x7f,0xff,0x6f,0x3d,0x30]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xfa,0xb6,0xfe,0x7f,0xff,0x6f,0x3d,0x30
# GFX11: v_floor_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x48,0x0a,0x7e,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index a3531410ac401f1..aef470fe87fd3dd 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -1,5 +1,6 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
# GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05
@@ -7,11 +8,21 @@
# GFX11: v_bfrev_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x70,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x70,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_ceil_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_ceil_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_ceil_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0xb8,0x0a,0x7e,0x01,0x77,0x39,0x05
-# GFX11: v_ceil_f16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb8,0xfe,0x7e,0x7f,0x00,0x00,0x00]
-0xea,0xb8,0xfe,0x7e,0x7f,0x00,0x00,0x00
+# GFX11-REAL16: v_ceil_f16_dpp v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0xfe,0x7e,0x7f,0x77,0x39,0x05]
+# GFX11-FAKE16: v_ceil_f16_dpp v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0xfe,0x7e,0x7f,0x77,0x39,0x05]
+0xe9,0xb8,0xfe,0x7e,0x7f,0x77,0x39,0x05
+
+# GFX11-REAL16: v_ceil_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0x0a,0x7f,0x81,0x77,0x39,0x05]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xe9,0xb8,0x0a,0x7f,0x81,0x77,0x39,0x05
+
+# GFX11-REAL16: v_ceil_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb8,0xfe,0x7f,0xff,0x00,0x00,0x00]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xea,0xb8,0xfe,0x7f,0xff,0x00,0x00,0x00
# GFX11: v_ceil_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x44,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x44,0x0a,0x7e,0x01,0x77,0x39,0x05
@@ -187,11 +198,21 @@
# GFX11: v_exp_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x4a,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x4a,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_floor_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_floor_f16_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_floor_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0xb6,0x0a,0x7e,0x01,0x77,0x39,0x05
-# GFX11: v_floor_f16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb6,0xfe,0x7e,0x7f,0x00,0x00,0x00]
-0xea,0xb6,0xfe,0x7e,0x7f,0x00,0x00,0x00
+# GFX11-REAL16: v_floor_f16_dpp v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x77,0x39,0x05]
+# GFX11-FAKE16: v_floor_f16_dpp v127, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0xfe,0x7e,0x7f,0x77,0x39,0x05]
+0xe9,0xb6,0xfe,0x7e,0x7f,0x77,0x39,0x05
+
+# GFX11-REAL16: v_floor_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0x0a,0x7f,0x81,0x77,0x39,0x05]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xe9,0xb6,0x0a,0x7f,0x81,0x77,0x39,0x05
+
+# GFX11-REAL16: v_floor_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb6,0xfe,0x7f,0xff,0x00,0x00,0x00]
+# COM: GFX11-FAKE16: warning: invalid instruction encoding
+0xea,0xb6,0xfe,0x7f,0xff,0x00,0x00,0x00
# GFX11: v_floor_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x48,0x0a,0x7e,0x01,0x77,0x39,0x05
More information about the llvm-commits
mailing list