[llvm] [AMDGPU][True16][MC] 16bit vsrc and vdst support in MC (PR #104510)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 17:42:02 PDT 2024
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/104510
>From 87e07e8e7d304b9bac9e63edad6413de1eab2acb Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 21 Aug 2024 10:22:17 -0400
Subject: [PATCH] [AMDGPU][True16][MC]support 16bit operand and vdst in MC
---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 36 ++-
.../Disassembler/AMDGPUDisassembler.cpp | 22 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 209 ++++++++++----
llvm/lib/Target/AMDGPU/SIInstructions.td | 8 +-
llvm/lib/Target/AMDGPU/SIModeRegister.cpp | 2 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 45 ++-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 22 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 12 +-
.../inst-select-amdgcn.fcmp.constants.w32.mir | 12 +-
.../inst-select-amdgcn.fcmp.constants.w64.mir | 12 +-
.../AMDGPU/GlobalISel/inst-select-fptosi.mir | 162 ++++++-----
.../AMDGPU/GlobalISel/inst-select-fptoui.mir | 148 +++++-----
.../AMDGPU/GlobalISel/inst-select-sitofp.mir | 32 ++-
.../AMDGPU/GlobalISel/inst-select-uitofp.mir | 32 ++-
.../AMDGPU/fix-sgpr-copies-f16-fake16.mir | 20 ++
.../AMDGPU/fix-sgpr-copies-f16-true16.mir | 20 ++
.../CodeGen/AMDGPU/fix-sgpr-copies-f16.mir | 17 --
.../CodeGen/AMDGPU/mode-register-fptrunc.mir | 8 +-
...schedule-regpressure-ilp-metric-spills.mir | 256 +++++++++---------
llvm/test/MC/AMDGPU/gfx11_asm_vop1.s | 44 +--
llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s | 64 ++---
llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s | 26 +-
.../MC/AMDGPU/gfx11_asm_vop1_t16_promote.s | 114 ++++++--
llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s | 40 +--
.../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s | 56 ++--
.../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s | 16 +-
.../test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s | 36 +--
llvm/test/MC/AMDGPU/gfx12_asm_vop1.s | 81 +++---
.../Disassembler/AMDGPU/gfx11_dasm_vop1.txt | 65 +++--
.../AMDGPU/gfx11_dasm_vop1_dpp16.txt | 96 ++++---
.../AMDGPU/gfx11_dasm_vop1_dpp8.txt | 32 ++-
.../gfx11_dasm_vop3_dpp16_from_vop1.txt | 84 ++++--
.../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 24 +-
.../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 51 ++--
35 files changed, 1164 insertions(+), 750 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1a10206eea2374..6181a36b016adf 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -280,8 +280,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
- bool isRegOrImmWithIntT16InputMods() const {
- return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
+ template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
+ return isRegOrImmWithInputMods(
+ IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
}
bool isRegOrImmWithInt32InputMods() const {
@@ -292,6 +293,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
}
+ template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
+ return isRegOrInline(
+ IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
+ }
+
bool isRegOrInlineImmWithInt32InputMods() const {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -304,8 +310,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
- bool isRegOrImmWithFPT16InputMods() const {
- return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
+ return isRegOrImmWithInputMods(
+ IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
}
bool isRegOrImmWithFP32InputMods() const {
@@ -354,6 +361,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
}
bool isVRegWithInputMods() const;
+ template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
template <bool IsFake16> bool isT16VRegWithInputMods() const;
bool isSDWAOperand(MVT type) const;
@@ -515,7 +523,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
- bool isVCSrcTB16() const {
+ bool isVCSrcT_b16() const {
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
}
@@ -545,7 +553,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
}
- bool isVCSrcTF16() const {
+ bool isVCSrcT_f16() const {
+ return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+ }
+
+ bool isVCSrcT_bf16() const {
return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
}
@@ -583,7 +595,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
- bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
+ bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
bool isVSrcT_b16_Lo128() const {
return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
@@ -617,7 +629,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
- bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
+ bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
bool isVSrcT_bf16_Lo128() const {
return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
@@ -2162,11 +2174,17 @@ bool AMDGPUOperand::isVRegWithInputMods() const {
AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}
-template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
+template <bool IsFake16>
+bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
: AMDGPU::VGPR_16_Lo128RegClassID);
}
+template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
+ return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
+ : AMDGPU::VGPR_16RegClassID);
+}
+
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
return isVReg32();
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1a0dc7098347ac..c8b8a7d120792e 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -328,36 +328,40 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
assert(isUInt<9>(Imm) && "9-bit encoding expected");
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- bool IsVGPR = Imm & (1 << 8);
- if (IsVGPR) {
+ if (Imm & AMDGPU::EncValues::IS_VGPR) {
bool IsHi = Imm & (1 << 7);
unsigned RegIdx = Imm & 0x7f;
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
- return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
- Imm & 0xFF, false, 16));
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+ OpWidth, Imm & 0xFF, false, ImmWidth,
+ (AMDGPU::OperandSemantics)OperandSemantics));
}
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
assert(isUInt<10>(Imm) && "10-bit encoding expected");
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- bool IsVGPR = Imm & (1 << 8);
- if (IsVGPR) {
+ if (Imm & AMDGPU::EncValues::IS_VGPR) {
bool IsHi = Imm & (1 << 9);
unsigned RegIdx = Imm & 0xff;
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
- return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
- Imm & 0xFF, false, 16));
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+ OpWidth, Imm & 0xFF, false, ImmWidth,
+ (AMDGPU::OperandSemantics)OperandSemantics));
}
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
@@ -628,6 +632,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
convertVOP3DPPInst(MI); // Regular VOP3 case
}
+ convertTrue16OpSel(MI);
+
if (AMDGPU::isMAC(MI.getOpcode())) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6dce41d1605fa4..f9fce1ea899d33 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5424,9 +5424,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
- case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
- case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
- case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
+ case AMDGPU::S_CVT_F32_F16:
+ case AMDGPU::S_CVT_HI_F32_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
+ : AMDGPU::V_CVT_F32_F16_fake16_e64;
+ case AMDGPU::S_CVT_F16_F32:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
+ : AMDGPU::V_CVT_F16_F32_fake16_e64;
case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 85281713e22b1f..f0fc427531dd92 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1205,9 +1205,11 @@ class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
}
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
-def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
- let Name = "RegOrImmWithFPT16InputMods";
- let PredicateMethod = "isRegOrImmWithFPT16InputMods";
+class FPT16InputModsMatchClass<bit IsFake16> : FPInputModsMatchClass<16> {
+ let Name = !if(IsFake16, "RegOrImmWithFPFake16InputMods",
+ "RegOrImmWithFPT16InputMods");
+ let PredicateMethod = "isRegOrImmWithFPT16InputMods<" #
+ !if(IsFake16, "true", "false") # ">";
}
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -1232,12 +1234,19 @@ class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
}
def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
-def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
+class FPT16InputMods<bit IsFake16> : FPInputMods<FPT16InputModsMatchClass<IsFake16>> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
+def FP32T16DstInputMods : FPInputMods<FP32InputModsMatchClass> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
-class FP16VCSrcInputMods<bit IsFake16>
- : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
+class FPT16VCSrcInputMods<bit IsFake16 = 1>
+ : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>> {
+let EncoderMethod = "getMachineOpValueT16";
+}
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1249,21 +1258,38 @@ class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize>
let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
-def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
- let Name = "RegOrImmWithIntT16InputMods";
- let PredicateMethod = "isRegOrImmWithIntT16InputMods";
+class IntT16InputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
+ let Name = !if(IsFake16, "RegOrImmWithIntFake16InputMods",
+ "RegOrImmWithIntT16InputMods");
+ let PredicateMethod = "isRegOrImmWithIntT16InputMods<" #
+ !if(IsFake16, "true", "false") # ">";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
+class IntT16VCSrcInputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
+ let Name = !if(IsFake16, "RegOrInlineImmWithIntFake16InputMods",
+ "RegOrInlineImmWithIntT16InputMods");
+ let PredicateMethod = "isRegOrInlineImmWithIntT16InputMods<" #
+ !if(IsFake16, "true", "false") # ">";
+}
class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
-def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
+class IntT16InputMods<bit IsFake16> : IntInputMods<IntT16InputModsMatchClass<IsFake16>> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
+def Int32T16DstInputMods : IntInputMods<Int32InputModsMatchClass> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
+class IntT16VCSrcInputMods<bit IsFake16 = 1>
+ : IntInputMods<IntT16VCSrcInputModsMatchClass<IsFake16>> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
class OpSelModsMatchClass : AsmOperandClass {
let Name = "OpSelMods";
@@ -1297,6 +1323,23 @@ def FPVRegInputModsMatchClass : AsmOperandClass {
let PredicateMethod = "isVRegWithInputMods";
}
+def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndFPInputMods";
+}
+
+def FPVRegT16DstInputMods : InputMods <FPVRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndFPInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
+}
+
+class FPT16_Lo128VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+ let Name = !if(IsFake16, "Fake16_Lo128VRegWithFPInputMods",
+ "T16_Lo128VRegWithFPInputMods");
+ let ParserMethod = "parseRegWithFPInputMods";
+ let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
+ !if(IsFake16, "true", "false") # ">";
+}
+
class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
"T16VRegWithFPInputMods");
@@ -1305,13 +1348,16 @@ class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
!if(IsFake16, "true", "false") # ">";
}
-def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
+class FPT16_Lo128VRegInputMods<bit IsFake16 = 1>
+ : InputMods <FPT16_Lo128VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndFPInputMods";
+ let EncoderMethod = "getMachineOpValueT16Lo128";
}
-class FPT16VRegInputMods<bit IsFake16>
+class FPT16VRegInputMods<bit IsFake16 = 1>
: InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndFPInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
}
class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1342,7 +1388,15 @@ def IntVRegInputModsMatchClass : AsmOperandClass {
let PredicateMethod = "isVRegWithInputMods";
}
-class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
+class IntT16_Lo128VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
+ let Name = !if(IsFake16, "Fake16_Lo128VRegWithIntInputMods",
+ "T16_Lo128VRegWithIntInputMods");
+ let ParserMethod = "parseRegWithIntInputMods";
+ let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
+ !if(IsFake16, "true", "false") # ">";
+}
+
+class IntT16VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
"T16VRegWithIntInputMods");
let ParserMethod = "parseRegWithIntInputMods";
@@ -1350,15 +1404,27 @@ class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
!if(IsFake16, "true", "false") # ">";
}
-class IntT16VRegInputMods<bit IsFake16>
+class IntT16_Lo128VRegInputMods<bit IsFake16 = 1>
+ : InputMods <IntT16_Lo128VRegInputModsMatchClass<IsFake16>> {
+ let PrintMethod = "printOperandAndIntInputMods";
+ let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
+class IntT16VRegInputMods<bit IsFake16 = 1>
: InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
let PrintMethod = "printOperandAndIntInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
}
def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
+def IntVRegT16DstInputMods : InputMods <IntVRegInputModsMatchClass> {
+ let PrintMethod = "printOperandAndIntInputMods";
+ let EncoderMethod = "getMachineOpValueT16";
+}
+
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "PackedFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
@@ -1585,7 +1651,7 @@ class getSOPSrcForVT<ValueType VT> {
}
// Returns the vreg register class to use for source operand given VT
-class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
+class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
RegisterOperand ret =
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
@@ -1627,12 +1693,12 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
}
// Src2 of VOP3 DPP instructions cannot be a literal
-class getVOP3DPPSrcForVT<ValueType VT> {
+class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
RegisterOperand ret =
!cond(!eq(VT, i1) : SSrc_i1,
- !eq(VT, i16) : VCSrc_b16,
- !eq(VT, f16) : VCSrc_f16,
- !eq(VT, bf16) : VCSrc_bf16,
+ !eq(VT, i16) : !if (IsFake16, VCSrc_b16, VCSrcT_b16),
+ !eq(VT, f16) : !if (IsFake16, VCSrc_f16, VCSrcT_f16),
+ !eq(VT, bf16) : !if (IsFake16, VCSrc_bf16, VCSrcT_bf16),
!eq(VT, v2i16) : VCSrc_v2b16,
!eq(VT, v2f16) : VCSrc_v2f16,
!eq(VT, v2bf16) : VCSrc_v2bf16,
@@ -1666,23 +1732,27 @@ class isModifierType<ValueType SrcVT> {
!eq(SrcVT.Value, v16bf16.Value));
}
-// Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
- Operand ret = !if(!eq(VT.Size, 64),
+// Return type of input modifiers operand for specified input operand.
+// True16: If the destination is a 16-bit value, the src0 modifier must hold
+// dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src operand besides 0.
+// 64-bit src types are not implemented for True16 dst.
+class getSrc0Mod <ValueType VT, ValueType DstVT, bit IsTrue16 = 0, bit IsFake16 = 1> {
+ defvar T16Dst = !if(!eq(VT.Size, 64),
+ !if(VT.isFP, FP64InputMods, Int64InputMods),
+ !if(!eq(VT.Size, 16),
+ !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
+ !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
+ !if(VT.isFP, FP32T16DstInputMods, Int32T16DstInputMods)));
+ defvar Normal = !if(!eq(VT.Size, 64),
!if(VT.isFP, FP64InputMods, Int64InputMods),
!if(!eq(VT.Size, 16),
- !if(VT.isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
- !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
+ !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
+ !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
!if(VT.isFP, FP32InputMods, Int32InputMods)));
+ Operand ret = !if(!and(IsTrue16, !eq(DstVT.Size, 16)), T16Dst, Normal);
}
-class getOpSelMod <ValueType VT> {
- Operand ret = !cond(!eq(VT, f16) : FP16InputMods,
- !eq(VT, bf16) : FP16InputMods,
- !eq(VT, v2f16) : PackedF16InputMods,
- !eq(VT, v2bf16) : PackedF16InputMods,
- 1 : IntOpSelMods);
-}
+class getSrcMod<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> : getSrc0Mod<VT, f128/*Dummy Arg*/, IsTrue16, IsFake16>;
// Return type of input modifiers operand specified input operand for DPP
class getSrcModDPP <ValueType VT> {
@@ -1693,18 +1763,42 @@ class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
Operand ret =
!if (VT.isFP,
!if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
- FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
+ FPT16_Lo128VRegInputMods<IsFake16>, FPVRegInputMods),
!if (!eq(VT.Value, i16.Value),
- IntT16VRegInputMods<IsFake16>, IntVRegInputMods));
+ IntT16_Lo128VRegInputMods<IsFake16>, IntVRegInputMods));
}
// Return type of input modifiers operand for specified input operand for DPP
-class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
+// True16: If the destination is a 16-bit value, the src0 modifier must hold
+// dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src operand besides 0.
+// 64-bit src types are not implemented for True16 dst.
+class getSrc0ModVOP3DPP <ValueType VT, ValueType DstVT, bit IsFake16 = 1> {
+ defvar T16Dst =
+ !if (VT.isFP,
+ !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
+ FPT16VRegInputMods<IsFake16>, FPVRegT16DstInputMods),
+ !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods<IsFake16>,
+ IntVRegT16DstInputMods));
+ defvar Normal =
+ !if (VT.isFP,
+ !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
+ FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
+ !if (!eq(VT.Value, i16.Value),
+ IntT16VRegInputMods<IsFake16>,
+ IntVRegInputMods));
+ Operand ret = !if(!and(!not(IsFake16), !eq(DstVT.Size, 16)), T16Dst, Normal);
+}
+
+// GFX11 only supports VGPR src1, but the restriction is done in AsmParser
+// and GCNDPPCombine.
+class getSrcModVOP3DPP<ValueType VT, bit IsFake16 = 1> {
Operand ret =
!if (VT.isFP,
!if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
- FP16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
- Int32VCSrcInputMods);
+ FPT16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
+ !if (!eq(VT.Value, i16.Value),
+ IntT16VCSrcInputMods<IsFake16>,
+ Int32VCSrcInputMods));
}
// Return type of input modifiers operand specified input operand for SDWA
@@ -1745,9 +1839,11 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0)))
/* else */,
// VOP1 without modifiers
- !if (HasClamp,
- (ins Src0RC:$src0, Clamp0:$clamp),
- (ins Src0RC:$src0))
+ !if(HasOMod,
+ (ins Src0RC:$src0, Clamp0:$clamp, omod0:$omod),
+ !if (HasClamp,
+ (ins Src0RC:$src0, Clamp0:$clamp),
+ (ins Src0RC:$src0)))
/* endif */ ),
!if (!eq(NumSrcArgs, 2),
!if (HasModifiers,
@@ -2320,13 +2416,13 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
- field Operand Src0Mod = getSrcMod<Src0VT>.ret;
+ field Operand Src0Mod = getSrc0Mod<Src0VT, DstVT>.ret;
field Operand Src1Mod = getSrcMod<Src1VT>.ret;
field Operand Src2Mod = getSrcMod<Src2VT>.ret;
field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
- field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
+ field Operand Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT>.ret;
field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
@@ -2417,9 +2513,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOMod,
- getOpSelMod<Src0VT>.ret,
- getOpSelMod<Src1VT>.ret,
- getOpSelMod<Src2VT>.ret>.ret;
+ Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsDPP = !if(HasExtDPP,
getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
@@ -2507,7 +2601,6 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
// Most DstVT are 16-bit, but not all.
let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
- let DstRC64 = getVALUDstForVT<DstVT>.ret;
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
@@ -2516,8 +2609,10 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
- let Src0VOP3DPP = VGPRSrc_16;
- let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret;
+ let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPRSrc_16, VGPRSrc_32);
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
@@ -2525,9 +2620,9 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
- let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret;
- let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret;
- let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret;
}
class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
@@ -2539,9 +2634,17 @@ class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1 /*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1 /*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index e7831d00a3a4a8..d8b4944a1cda98 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1165,8 +1165,8 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
let SubtargetPredicate = NotHasTrue16BitInsts in
defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
-let SubtargetPredicate = HasTrue16BitInsts in
-defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64>;
+let SubtargetPredicate = UseFakeTrue16Insts in
+defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
//===----------------------------------------------------------------------===//
// VOP2 Patterns
@@ -2779,7 +2779,7 @@ def : GCNPat <
let SubtargetPredicate = HasTrue16BitInsts in
def : GCNPat <
(f16 (sint_to_fp i1:$src)),
- (V_CVT_F16_F32_t16_e32 (
+ (V_CVT_F16_F32_fake16_e32 (
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
SSrc_i1:$src))
@@ -2796,7 +2796,7 @@ def : GCNPat <
let SubtargetPredicate = HasTrue16BitInsts in
def : GCNPat <
(f16 (uint_to_fp i1:$src)),
- (V_CVT_F16_F32_t16_e32 (
+ (V_CVT_F16_F32_fake16_e32 (
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
SSrc_i1:$src))
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index a590c6560942cf..6eb0dc97329fd8 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -180,7 +180,7 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
if (TII->getSubtarget().hasTrue16BitInsts()) {
MachineBasicBlock &MBB = *MI.getParent();
MachineInstrBuilder B(*MBB.getParent(), MI);
- MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_fake16_e64));
MachineOperand Src0 = MI.getOperand(1);
MI.removeOperand(1);
B.addImm(0); // src0_modifiers
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index d3e39464fea396..443797ef0ff4bf 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1131,10 +1131,17 @@ def OperandSemantics {
class SrcRegOrImm9<RegisterClass regClass, string opWidth, string operandType,
int immWidth, int OperandSemantics>
: RegOrImmOperand<regClass, operandType> {
- let DecoderMethod = "decodeSrcRegOrImm9<AMDGPUDisassembler::" # opWidth #
+ string DecoderMethodName = "decodeSrcRegOrImm9";
+ let DecoderMethod = DecoderMethodName # "<AMDGPUDisassembler::" # opWidth #
", " # immWidth # ", " # OperandSemantics # ">";
}
+class SrcRegOrImm9_t16<string operandType, int OperandSemantics, RegisterClass regClass = VS_16>
+ : SrcRegOrImm9<regClass, "OPW16", operandType, 16, OperandSemantics> {
+ let DecoderMethodName = "decodeOperand_VSrcT16";
+ let EncoderMethod = "getMachineOpValueT16";
+}
+
def SSrc_b16 : SrcRegOrImm9 <SReg_32, "OPW32", "OPERAND_REG_IMM_INT16", 16, OperandSemantics.INT>;
def SSrc_bf16: SrcRegOrImm9 <SReg_32, "OPW32", "OPERAND_REG_IMM_BF16", 16, OperandSemantics.BF16>;
def SSrc_f16 : SrcRegOrImm9 <SReg_32, "OPW32", "OPERAND_REG_IMM_FP16", 16, OperandSemantics.FP16>;
@@ -1175,32 +1182,16 @@ def VSrc_bf16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_IMM_BF16", 16, Operan
def VSrc_f16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_IMM_FP16", 16, OperandSemantics.FP16>;
// True16 VOP3 operands.
-def VSrcT_b16 : RegOrImmOperand <VS_16, "OPERAND_REG_IMM_INT16"> {
- let EncoderMethod = "getMachineOpValueT16";
- let DecoderMethod = "decodeOperand_VSrcT16";
-}
-def VSrcT_bf16 : RegOrImmOperand <VS_16, "OPERAND_REG_IMM_BF16"> {
- let EncoderMethod = "getMachineOpValueT16";
- let DecoderMethod = "decodeOperand_VSrcT16";
-}
-def VSrcT_f16 : RegOrImmOperand <VS_16, "OPERAND_REG_IMM_FP16"> {
- let EncoderMethod = "getMachineOpValueT16";
- let DecoderMethod = "decodeOperand_VSrcT16";
-}
+def VSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_INT16", OperandSemantics.INT>;
+def VSrcT_bf16 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_BF16", OperandSemantics.BF16>;
+def VSrcT_f16 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_FP16", OperandSemantics.FP16>;
// True16 VOP1/2/C operands.
-def VSrcT_b16_Lo128 : RegOrImmOperand <VS_16_Lo128, "OPERAND_REG_IMM_INT16"> {
- let EncoderMethod = "getMachineOpValueT16Lo128";
- let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
-}
-def VSrcT_bf16_Lo128 : RegOrImmOperand <VS_16_Lo128, "OPERAND_REG_IMM_BF16"> {
- let EncoderMethod = "getMachineOpValueT16Lo128";
- let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
-}
-def VSrcT_f16_Lo128 : RegOrImmOperand <VS_16_Lo128, "OPERAND_REG_IMM_FP16"> {
- let EncoderMethod = "getMachineOpValueT16Lo128";
- let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
-}
+let DecoderMethodName = "decodeOperand_VSrcT16_Lo128", EncoderMethod = "getMachineOpValueT16Lo128" in {
+ def VSrcT_b16_Lo128 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_INT16", OperandSemantics.INT, VS_16_Lo128>;
+ def VSrcT_bf16_Lo128 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_BF16", OperandSemantics.BF16, VS_16_Lo128>;
+ def VSrcT_f16_Lo128 : SrcRegOrImm9_t16 <"OPERAND_REG_IMM_FP16",OperandSemantics.FP16, VS_16_Lo128>;
+} // End DecoderMethodName = "decodeOperand_VSrcT16_Lo128", EncoderMethod = "getMachineOpValueT16Lo128"
// The current and temporary future default used case for fake VOP1/2/C.
// For VOP1,2,C True16 instructions. _Lo128 use first 128 32-bit VGPRs only.
@@ -1303,6 +1294,10 @@ def VCSrc_v2b16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_INLINE_C_V2INT16",
def VCSrc_v2bf16: SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_INLINE_C_V2BF16", 16, OperandSemantics.BF16>;
def VCSrc_v2f16 : SrcRegOrImm9 <VS_32, "OPW32", "OPERAND_REG_INLINE_C_V2FP16", 16, OperandSemantics.FP16>;
+// True 16 Operands
+def VCSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_INT16", OperandSemantics.INT>;
+def VCSrcT_bf16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_BF16", OperandSemantics.BF16>;
+def VCSrcT_f16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_FP16", OperandSemantics.FP16>;
//===----------------------------------------------------------------------===//
// VISrc_* Operands with a VGPR or an inline constant
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 03e4cb9fcf49b7..2c60297f452495 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -294,16 +294,22 @@ defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
let FPDPRounding = 1, isReMaterializable = 0 in {
+ // V_CVT_F16_F32 and V_CVT_F32_F16 are special cases because they are
+ // present in targets without Has16BitInsts. Otherwise they can use
+ // class VOP1Inst_t16
let OtherPredicates = [NotHasTrue16BitInsts] in
- defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
- let OtherPredicates = [HasTrue16BitInsts] in
- defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
+ defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
+ let OtherPredicates = [UseRealTrue16Insts] in
+ defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
+ let OtherPredicates = [UseFakeTrue16Insts] in
+ defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0
-
let OtherPredicates = [NotHasTrue16BitInsts] in
-defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
-let OtherPredicates = [HasTrue16BitInsts] in
-defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
+ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
+let OtherPredicates = [UseRealTrue16Insts] in
+ defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
+let OtherPredicates = [UseFakeTrue16Insts] in
+ defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -1004,7 +1010,9 @@ defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
+defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
+defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index fccaa27f361381..7a0885b5ee7d48 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -923,14 +923,14 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
let Src1ModSDWA = Int16SDWAInputMods;
}
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
- let Src1RC32 = RegisterOperand<VGPR_16_Lo128>;
- let Src1DPP = RegisterOperand<VGPR_16_Lo128>;
- let Src1ModDPP = IntT16VRegInputMods<0/*IsFake16*/>;
+ let Src1Mod = IntT16InputMods<0/*IsFake16*/>;
+ let Src1ModDPP = IntT16_Lo128VRegInputMods<0/*IsFake16*/>;
+ let Src1ModVOP3DPP = IntT16VCSrcInputMods<0/*IsFake16*/>;
}
def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
- let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
- let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
- let Src1ModDPP = IntT16VRegInputMods<1/*IsFake16*/>;
+ let Src1Mod = Int32InputMods;
+ let Src1ModDPP = IntT16_Lo128VRegInputMods<1/*IsFake16*/>;
+ let Src1ModVOP3DPP = IntT16VCSrcInputMods<1/*IsFake16*/>;
}
let isReMaterializable = 1 in {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 9a1e1ed9208c2f..17cdab46c3b936 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -15,9 +15,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -41,9 +41,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 8ea51f5541df41..158076a3b74a2a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -15,9 +15,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -41,9 +41,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
index 7730dde20395df..32a73bc4e24a5a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -17,20 +17,22 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %1
+ ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
+ ;
; VI-LABEL: name: fptosi_s32_to_s32_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %1
+ ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
+ ;
; GFX11-LABEL: name: fptosi_s32_to_s32_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %1
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FPTOSI %0
$vgpr0 = COPY %1
@@ -50,20 +52,22 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %1
+ ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
+ ;
; VI-LABEL: name: fptosi_s32_to_s32_vs
; VI: liveins: $sgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %1
+ ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
+ ;
; GFX11-LABEL: name: fptosi_s32_to_s32_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %1
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_FPTOSI %0
$vgpr0 = COPY %1
@@ -83,20 +87,22 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
+ ; GCN-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
+ ;
; VI-LABEL: name: fptosi_s32_to_s32_fneg_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %2
+ ; VI-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
+ ;
; GFX11-LABEL: name: fptosi_s32_to_s32_fneg_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %2
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FNEG %0
%2:vgpr(s32) = G_FPTOSI %1
@@ -117,23 +123,25 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
+ ;
; VI-LABEL: name: fptosi_s16_to_s32_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptosi_s16_to_s32_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
@@ -154,23 +162,25 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
+ ;
; VI-LABEL: name: fptosi_s16_to_s32_vs
; VI: liveins: $sgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptosi_s16_to_s32_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %3, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
@@ -193,27 +203,29 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %3
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
+ ;
; VI-LABEL: name: fptosi_s16_to_s32_fneg_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %3
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptosi_s16_to_s32_fneg_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %3
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
@@ -235,23 +247,25 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
+ ;
; VI-LABEL: name: fptosi_s16_to_s1_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI-NEXT: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptosi_s16_to_s1_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
@@ -273,23 +287,25 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
+ ;
; VI-LABEL: name: fptosi_s16_to_s1_vs
; VI: liveins: $sgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI-NEXT: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptosi_s16_to_s1_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOSI %1
@@ -313,27 +329,29 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit %3
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
+ ;
; VI-LABEL: name: fptosi_s16_to_s1_fneg_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
- ; VI-NEXT: S_ENDPGM 0, implicit %3
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptosi_s16_to_s1_fneg_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit %3
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index 09a886255048c2..47a091804ce0a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -19,40 +19,40 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
- ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; GCN-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; GCN-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ;
; VI-LABEL: name: fptoui
; VI: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
- ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; VI-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; VI-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ;
; GFX11-LABEL: name: fptoui
; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(p1) = COPY $vgpr3_vgpr4
- ; fptoui s
%3:vgpr(s32) = G_FPTOUI %0
- ; fptoui v
%4:vgpr(s32) = G_FPTOUI %1
G_STORE %3, %2 :: (store (s32), addrspace 1)
@@ -73,23 +73,25 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
+ ;
; VI-LABEL: name: fptoui_s16_to_s32_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptoui_s16_to_s32_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
@@ -110,23 +112,25 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
+ ;
; VI-LABEL: name: fptoui_s16_to_s32_vs
; VI: liveins: $sgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptoui_s16_to_s32_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
@@ -149,27 +153,29 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY %3
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
+ ;
; VI-LABEL: name: fptoui_s16_to_s32_fneg_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI-NEXT: $vgpr0 = COPY %3
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptoui_s16_to_s32_fneg_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %3
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
@@ -191,23 +197,25 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
+ ;
; VI-LABEL: name: fptoui_s16_to_s1_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI-NEXT: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptoui_s16_to_s1_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
@@ -229,23 +237,25 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit %2
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
+ ;
; VI-LABEL: name: fptoui_s16_to_s1_vs
; VI: liveins: $sgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; VI-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; VI-NEXT: S_ENDPGM 0, implicit %2
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptoui_s16_to_s1_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_FPTOUI %1
@@ -269,27 +279,29 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GCN-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit %3
+ ; GCN-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
+ ;
; VI-LABEL: name: fptoui_s16_to_s1_fneg_vv
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; VI-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; VI-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
- ; VI-NEXT: S_ENDPGM 0, implicit %3
+ ; VI-NEXT: [[V_CVT_F32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; VI-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e64_]], implicit $mode, implicit $exec
+ ; VI-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
+ ;
; GFX11-LABEL: name: fptoui_s16_to_s1_fneg_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_t16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit %3
+ ; GFX11-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_FNEG %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
index 043d93aa814ef8..938bb58bafc936 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
@@ -23,6 +23,7 @@ body: |
; WAVE64-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ;
; WAVE32-LABEL: name: sitofp
; WAVE32: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4
; WAVE32-NEXT: {{ $}}
@@ -33,6 +34,7 @@ body: |
; WAVE32-NEXT: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ;
; GFX11-LABEL: name: sitofp
; GFX11: liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4
; GFX11-NEXT: {{ $}}
@@ -49,10 +51,8 @@ body: |
%2:vgpr(p1) = COPY $vgpr3_vgpr4
- ; sitofp s
%3:vgpr(s32) = G_SITOFP %0
- ; sitofp v
%4:vgpr(s32) = G_SITOFP %1
G_STORE %3, %2 :: (store (s32), addrspace 1)
@@ -74,22 +74,24 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE64-NEXT: $vgpr0 = COPY %1
+ ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; WAVE32-LABEL: name: sitofp_s32_to_s16_vv
; WAVE32: liveins: $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; GFX11-LABEL: name: sitofp_s32_to_s16_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %1
+ ; GFX11-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_SITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
@@ -111,22 +113,24 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE64-NEXT: $vgpr0 = COPY %1
+ ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; WAVE32-LABEL: name: sitofp_s32_to_s16_vs
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; GFX11-LABEL: name: sitofp_s32_to_s16_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %1
+ ; GFX11-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s16) = G_SITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
index ac0f15033c46b6..9c6fded0d14253 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
@@ -19,12 +19,14 @@ body: |
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
+ ;
; WAVE32-LABEL: name: uitofp_s32_to_s32_vv
; WAVE32: liveins: $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
+ ;
; GFX11-LABEL: name: uitofp_s32_to_s32_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
@@ -52,12 +54,14 @@ body: |
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
+ ;
; WAVE32-LABEL: name: uitofp_s32_to_s32_vs
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]]
+ ;
; GFX11-LABEL: name: uitofp_s32_to_s32_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
@@ -84,22 +88,24 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE64-NEXT: $vgpr0 = COPY %1
+ ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; WAVE32-LABEL: name: uitofp_s32_to_s16_vv
; WAVE32: liveins: $vgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; GFX11-LABEL: name: uitofp_s32_to_s16_vv
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %1
+ ; GFX11-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_UITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
@@ -121,22 +127,24 @@ body: |
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE64-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE64-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE64-NEXT: $vgpr0 = COPY %1
+ ; WAVE64-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE64-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; WAVE32-LABEL: name: uitofp_s32_to_s16_vs
; WAVE32: liveins: $sgpr0
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; WAVE32-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; WAVE32-NEXT: $vgpr0 = COPY %1
+ ; WAVE32-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; WAVE32-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_e64_]]
+ ;
; GFX11-LABEL: name: uitofp_s32_to_s16_vs
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-NEXT: %1:vgpr_32 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: $vgpr0 = COPY %1
+ ; GFX11-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s16) = G_UITOFP %0
%2:vgpr(s32) = G_ANYEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
new file mode 100644
index 00000000000000..0bea1ccfe3ecae
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -0,0 +1,20 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+# XFAIL: *
+# FIXME-TRUE16. reenable after CVT_F16_U16_fake16 is supported in MC
+
+---
+name: cvt_hi_f32_f16
+body: |
+ bb.0:
+ ; GCN-LABEL: name: cvt_hi_f32_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F16_U16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_e64_]], implicit $exec
+ ; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %2:sreg_32 = COPY %1:vgpr_32
+ %3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
new file mode 100644
index 00000000000000..059f3fc3d9ff52
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
@@ -0,0 +1,20 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+# XFAIL: *
+# FIXME-TRUE16. reenable after CVT_F16_U16_t16 is supported in MC
+
+---
+name: cvt_hi_f32_f16
+body: |
+ bb.0:
+ ; GCN-LABEL: name: cvt_hi_f32_f16
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_CVT_F16_U16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_e64_]], implicit $exec
+ ; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ %0:vgpr_16 = IMPLICIT_DEF
+ %1:vgpr_16 = V_CVT_F16_U16_t16_e64 %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
+ %2:sreg_32 = COPY %1:vgpr_16
+ %3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
index 9ae5f559e860af..9a727a321d7869 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
@@ -22,23 +22,6 @@ body: |
%5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
...
-# Needs extra shift instruction to select hi 16 bits
----
-name: cvt_hi_f32_f16
-body: |
- bb.0:
- ; GCN-LABEL: name: cvt_hi_f32_f16
- ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[V_CVT_F16_U16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_e64_]], implicit $exec
- ; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
- %0:vgpr_32 = IMPLICIT_DEF
- %1:vgpr_32 = V_CVT_F16_U16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
- %2:sreg_32 = COPY %1:vgpr_32
- %3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
-...
-
---
name: fmac_f16
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
index 3706c535224438..67eb719fd2c0d9 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
@@ -20,7 +20,7 @@ body: |
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 0, implicit $mode, implicit $exec
@@ -45,7 +45,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 1, implicit $mode, implicit $exec
@@ -70,7 +70,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_fake16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr0 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr1, 2, implicit $mode, implicit $exec
@@ -95,7 +95,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode
- ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_fake16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_ROUND_F16_F32_PSEUDO $vgpr0, 3, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
index bc37c994d226b3..14bb4310c619ea 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
@@ -409,14 +409,14 @@ body: |
%264:vgpr_32 = V_LSHL_OR_B32_e64 %254, 8, %263, implicit $exec
%265:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%266:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %267:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %265.sub0, 0, 0, implicit $mode, implicit $exec
- %268:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %265.sub1, 0, 0, implicit $mode, implicit $exec
- %269:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %265.sub2, 0, 0, implicit $mode, implicit $exec
- %270:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %265.sub3, 0, 0, implicit $mode, implicit $exec
- %271:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %266.sub0, 0, 0, implicit $mode, implicit $exec
- %272:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %266.sub1, 0, 0, implicit $mode, implicit $exec
- %273:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %266.sub2, 0, 0, implicit $mode, implicit $exec
- %274:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %266.sub3, 0, 0, implicit $mode, implicit $exec
+ %267:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub0, 0, 0, implicit $mode, implicit $exec
+ %268:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub1, 0, 0, implicit $mode, implicit $exec
+ %269:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub2, 0, 0, implicit $mode, implicit $exec
+ %270:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub3, 0, 0, implicit $mode, implicit $exec
+ %271:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub0, 0, 0, implicit $mode, implicit $exec
+ %272:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub1, 0, 0, implicit $mode, implicit $exec
+ %273:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub2, 0, 0, implicit $mode, implicit $exec
+ %274:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub3, 0, 0, implicit $mode, implicit $exec
undef %275.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %273, 0, %274, 0, 0, implicit $mode, implicit $exec
%275.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %271, 0, %272, 0, 0, implicit $mode, implicit $exec
%275.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %269, 0, %270, 0, 0, implicit $mode, implicit $exec
@@ -431,14 +431,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %212.sub6, %212.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%277:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%278:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %279:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %277.sub0, 0, 0, implicit $mode, implicit $exec
- %280:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %277.sub1, 0, 0, implicit $mode, implicit $exec
- %281:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %277.sub2, 0, 0, implicit $mode, implicit $exec
- %282:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %277.sub3, 0, 0, implicit $mode, implicit $exec
- %283:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %278.sub0, 0, 0, implicit $mode, implicit $exec
- %284:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %278.sub1, 0, 0, implicit $mode, implicit $exec
- %285:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %278.sub2, 0, 0, implicit $mode, implicit $exec
- %286:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %278.sub3, 0, 0, implicit $mode, implicit $exec
+ %279:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub0, 0, 0, implicit $mode, implicit $exec
+ %280:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub1, 0, 0, implicit $mode, implicit $exec
+ %281:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub2, 0, 0, implicit $mode, implicit $exec
+ %282:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub3, 0, 0, implicit $mode, implicit $exec
+ %283:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub0, 0, 0, implicit $mode, implicit $exec
+ %284:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub1, 0, 0, implicit $mode, implicit $exec
+ %285:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub2, 0, 0, implicit $mode, implicit $exec
+ %286:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub3, 0, 0, implicit $mode, implicit $exec
undef %287.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %285, 0, %286, 0, 0, implicit $mode, implicit $exec
%287.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %283, 0, %284, 0, 0, implicit $mode, implicit $exec
%287.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %281, 0, %282, 0, 0, implicit $mode, implicit $exec
@@ -450,14 +450,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %246.sub6, %246.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%288:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%289:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %290:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %288.sub0, 0, 0, implicit $mode, implicit $exec
- %291:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %288.sub1, 0, 0, implicit $mode, implicit $exec
- %292:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %288.sub2, 0, 0, implicit $mode, implicit $exec
- %293:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %288.sub3, 0, 0, implicit $mode, implicit $exec
- %294:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %289.sub0, 0, 0, implicit $mode, implicit $exec
- %295:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %289.sub1, 0, 0, implicit $mode, implicit $exec
- %296:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %289.sub2, 0, 0, implicit $mode, implicit $exec
- %297:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %289.sub3, 0, 0, implicit $mode, implicit $exec
+ %290:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub0, 0, 0, implicit $mode, implicit $exec
+ %291:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub1, 0, 0, implicit $mode, implicit $exec
+ %292:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub2, 0, 0, implicit $mode, implicit $exec
+ %293:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub3, 0, 0, implicit $mode, implicit $exec
+ %294:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub0, 0, 0, implicit $mode, implicit $exec
+ %295:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub1, 0, 0, implicit $mode, implicit $exec
+ %296:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub2, 0, 0, implicit $mode, implicit $exec
+ %297:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub3, 0, 0, implicit $mode, implicit $exec
undef %298.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %296, 0, %297, 0, 0, implicit $mode, implicit $exec
%298.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %294, 0, %295, 0, 0, implicit $mode, implicit $exec
%298.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %292, 0, %293, 0, 0, implicit $mode, implicit $exec
@@ -470,14 +470,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %250.sub6, %250.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%300:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%301:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %302:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %300.sub0, 0, 0, implicit $mode, implicit $exec
- %303:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %300.sub1, 0, 0, implicit $mode, implicit $exec
- %304:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %300.sub2, 0, 0, implicit $mode, implicit $exec
- %305:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %300.sub3, 0, 0, implicit $mode, implicit $exec
- %306:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %301.sub0, 0, 0, implicit $mode, implicit $exec
- %307:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %301.sub1, 0, 0, implicit $mode, implicit $exec
- %308:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %301.sub2, 0, 0, implicit $mode, implicit $exec
- %309:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %301.sub3, 0, 0, implicit $mode, implicit $exec
+ %302:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub0, 0, 0, implicit $mode, implicit $exec
+ %303:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub1, 0, 0, implicit $mode, implicit $exec
+ %304:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub2, 0, 0, implicit $mode, implicit $exec
+ %305:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub3, 0, 0, implicit $mode, implicit $exec
+ %306:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub0, 0, 0, implicit $mode, implicit $exec
+ %307:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub1, 0, 0, implicit $mode, implicit $exec
+ %308:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub2, 0, 0, implicit $mode, implicit $exec
+ %309:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub3, 0, 0, implicit $mode, implicit $exec
undef %310.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %308, 0, %309, 0, 0, implicit $mode, implicit $exec
%310.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %306, 0, %307, 0, 0, implicit $mode, implicit $exec
%310.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %304, 0, %305, 0, 0, implicit $mode, implicit $exec
@@ -491,14 +491,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %253.sub6, %253.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%313:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%314:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %315:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %313.sub0, 0, 0, implicit $mode, implicit $exec
- %316:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %313.sub1, 0, 0, implicit $mode, implicit $exec
- %317:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %313.sub2, 0, 0, implicit $mode, implicit $exec
- %318:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %313.sub3, 0, 0, implicit $mode, implicit $exec
- %319:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %314.sub0, 0, 0, implicit $mode, implicit $exec
- %320:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %314.sub1, 0, 0, implicit $mode, implicit $exec
- %321:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %314.sub2, 0, 0, implicit $mode, implicit $exec
- %322:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %314.sub3, 0, 0, implicit $mode, implicit $exec
+ %315:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub0, 0, 0, implicit $mode, implicit $exec
+ %316:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub1, 0, 0, implicit $mode, implicit $exec
+ %317:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub2, 0, 0, implicit $mode, implicit $exec
+ %318:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub3, 0, 0, implicit $mode, implicit $exec
+ %319:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub0, 0, 0, implicit $mode, implicit $exec
+ %320:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub1, 0, 0, implicit $mode, implicit $exec
+ %321:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub2, 0, 0, implicit $mode, implicit $exec
+ %322:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub3, 0, 0, implicit $mode, implicit $exec
undef %323.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %321, 0, %322, 0, 0, implicit $mode, implicit $exec
%323.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %319, 0, %320, 0, 0, implicit $mode, implicit $exec
%323.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %317, 0, %318, 0, 0, implicit $mode, implicit $exec
@@ -511,14 +511,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %214.sub6, %214.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%325:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%326:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %327:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %325.sub0, 0, 0, implicit $mode, implicit $exec
- %328:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %325.sub1, 0, 0, implicit $mode, implicit $exec
- %329:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %325.sub2, 0, 0, implicit $mode, implicit $exec
- %330:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %325.sub3, 0, 0, implicit $mode, implicit $exec
- %331:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %326.sub0, 0, 0, implicit $mode, implicit $exec
- %332:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %326.sub1, 0, 0, implicit $mode, implicit $exec
- %333:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %326.sub2, 0, 0, implicit $mode, implicit $exec
- %334:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %326.sub3, 0, 0, implicit $mode, implicit $exec
+ %327:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub0, 0, 0, implicit $mode, implicit $exec
+ %328:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub1, 0, 0, implicit $mode, implicit $exec
+ %329:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub2, 0, 0, implicit $mode, implicit $exec
+ %330:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub3, 0, 0, implicit $mode, implicit $exec
+ %331:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub0, 0, 0, implicit $mode, implicit $exec
+ %332:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub1, 0, 0, implicit $mode, implicit $exec
+ %333:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub2, 0, 0, implicit $mode, implicit $exec
+ %334:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub3, 0, 0, implicit $mode, implicit $exec
undef %335.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %333, 0, %334, 0, 0, implicit $mode, implicit $exec
%335.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %331, 0, %332, 0, 0, implicit $mode, implicit $exec
%335.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %329, 0, %330, 0, 0, implicit $mode, implicit $exec
@@ -531,14 +531,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %247.sub6, %247.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%337:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%338:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %339:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %337.sub0, 0, 0, implicit $mode, implicit $exec
- %340:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %337.sub1, 0, 0, implicit $mode, implicit $exec
- %341:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %337.sub2, 0, 0, implicit $mode, implicit $exec
- %342:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %337.sub3, 0, 0, implicit $mode, implicit $exec
- %343:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %338.sub0, 0, 0, implicit $mode, implicit $exec
- %344:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %338.sub1, 0, 0, implicit $mode, implicit $exec
- %345:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %338.sub2, 0, 0, implicit $mode, implicit $exec
- %346:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %338.sub3, 0, 0, implicit $mode, implicit $exec
+ %339:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub0, 0, 0, implicit $mode, implicit $exec
+ %340:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub1, 0, 0, implicit $mode, implicit $exec
+ %341:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub2, 0, 0, implicit $mode, implicit $exec
+ %342:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub3, 0, 0, implicit $mode, implicit $exec
+ %343:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub0, 0, 0, implicit $mode, implicit $exec
+ %344:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub1, 0, 0, implicit $mode, implicit $exec
+ %345:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub2, 0, 0, implicit $mode, implicit $exec
+ %346:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub3, 0, 0, implicit $mode, implicit $exec
undef %347.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %345, 0, %346, 0, 0, implicit $mode, implicit $exec
%347.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %343, 0, %344, 0, 0, implicit $mode, implicit $exec
%347.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %341, 0, %342, 0, 0, implicit $mode, implicit $exec
@@ -552,14 +552,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %213.sub6, %213.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%350:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%351:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %352:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %350.sub0, 0, 0, implicit $mode, implicit $exec
- %353:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %350.sub1, 0, 0, implicit $mode, implicit $exec
- %354:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %350.sub2, 0, 0, implicit $mode, implicit $exec
- %355:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %350.sub3, 0, 0, implicit $mode, implicit $exec
- %356:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %351.sub0, 0, 0, implicit $mode, implicit $exec
- %357:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %351.sub1, 0, 0, implicit $mode, implicit $exec
- %358:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %351.sub2, 0, 0, implicit $mode, implicit $exec
- %359:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %351.sub3, 0, 0, implicit $mode, implicit $exec
+ %352:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub0, 0, 0, implicit $mode, implicit $exec
+ %353:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub1, 0, 0, implicit $mode, implicit $exec
+ %354:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub2, 0, 0, implicit $mode, implicit $exec
+ %355:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub3, 0, 0, implicit $mode, implicit $exec
+ %356:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub0, 0, 0, implicit $mode, implicit $exec
+ %357:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub1, 0, 0, implicit $mode, implicit $exec
+ %358:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub2, 0, 0, implicit $mode, implicit $exec
+ %359:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub3, 0, 0, implicit $mode, implicit $exec
undef %360.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %358, 0, %359, 0, 0, implicit $mode, implicit $exec
%360.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %356, 0, %357, 0, 0, implicit $mode, implicit $exec
%360.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %354, 0, %355, 0, 0, implicit $mode, implicit $exec
@@ -573,14 +573,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %216.sub6, %216.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%363:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%364:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %365:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %363.sub0, 0, 0, implicit $mode, implicit $exec
- %366:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %363.sub1, 0, 0, implicit $mode, implicit $exec
- %367:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %363.sub2, 0, 0, implicit $mode, implicit $exec
- %368:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %363.sub3, 0, 0, implicit $mode, implicit $exec
- %369:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %364.sub0, 0, 0, implicit $mode, implicit $exec
- %370:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %364.sub1, 0, 0, implicit $mode, implicit $exec
- %371:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %364.sub2, 0, 0, implicit $mode, implicit $exec
- %372:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %364.sub3, 0, 0, implicit $mode, implicit $exec
+ %365:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub0, 0, 0, implicit $mode, implicit $exec
+ %366:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub1, 0, 0, implicit $mode, implicit $exec
+ %367:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub2, 0, 0, implicit $mode, implicit $exec
+ %368:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub3, 0, 0, implicit $mode, implicit $exec
+ %369:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub0, 0, 0, implicit $mode, implicit $exec
+ %370:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub1, 0, 0, implicit $mode, implicit $exec
+ %371:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub2, 0, 0, implicit $mode, implicit $exec
+ %372:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub3, 0, 0, implicit $mode, implicit $exec
undef %373.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %371, 0, %372, 0, 0, implicit $mode, implicit $exec
%373.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %369, 0, %370, 0, 0, implicit $mode, implicit $exec
%373.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %367, 0, %368, 0, 0, implicit $mode, implicit $exec
@@ -593,14 +593,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %248.sub6, %248.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%375:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%376:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %377:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %375.sub0, 0, 0, implicit $mode, implicit $exec
- %378:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %375.sub1, 0, 0, implicit $mode, implicit $exec
- %379:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %375.sub2, 0, 0, implicit $mode, implicit $exec
- %380:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %375.sub3, 0, 0, implicit $mode, implicit $exec
- %381:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %376.sub0, 0, 0, implicit $mode, implicit $exec
- %382:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %376.sub1, 0, 0, implicit $mode, implicit $exec
- %383:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %376.sub2, 0, 0, implicit $mode, implicit $exec
- %384:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %376.sub3, 0, 0, implicit $mode, implicit $exec
+ %377:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub0, 0, 0, implicit $mode, implicit $exec
+ %378:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub1, 0, 0, implicit $mode, implicit $exec
+ %379:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub2, 0, 0, implicit $mode, implicit $exec
+ %380:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub3, 0, 0, implicit $mode, implicit $exec
+ %381:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub0, 0, 0, implicit $mode, implicit $exec
+ %382:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub1, 0, 0, implicit $mode, implicit $exec
+ %383:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub2, 0, 0, implicit $mode, implicit $exec
+ %384:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub3, 0, 0, implicit $mode, implicit $exec
undef %385.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %383, 0, %384, 0, 0, implicit $mode, implicit $exec
%385.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %381, 0, %382, 0, 0, implicit $mode, implicit $exec
%385.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %379, 0, %380, 0, 0, implicit $mode, implicit $exec
@@ -612,14 +612,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %217.sub6, %217.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%386:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%387:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %388:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %386.sub0, 0, 0, implicit $mode, implicit $exec
- %389:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %386.sub1, 0, 0, implicit $mode, implicit $exec
- %390:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %386.sub2, 0, 0, implicit $mode, implicit $exec
- %391:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %386.sub3, 0, 0, implicit $mode, implicit $exec
- %392:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %387.sub0, 0, 0, implicit $mode, implicit $exec
- %393:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %387.sub1, 0, 0, implicit $mode, implicit $exec
- %394:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %387.sub2, 0, 0, implicit $mode, implicit $exec
- %395:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %387.sub3, 0, 0, implicit $mode, implicit $exec
+ %388:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub0, 0, 0, implicit $mode, implicit $exec
+ %389:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub1, 0, 0, implicit $mode, implicit $exec
+ %390:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub2, 0, 0, implicit $mode, implicit $exec
+ %391:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub3, 0, 0, implicit $mode, implicit $exec
+ %392:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub0, 0, 0, implicit $mode, implicit $exec
+ %393:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub1, 0, 0, implicit $mode, implicit $exec
+ %394:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub2, 0, 0, implicit $mode, implicit $exec
+ %395:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub3, 0, 0, implicit $mode, implicit $exec
undef %396.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %394, 0, %395, 0, 0, implicit $mode, implicit $exec
%396.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %392, 0, %393, 0, 0, implicit $mode, implicit $exec
%396.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %390, 0, %391, 0, 0, implicit $mode, implicit $exec
@@ -632,14 +632,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %251.sub6, %251.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%398:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%399:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %400:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %398.sub0, 0, 0, implicit $mode, implicit $exec
- %401:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %398.sub1, 0, 0, implicit $mode, implicit $exec
- %402:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %398.sub2, 0, 0, implicit $mode, implicit $exec
- %403:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %398.sub3, 0, 0, implicit $mode, implicit $exec
- %404:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %399.sub0, 0, 0, implicit $mode, implicit $exec
- %405:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %399.sub1, 0, 0, implicit $mode, implicit $exec
- %406:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %399.sub2, 0, 0, implicit $mode, implicit $exec
- %407:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %399.sub3, 0, 0, implicit $mode, implicit $exec
+ %400:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub0, 0, 0, implicit $mode, implicit $exec
+ %401:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub1, 0, 0, implicit $mode, implicit $exec
+ %402:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub2, 0, 0, implicit $mode, implicit $exec
+ %403:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub3, 0, 0, implicit $mode, implicit $exec
+ %404:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub0, 0, 0, implicit $mode, implicit $exec
+ %405:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub1, 0, 0, implicit $mode, implicit $exec
+ %406:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub2, 0, 0, implicit $mode, implicit $exec
+ %407:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub3, 0, 0, implicit $mode, implicit $exec
undef %408.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %406, 0, %407, 0, 0, implicit $mode, implicit $exec
%408.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %404, 0, %405, 0, 0, implicit $mode, implicit $exec
%408.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %402, 0, %403, 0, 0, implicit $mode, implicit $exec
@@ -652,14 +652,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %252.sub6, %252.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%410:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%411:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %412:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %410.sub0, 0, 0, implicit $mode, implicit $exec
- %413:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %410.sub1, 0, 0, implicit $mode, implicit $exec
- %414:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %410.sub2, 0, 0, implicit $mode, implicit $exec
- %415:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %410.sub3, 0, 0, implicit $mode, implicit $exec
- %416:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %411.sub0, 0, 0, implicit $mode, implicit $exec
- %417:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %411.sub1, 0, 0, implicit $mode, implicit $exec
- %418:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %411.sub2, 0, 0, implicit $mode, implicit $exec
- %419:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %411.sub3, 0, 0, implicit $mode, implicit $exec
+ %412:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub0, 0, 0, implicit $mode, implicit $exec
+ %413:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub1, 0, 0, implicit $mode, implicit $exec
+ %414:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub2, 0, 0, implicit $mode, implicit $exec
+ %415:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub3, 0, 0, implicit $mode, implicit $exec
+ %416:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub0, 0, 0, implicit $mode, implicit $exec
+ %417:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub1, 0, 0, implicit $mode, implicit $exec
+ %418:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub2, 0, 0, implicit $mode, implicit $exec
+ %419:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub3, 0, 0, implicit $mode, implicit $exec
undef %420.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %418, 0, %419, 0, 0, implicit $mode, implicit $exec
%420.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %416, 0, %417, 0, 0, implicit $mode, implicit $exec
%420.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %414, 0, %415, 0, 0, implicit $mode, implicit $exec
@@ -672,14 +672,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %220.sub6, %220.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%422:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%423:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %424:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %422.sub0, 0, 0, implicit $mode, implicit $exec
- %425:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %422.sub1, 0, 0, implicit $mode, implicit $exec
- %426:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %422.sub2, 0, 0, implicit $mode, implicit $exec
- %427:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %422.sub3, 0, 0, implicit $mode, implicit $exec
- %428:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %423.sub0, 0, 0, implicit $mode, implicit $exec
- %429:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %423.sub1, 0, 0, implicit $mode, implicit $exec
- %430:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %423.sub2, 0, 0, implicit $mode, implicit $exec
- %431:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %423.sub3, 0, 0, implicit $mode, implicit $exec
+ %424:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub0, 0, 0, implicit $mode, implicit $exec
+ %425:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub1, 0, 0, implicit $mode, implicit $exec
+ %426:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub2, 0, 0, implicit $mode, implicit $exec
+ %427:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub3, 0, 0, implicit $mode, implicit $exec
+ %428:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub0, 0, 0, implicit $mode, implicit $exec
+ %429:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub1, 0, 0, implicit $mode, implicit $exec
+ %430:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub2, 0, 0, implicit $mode, implicit $exec
+ %431:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub3, 0, 0, implicit $mode, implicit $exec
undef %432.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %430, 0, %431, 0, 0, implicit $mode, implicit $exec
%432.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %428, 0, %429, 0, 0, implicit $mode, implicit $exec
%432.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %426, 0, %427, 0, 0, implicit $mode, implicit $exec
@@ -692,14 +692,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %249.sub6, %249.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%434:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%435:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %436:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %434.sub0, 0, 0, implicit $mode, implicit $exec
- %437:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %434.sub1, 0, 0, implicit $mode, implicit $exec
- %438:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %434.sub2, 0, 0, implicit $mode, implicit $exec
- %439:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %434.sub3, 0, 0, implicit $mode, implicit $exec
- %440:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %435.sub0, 0, 0, implicit $mode, implicit $exec
- %441:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %435.sub1, 0, 0, implicit $mode, implicit $exec
- %442:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %435.sub2, 0, 0, implicit $mode, implicit $exec
- %443:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %435.sub3, 0, 0, implicit $mode, implicit $exec
+ %436:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub0, 0, 0, implicit $mode, implicit $exec
+ %437:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub1, 0, 0, implicit $mode, implicit $exec
+ %438:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub2, 0, 0, implicit $mode, implicit $exec
+ %439:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub3, 0, 0, implicit $mode, implicit $exec
+ %440:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub0, 0, 0, implicit $mode, implicit $exec
+ %441:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub1, 0, 0, implicit $mode, implicit $exec
+ %442:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub2, 0, 0, implicit $mode, implicit $exec
+ %443:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub3, 0, 0, implicit $mode, implicit $exec
undef %444.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %442, 0, %443, 0, 0, implicit $mode, implicit $exec
%444.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %440, 0, %441, 0, 0, implicit $mode, implicit $exec
%444.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %438, 0, %439, 0, 0, implicit $mode, implicit $exec
@@ -712,14 +712,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %219.sub6, %219.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%446:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%447:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %448:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %446.sub0, 0, 0, implicit $mode, implicit $exec
- %449:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %446.sub1, 0, 0, implicit $mode, implicit $exec
- %450:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %446.sub2, 0, 0, implicit $mode, implicit $exec
- %451:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %446.sub3, 0, 0, implicit $mode, implicit $exec
- %452:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %447.sub0, 0, 0, implicit $mode, implicit $exec
- %453:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %447.sub1, 0, 0, implicit $mode, implicit $exec
- %454:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %447.sub2, 0, 0, implicit $mode, implicit $exec
- %455:vgpr_32 = V_CVT_F16_F32_t16_e64 0, %447.sub3, 0, 0, implicit $mode, implicit $exec
+ %448:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub0, 0, 0, implicit $mode, implicit $exec
+ %449:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub1, 0, 0, implicit $mode, implicit $exec
+ %450:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub2, 0, 0, implicit $mode, implicit $exec
+ %451:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub3, 0, 0, implicit $mode, implicit $exec
+ %452:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub0, 0, 0, implicit $mode, implicit $exec
+ %453:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub1, 0, 0, implicit $mode, implicit $exec
+ %454:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub2, 0, 0, implicit $mode, implicit $exec
+ %455:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub3, 0, 0, implicit $mode, implicit $exec
undef %456.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %454, 0, %455, 0, 0, implicit $mode, implicit $exec
%456.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %452, 0, %453, 0, 0, implicit $mode, implicit $exec
%456.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %450, 0, %451, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
index 90d5ca7f727513..b80bbe161c0de4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s
@@ -403,50 +403,50 @@ v_ctz_i32_b32 v5, src_scc
v_ctz_i32_b32 v255, 0xaf123456
// GFX11: encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf]
-v_cvt_f16_f32 v5, v1
+v_cvt_f16_f32 v5.l, v1
// GFX11: encoding: [0x01,0x15,0x0a,0x7e]
-v_cvt_f16_f32 v5, v255
+v_cvt_f16_f32 v5.l, v255
// GFX11: encoding: [0xff,0x15,0x0a,0x7e]
-v_cvt_f16_f32 v5, s1
+v_cvt_f16_f32 v5.l, s1
// GFX11: encoding: [0x01,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, s105
+v_cvt_f16_f32 v5.l, s105
// GFX11: encoding: [0x69,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, vcc_lo
+v_cvt_f16_f32 v5.l, vcc_lo
// GFX11: encoding: [0x6a,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, vcc_hi
+v_cvt_f16_f32 v5.l, vcc_hi
// GFX11: encoding: [0x6b,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, ttmp15
+v_cvt_f16_f32 v5.l, ttmp15
// GFX11: encoding: [0x7b,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, m0
+v_cvt_f16_f32 v5.l, m0
// GFX11: encoding: [0x7d,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, exec_lo
+v_cvt_f16_f32 v5.l, exec_lo
// GFX11: encoding: [0x7e,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, exec_hi
+v_cvt_f16_f32 v5.l, exec_hi
// GFX11: encoding: [0x7f,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, null
+v_cvt_f16_f32 v5.l, null
// GFX11: encoding: [0x7c,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, -1
+v_cvt_f16_f32 v5.l, -1
// GFX11: encoding: [0xc1,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, 0.5
+v_cvt_f16_f32 v5.l, 0.5
// GFX11: encoding: [0xf0,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, src_scc
-// GFX11: encoding: [0xfd,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.h, src_scc
+// GFX11: encoding: [0xfd,0x14,0x0a,0x7f]
-v_cvt_f16_f32 v127, 0xaf123456
-// GFX11: encoding: [0xff,0x14,0xfe,0x7e,0x56,0x34,0x12,0xaf]
+v_cvt_f16_f32 v127.h, 0xaf123456
+// GFX11: encoding: [0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf]
v_cvt_f16_i16 v5, v1
// GFX11: encoding: [0x01,0xa3,0x0a,0x7e]
@@ -538,12 +538,18 @@ v_cvt_f16_u16 v5, src_scc
v_cvt_f16_u16 v127, 0xfe0b
// GFX11: encoding: [0xff,0xa0,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
-v_cvt_f32_f16 v5, v1
+v_cvt_f32_f16 v5, v1.l
// GFX11: encoding: [0x01,0x17,0x0a,0x7e]
-v_cvt_f32_f16 v5, v127
+v_cvt_f32_f16 v5, v127.l
// GFX11: encoding: [0x7f,0x17,0x0a,0x7e]
+v_cvt_f32_f16 v5, v1.h
+// GFX11: encoding: [0x81,0x17,0x0a,0x7e]
+
+v_cvt_f32_f16 v5, v127.h
+// GFX11: encoding: [0xff,0x17,0x0a,0x7e]
+
v_cvt_f32_f16 v5, s1
// GFX11: encoding: [0x01,0x16,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
index fea36e9f221445..f6915c98013fad 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s
@@ -337,47 +337,47 @@ v_ctz_i32_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
v_ctz_i32_b32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x05,0x30]
-v_cvt_f16_f32 v5, v1 quad_perm:[3,2,1,0]
+v_cvt_f16_f32 v5.l, v1 quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0xff]
-v_cvt_f16_f32 v5, v1 quad_perm:[0,1,2,3]
+v_cvt_f16_f32 v5.l, v1 quad_perm:[0,1,2,3]
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0xe4,0x00,0xff]
-v_cvt_f16_f32 v5, v1 row_mirror
+v_cvt_f16_f32 v5.l, v1 row_mirror
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x40,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_half_mirror
+v_cvt_f16_f32 v5.l, v1 row_half_mirror
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x41,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_shl:1
+v_cvt_f16_f32 v5.l, v1 row_shl:1
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x01,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_shl:15
+v_cvt_f16_f32 v5.l, v1 row_shl:15
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x0f,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_shr:1
+v_cvt_f16_f32 v5.l, v1 row_shr:1
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x11,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_shr:15
+v_cvt_f16_f32 v5.l, v1 row_shr:15
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1f,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_ror:1
+v_cvt_f16_f32 v5.l, v1 row_ror:1
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x21,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_ror:15
+v_cvt_f16_f32 v5.l, v1 row_ror:15
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x2f,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+v_cvt_f16_f32 v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x50,0x01,0xff]
-v_cvt_f16_f32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+v_cvt_f16_f32 v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x5f,0x01,0x01]
-v_cvt_f16_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x60,0x09,0x13]
+v_cvt_f16_f32 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x14,0x0a,0x7f,0x01,0x60,0x09,0x13]
-v_cvt_f16_f32 v127, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: encoding: [0xfa,0x14,0xfe,0x7e,0xff,0x6f,0x35,0x30]
+v_cvt_f16_f32 v127.h, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0x14,0xfe,0x7f,0xff,0x6f,0x35,0x30]
v_cvt_f16_i16 v5, v1 quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
@@ -463,47 +463,47 @@ v_cvt_f16_u16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
v_cvt_f16_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: encoding: [0xfa,0xa0,0xfe,0x7e,0x7f,0x6f,0x05,0x30]
-v_cvt_f32_f16 v5, v1 quad_perm:[3,2,1,0]
+v_cvt_f32_f16 v5, v1.l quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0xff]
-v_cvt_f32_f16 v5, v1 quad_perm:[0,1,2,3]
+v_cvt_f32_f16 v5, v1.l quad_perm:[0,1,2,3]
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0xe4,0x00,0xff]
-v_cvt_f32_f16 v5, v1 row_mirror
+v_cvt_f32_f16 v5, v1.l row_mirror
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x40,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_half_mirror
+v_cvt_f32_f16 v5, v1.l row_half_mirror
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x41,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_shl:1
+v_cvt_f32_f16 v5, v1.l row_shl:1
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x01,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_shl:15
+v_cvt_f32_f16 v5, v1.l row_shl:15
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x0f,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_shr:1
+v_cvt_f32_f16 v5, v1.l row_shr:1
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x11,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_shr:15
+v_cvt_f32_f16 v5, v1.l row_shr:15
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1f,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_ror:1
+v_cvt_f32_f16 v5, v1.l row_ror:1
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x21,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_ror:15
+v_cvt_f32_f16 v5, v1.l row_ror:15
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x2f,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+v_cvt_f32_f16 v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x50,0x01,0xff]
-v_cvt_f32_f16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+v_cvt_f32_f16 v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x5f,0x01,0x01]
-v_cvt_f32_f16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x60,0x09,0x13]
+v_cvt_f32_f16 v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x16,0x0a,0x7e,0x81,0x60,0x09,0x13]
-v_cvt_f32_f16 v255, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: encoding: [0xfa,0x16,0xfe,0x7f,0x7f,0x6f,0x35,0x30]
+v_cvt_f32_f16 v255, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0x16,0xfe,0x7f,0xff,0x6f,0x35,0x30]
v_cvt_f32_i32 v5, v1 quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s
index fc9079fc54282a..d533cb28e8807e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s
@@ -76,14 +76,17 @@ v_ctz_i32_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_ctz_i32_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: encoding: [0xe9,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00]
-v_cvt_f16_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f16_f32 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05]
-v_cvt_f16_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: encoding: [0xea,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05]
+v_cvt_f16_f32 v127.l, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x14,0xfe,0x7e,0x01,0x77,0x39,0x05]
-v_cvt_f16_f32 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: encoding: [0xe9,0x14,0xfe,0x7e,0xff,0x00,0x00,0x00]
+v_cvt_f16_f32 v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05]
+
+v_cvt_f16_f32 v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00]
v_cvt_f16_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0xa2,0x0a,0x7e,0x01,0x77,0x39,0x05]
@@ -103,14 +106,17 @@ v_cvt_f16_u16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_f16_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: encoding: [0xe9,0xa0,0xfe,0x7e,0x7f,0x00,0x00,0x00]
-v_cvt_f32_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f32_f16 v5, v1.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05]
-v_cvt_f32_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: encoding: [0xea,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05]
+v_cvt_f32_f16 v5, v127.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x16,0x0a,0x7e,0x7f,0x77,0x39,0x05]
+
+v_cvt_f32_f16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x16,0x0a,0x7e,0x81,0x77,0x39,0x05]
-v_cvt_f32_f16 v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: encoding: [0xe9,0x16,0xfe,0x7f,0x7f,0x00,0x00,0x00]
+v_cvt_f32_f16 v255, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0x16,0xfe,0x7f,0xff,0x00,0x00,0x00]
v_cvt_f32_i32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: encoding: [0xe9,0x0a,0x0a,0x7e,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s
index 68de95a9857e81..30c9c12038a0c3 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
v_ceil_f16 v128, 0xfe0b
// GFX11: v_ceil_f16_e64
@@ -97,49 +97,94 @@ v_cos_f16 v255, vcc_lo
v_cos_f16 v5, v199
// GFX11: v_cos_f16_e64
-v_cvt_f16_f32 v128, 0xaf123456
+v_cvt_f16_f32 v128.l, 0xaf123456
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, -1
+v_cvt_f16_f32 v128.h, 0xaf123456
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, 0.5
+v_cvt_f16_f32 v255.l, -1
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, exec_hi
+v_cvt_f16_f32 v255.h, -1
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, exec_lo
+v_cvt_f16_f32 v255.l, 0.5
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, m0
+v_cvt_f16_f32 v255.h, 0.5
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, null
+v_cvt_f16_f32 v255.l, exec_hi
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, s1
+v_cvt_f16_f32 v255.h, exec_hi
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, s105
+v_cvt_f16_f32 v255.l, exec_lo
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, src_scc
+v_cvt_f16_f32 v255.h, exec_lo
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, ttmp15
+v_cvt_f16_f32 v255.l, m0
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, v1
+v_cvt_f16_f32 v255.h, m0
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, v255
+v_cvt_f16_f32 v255.l, null
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, vcc_hi
+v_cvt_f16_f32 v255.h, null
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, vcc_lo
+v_cvt_f16_f32 v255.l, s1
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, s1
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, s105
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, s105
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, src_scc
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, src_scc
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, ttmp15
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, ttmp15
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, v1
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, v1
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, v255
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, v255
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, vcc_hi
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, vcc_hi
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, vcc_lo
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, vcc_lo
// GFX11: v_cvt_f16_f32_e64
v_cvt_f16_i16 v128, 0xfe0b
@@ -238,7 +283,10 @@ v_cvt_f16_u16 v255, vcc_lo
v_cvt_f16_u16 v5, v199
// GFX11: v_cvt_f16_u16_e64
-v_cvt_f32_f16 v5, v199
+v_cvt_f32_f16 v5, v199.l
+// GFX11: v_cvt_f32_f16_e64
+
+v_cvt_f32_f16 v5, v199.h
// GFX11: v_cvt_f32_f16_e64
v_cvt_i16_f16 v128, 0xfe0b
@@ -1084,10 +1132,16 @@ v_cos_f16 v255, v127 quad_perm:[3,2,1,0]
v_cos_f16 v5, v199 quad_perm:[3,2,1,0]
// GFX11: v_cos_f16_e64
-v_cvt_f16_f32 v255, v1 quad_perm:[3,2,1,0]
+v_cvt_f16_f32 v255.l, v1 quad_perm:[3,2,1,0]
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, v1 quad_perm:[3,2,1,0]
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, v255 quad_perm:[3,2,1,0]
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, v255 quad_perm:[3,2,1,0]
+v_cvt_f16_f32 v255.h, v255 quad_perm:[3,2,1,0]
// GFX11: v_cvt_f16_f32_e64
v_cvt_f16_i16 v255, v1 quad_perm:[3,2,1,0]
@@ -1108,7 +1162,10 @@ v_cvt_f16_u16 v255, v127 quad_perm:[3,2,1,0]
v_cvt_f16_u16 v5, v199 quad_perm:[3,2,1,0]
// GFX11: v_cvt_f16_u16_e64
-v_cvt_f32_f16 v5, v199 quad_perm:[3,2,1,0]
+v_cvt_f32_f16 v5, v199.l quad_perm:[3,2,1,0]
+// GFX11: v_cvt_f32_f16_e64
+
+v_cvt_f32_f16 v5, v199.h quad_perm:[3,2,1,0]
// GFX11: v_cvt_f32_f16_e64
v_cvt_i16_f16 v255, v1 quad_perm:[3,2,1,0]
@@ -1291,10 +1348,16 @@ v_cos_f16 v255, v127 dpp8:[7,6,5,4,3,2,1,0]
v_cos_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cos_f16_e64
-v_cvt_f16_f32 v255, v1 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f16_f32 v255.l, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cvt_f16_f32_e64
-v_cvt_f16_f32 v255, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f16_f32 v255.h v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.l, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_f16_f32_e64
+
+v_cvt_f16_f32 v255.h, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cvt_f16_f32_e64
v_cvt_f16_i16 v255, v1 dpp8:[7,6,5,4,3,2,1,0]
@@ -1315,7 +1378,10 @@ v_cvt_f16_u16 v255, v127 dpp8:[7,6,5,4,3,2,1,0]
v_cvt_f16_u16 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cvt_f16_u16_e64
-v_cvt_f32_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f32_f16 v5, v199.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_f32_f16_e64
+
+v_cvt_f32_f16 v5, v199.h dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cvt_f32_f16_e64
v_cvt_i16_f16 v255, v1 dpp8:[7,6,5,4,3,2,1,0]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s
index 7c50b4c22fc294..76b1c38fad43d9 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s
@@ -1,5 +1,5 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
v_add_f16_e32 v255, v1, v2
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -13,8 +13,8 @@ v_fmac_f16_e32 v255, v1, v2
v_fmamk_f16_e32 v255, v1, 0xfe0b, v3
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_e32 v255, v1, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_e32 v255.l, v1.l, v2.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_e32 v255, v1, v2
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -43,8 +43,8 @@ v_fmac_f16_e32 v5, v255, v2
v_fmamk_f16_e32 v5, v255, 0xfe0b, v3
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_e32 v5, v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_e32 v5.l, v255.l, v2.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_e32 v5, v255, v2
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -73,8 +73,8 @@ v_fmac_f16_e32 v5, v1, v255
v_fmamk_f16_e32 v5, v1, 0xfe0b, v255
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_e32 v5, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_e32 v5.l, v1.l, v255.l
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_e32 v5, v1, v255
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -97,8 +97,8 @@ v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -121,8 +121,8 @@ v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -145,8 +145,8 @@ v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -169,8 +169,8 @@ v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -193,8 +193,8 @@ v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -217,8 +217,8 @@ v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-v_ldexp_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_ldexp_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
index d105ac6a72bb64..3e2402c2f6707c 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
@@ -345,46 +345,46 @@ v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
-v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+v_cvt_f16_f32_e64_dpp v5.l, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+v_cvt_f16_f32_e64_dpp v5.l, v1 quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_mirror
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_mirror
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_half_mirror
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_shl:1
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_shl:15
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_shr:1
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_shr:15
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_ror:1
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_ror:15
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+v_cvt_f16_f32_e64_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
-v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_cvt_f16_f32_e64_dpp v5.l, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
-v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_cvt_f16_f32_e64_dpp v5.l, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
-v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+v_cvt_f16_f32_e64_dpp v255.l, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
@@ -471,46 +471,46 @@ v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_
v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x05,0x30]
-v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+v_cvt_f32_f16_e64_dpp v5, v1.l quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+v_cvt_f32_f16_e64_dpp v5, v1.l quad_perm:[0,1,2,3]
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_mirror
+v_cvt_f32_f16_e64_dpp v5, v1.l row_mirror
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror
+v_cvt_f32_f16_e64_dpp v5, v1.l row_half_mirror
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1
+v_cvt_f32_f16_e64_dpp v5, v1.l row_shl:1
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15
+v_cvt_f32_f16_e64_dpp v5, v1.l row_shl:15
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1
+v_cvt_f32_f16_e64_dpp v5, v1.l row_shr:1
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15
+v_cvt_f32_f16_e64_dpp v5, v1.l row_shr:15
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1
+v_cvt_f32_f16_e64_dpp v5, v1.l row_ror:1
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15
+v_cvt_f32_f16_e64_dpp v5, v1.l row_ror:15
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+v_cvt_f32_f16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
-v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+v_cvt_f32_f16_e64_dpp v5, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
-v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+v_cvt_f32_f16_e64_dpp v5, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13]
-v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+v_cvt_f32_f16_e64_dpp v255, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
index c7faf1124755e0..7f70fb95359c72 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
@@ -93,16 +93,16 @@ v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
-v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f16_f32_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f16_f32_e64_dpp v5.l, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
-v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_cvt_f16_f32_e64_dpp v5.l, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: [0x05,0x00,0x8a,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
-v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+v_cvt_f16_f32_e64_dpp v255.l, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x81,0x8a,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
@@ -129,16 +129,16 @@ v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x80,0xd0,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00]
-v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f32_f16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+v_cvt_f32_f16_e64_dpp v5, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
-v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+v_cvt_f32_f16_e64_dpp v5, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: [0x05,0x00,0x8b,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
-v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+v_cvt_f32_f16_e64_dpp v255, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x81,0x8b,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
index e6744a5082f2c7..2788493de76706 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
v_bfrev_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00]
@@ -396,49 +396,49 @@ v_ctz_i32_b32_e64 v5, src_scc
v_ctz_i32_b32_e64 v255, 0xaf123456
// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_cvt_f16_f32_e64 v5, v1
+v_cvt_f16_f32_e64 v5.l, v1
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00]
-v_cvt_f16_f32_e64 v5, v255
+v_cvt_f16_f32_e64 v5.l, v255
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00]
-v_cvt_f16_f32_e64 v5, s1
+v_cvt_f16_f32_e64 v5.l, s1
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, s105
+v_cvt_f16_f32_e64 v5.l, s105
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, vcc_lo
+v_cvt_f16_f32_e64 v5.l, vcc_lo
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, vcc_hi
+v_cvt_f16_f32_e64 v5.l, vcc_hi
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, ttmp15
+v_cvt_f16_f32_e64 v5.l, ttmp15
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, m0
+v_cvt_f16_f32_e64 v5.l, m0
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, exec_lo
+v_cvt_f16_f32_e64 v5.l, exec_lo
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, exec_hi
+v_cvt_f16_f32_e64 v5.l, exec_hi
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, null
+v_cvt_f16_f32_e64 v5.l, null
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, -1
+v_cvt_f16_f32_e64 v5.l, -1
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
-v_cvt_f16_f32_e64 v5, 0.5 mul:2
+v_cvt_f16_f32_e64 v5.l, 0.5 mul:2
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
-v_cvt_f16_f32_e64 v5, src_scc mul:4
+v_cvt_f16_f32_e64 v5.l, src_scc mul:4
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10]
-v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2
+v_cvt_f16_f32_e64 v255.l, -|0xaf123456| clamp div:2
// GFX11: encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
v_cvt_f16_i16_e64 v5, v1
@@ -531,10 +531,10 @@ v_cvt_f16_u16_e64 v5, src_scc mul:4
v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2
// GFX11: encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00]
-v_cvt_f32_f16_e64 v5, v1
+v_cvt_f32_f16_e64 v5, v1.l
// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00]
-v_cvt_f32_f16_e64 v5, v255
+v_cvt_f32_f16_e64 v5, v255.l
// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
v_cvt_f32_f16_e64 v5, s1
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
index 7a6bb874b105df..a3917e05ec68d0 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s
@@ -2,7 +2,6 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s
-
// this file will be converted to true16 format when more true16 instructions are supported
v_bfrev_b32_e32 v5, v1
@@ -446,50 +445,56 @@ v_cvt_pk_f32_fp8_e32 v[2:3], 3
v_cvt_pk_f32_fp8_e32 v[2:3], v3
// GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xdd,0x04,0x7e]
-v_cvt_f16_f32 v5, v1
-// GFX12: v_cvt_f16_f32_e32 v5, v1 ; encoding: [0x01,0x15,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, v1
+// GFX12: v_cvt_f16_f32_e32 v5.l, v1 ; encoding: [0x01,0x15,0x0a,0x7e]
+
+v_cvt_f16_f32 v5.l, v255
+// GFX12: v_cvt_f16_f32_e32 v5.l, v255 ; encoding: [0xff,0x15,0x0a,0x7e]
+
+v_cvt_f16_f32 v5.l, s1
+// GFX12: v_cvt_f16_f32_e32 v5.l, s1 ; encoding: [0x01,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, v255
-// GFX12: v_cvt_f16_f32_e32 v5, v255 ; encoding: [0xff,0x15,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, s105
+// GFX12: v_cvt_f16_f32_e32 v5.l, s105 ; encoding: [0x69,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, s1
-// GFX12: v_cvt_f16_f32_e32 v5, s1 ; encoding: [0x01,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, vcc_lo
+// GFX12: v_cvt_f16_f32_e32 v5.l, vcc_lo ; encoding: [0x6a,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, s105
-// GFX12: v_cvt_f16_f32_e32 v5, s105 ; encoding: [0x69,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, vcc_hi
+// GFX12: v_cvt_f16_f32_e32 v5.l, vcc_hi ; encoding: [0x6b,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, vcc_lo
-// GFX12: v_cvt_f16_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, ttmp15
+// GFX12: v_cvt_f16_f32_e32 v5.l, ttmp15 ; encoding: [0x7b,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, vcc_hi
-// GFX12: v_cvt_f16_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, m0
+// GFX12: v_cvt_f16_f32_e32 v5.l, m0 ; encoding: [0x7d,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, ttmp15
-// GFX12: v_cvt_f16_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, exec_lo
+// GFX12: v_cvt_f16_f32_e32 v5.l, exec_lo ; encoding: [0x7e,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, m0
-// GFX12: v_cvt_f16_f32_e32 v5, m0 ; encoding: [0x7d,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, exec_hi
+// GFX12: v_cvt_f16_f32_e32 v5.l, exec_hi ; encoding: [0x7f,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, exec_lo
-// GFX12: v_cvt_f16_f32_e32 v5, exec_lo ; encoding: [0x7e,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, null
+// GFX12: v_cvt_f16_f32_e32 v5.l, null ; encoding: [0x7c,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, exec_hi
-// GFX12: v_cvt_f16_f32_e32 v5, exec_hi ; encoding: [0x7f,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, -1
+// GFX12: v_cvt_f16_f32_e32 v5.l, -1 ; encoding: [0xc1,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, null
-// GFX12: v_cvt_f16_f32_e32 v5, null ; encoding: [0x7c,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, 0.5
+// GFX12: v_cvt_f16_f32_e32 v5.l, 0.5 ; encoding: [0xf0,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, -1
-// GFX12: v_cvt_f16_f32_e32 v5, -1 ; encoding: [0xc1,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.l, src_scc
+// GFX12: v_cvt_f16_f32_e32 v5.l, src_scc ; encoding: [0xfd,0x14,0x0a,0x7e]
-v_cvt_f16_f32 v5, 0.5
-// GFX12: v_cvt_f16_f32_e32 v5, 0.5 ; encoding: [0xf0,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v127.l, 0xaf123456
+// GFX12: v_cvt_f16_f32_e32 v127.l, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7e,0x56,0x34,0x12,0xaf]
-v_cvt_f16_f32 v5, src_scc
-// GFX12: v_cvt_f16_f32_e32 v5, src_scc ; encoding: [0xfd,0x14,0x0a,0x7e]
+v_cvt_f16_f32 v5.h, src_scc
+// GFX12: v_cvt_f16_f32_e32 v5.h, src_scc ; encoding: [0xfd,0x14,0x0a,0x7f]
-v_cvt_f16_f32 v127, 0xaf123456
-// GFX12: v_cvt_f16_f32_e32 v127, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7e,0x56,0x34,0x12,0xaf]
+v_cvt_f16_f32 v127.h, 0xaf123456
+// GFX12: v_cvt_f16_f32_e32 v127.h, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf]
v_cvt_f16_i16 v5, v1
// GFX12: v_cvt_f16_i16_e32 v5, v1 ; encoding: [0x01,0xa3,0x0a,0x7e]
@@ -583,11 +588,17 @@ v_cvt_f16_u16 v5, src_scc
v_cvt_f16_u16 v127, 0xfe0b
// GFX12: v_cvt_f16_u16_e32 v127, 0xfe0b ; encoding: [0xff,0xa0,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
-v_cvt_f32_f16 v5, v1
-// GFX12: v_cvt_f32_f16_e32 v5, v1 ; encoding: [0x01,0x17,0x0a,0x7e]
+v_cvt_f32_f16 v5, v1.l
+// GFX12: v_cvt_f32_f16_e32 v5, v1.l ; encoding: [0x01,0x17,0x0a,0x7e]
+
+v_cvt_f32_f16 v5, v127.l
+// GFX12: v_cvt_f32_f16_e32 v5, v127.l ; encoding: [0x7f,0x17,0x0a,0x7e]
+
+v_cvt_f32_f16 v5, v1.h
+// GFX12: v_cvt_f32_f16_e32 v5, v1.h ; encoding: [0x81,0x17,0x0a,0x7e]
-v_cvt_f32_f16 v5, v127
-// GFX12: v_cvt_f32_f16_e32 v5, v127 ; encoding: [0x7f,0x17,0x0a,0x7e]
+v_cvt_f32_f16 v5, v127.h
+// GFX12: v_cvt_f32_f16_e32 v5, v127.h ; encoding: [0xff,0x17,0x0a,0x7e]
v_cvt_f32_f16 v5, s1
// GFX12: v_cvt_f32_f16_e32 v5, s1 ; encoding: [0x01,0x16,0x0a,0x7e]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
index 851242dd5deceb..7082753f9403c2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt
@@ -422,50 +422,65 @@
# GFX11: v_ctz_i32_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf]
0xff,0x74,0xfe,0x7f,0x56,0x34,0x12,0xaf
-# GFX11: v_cvt_f16_f32_e32 v5, v1 ; encoding: [0x01,0x15,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, v1 ; encoding: [0x01,0x15,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, v1 ; encoding: [0x01,0x15,0x0a,0x7e]
0x01,0x15,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, v255 ; encoding: [0xff,0x15,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, v255 ; encoding: [0xff,0x15,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, v255 ; encoding: [0xff,0x15,0x0a,0x7e]
0xff,0x15,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, s1 ; encoding: [0x01,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, s1 ; encoding: [0x01,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, s1 ; encoding: [0x01,0x14,0x0a,0x7e]
0x01,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, s105 ; encoding: [0x69,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, s105 ; encoding: [0x69,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, s105 ; encoding: [0x69,0x14,0x0a,0x7e]
0x69,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, vcc_lo ; encoding: [0x6a,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, vcc_lo ; encoding: [0x6a,0x14,0x0a,0x7e]
0x6a,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, vcc_hi ; encoding: [0x6b,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, vcc_hi ; encoding: [0x6b,0x14,0x0a,0x7e]
0x6b,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, ttmp15 ; encoding: [0x7b,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, ttmp15 ; encoding: [0x7b,0x14,0x0a,0x7e]
0x7b,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, m0 ; encoding: [0x7d,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, m0 ; encoding: [0x7d,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, m0 ; encoding: [0x7d,0x14,0x0a,0x7e]
0x7d,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, exec_lo ; encoding: [0x7e,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, exec_lo ; encoding: [0x7e,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, exec_lo ; encoding: [0x7e,0x14,0x0a,0x7e]
0x7e,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, exec_hi ; encoding: [0x7f,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, exec_hi ; encoding: [0x7f,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, exec_hi ; encoding: [0x7f,0x14,0x0a,0x7e]
0x7f,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, null ; encoding: [0x7c,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, null ; encoding: [0x7c,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, null ; encoding: [0x7c,0x14,0x0a,0x7e]
0x7c,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, -1 ; encoding: [0xc1,0x14,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v5, -1 ; encoding: [0xc1,0x14,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.l, -1 ; encoding: [0xc1,0x14,0x0a,0x7e]
0xc1,0x14,0x0a,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, 0.5 ; encoding: [0xf0,0x14,0x0a,0x7e]
-0xf0,0x14,0x0a,0x7e
+# GFX11-FAKE16: v_cvt_f16_f32_e32 v127, 0.5 ; encoding: [0xf0,0x14,0xfe,0x7e]
+# GFX11-REAL16: v_cvt_f16_f32_e32 v127.l, 0.5 ; encoding: [0xf0,0x14,0xfe,0x7e]
+0xf0,0x14,0xfe,0x7e
-# GFX11: v_cvt_f16_f32_e32 v5, src_scc ; encoding: [0xfd,0x14,0x0a,0x7e]
-0xfd,0x14,0x0a,0x7e
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x14,0x0a,0x7f
+# GFX11-REAL16: v_cvt_f16_f32_e32 v5.h, src_scc ; encoding: [0xfd,0x14,0x0a,0x7f]
+0xfd,0x14,0x0a,0x7f
-# GFX11: v_cvt_f16_f32_e32 v127, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7e,0x56,0x34,0x12,0xaf]
-0xff,0x14,0xfe,0x7e,0x56,0x34,0x12,0xaf
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf
+# GFX11-REAL16: v_cvt_f16_f32_e32 v127.h, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf
# GFX11: v_cvt_f16_i16_e32 v5, v1 ; encoding: [0x01,0xa3,0x0a,0x7e]
0x01,0xa3,0x0a,0x7e
@@ -557,12 +572,22 @@
# GFX11: v_cvt_f16_u16_e32 v127, 0xfe0b ; encoding: [0xff,0xa0,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
0xff,0xa0,0xfe,0x7e,0x0b,0xfe,0x00,0x00
-# GFX11: v_cvt_f32_f16_e32 v5, v1 ; encoding: [0x01,0x17,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f32_f16_e32 v5, v1 ; encoding: [0x01,0x17,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f32_f16_e32 v5, v1.l ; encoding: [0x01,0x17,0x0a,0x7e]
0x01,0x17,0x0a,0x7e
-# GFX11: v_cvt_f32_f16_e32 v5, v127 ; encoding: [0x7f,0x17,0x0a,0x7e]
+# GFX11-FAKE16: v_cvt_f32_f16_e32 v5, v127 ; encoding: [0x7f,0x17,0x0a,0x7e]
+# GFX11-REAL16: v_cvt_f32_f16_e32 v5, v127.l ; encoding: [0x7f,0x17,0x0a,0x7e]
0x7f,0x17,0x0a,0x7e
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0x81,0x17,0x0a,0x7e
+# GFX11-REAL16: v_cvt_f32_f16_e32 v5, v1.h ; encoding: [0x81,0x17,0x0a,0x7e]
+0x81,0x17,0x0a,0x7e
+
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0x17,0x0a,0x7e
+# GFX11-REAL16: v_cvt_f32_f16_e32 v5, v127.h ; encoding: [0xff,0x17,0x0a,0x7e]
+0xff,0x17,0x0a,0x7e
+
# GFX11: v_cvt_f32_f16_e32 v5, s1 ; encoding: [0x01,0x16,0x0a,0x7e]
0x01,0x16,0x0a,0x7e
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index d697addd2686ca..c058cfd706e6c8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -353,47 +353,61 @@
# GFX11: v_ctz_i32_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x74,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_cvt_f16_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x1b,0x00,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0xe4,0x00,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0xe4,0x00,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x40,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x40,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x41,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x41,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x01,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x01,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x0f,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x0f,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x11,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x11,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x1f,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x1f,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x21,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x21,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x2f,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x2f,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x50,0x01,0xff]
0xfa,0x14,0x0a,0x7e,0x01,0x50,0x01,0xff
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x5f,0x01,0x01]
-0xfa,0x14,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v127, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x14,0xfe,0x7e,0x01,0x5f,0x01,0x01]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x14,0xfe,0x7e,0x01,0x5f,0x01,0x01]
+0xfa,0x14,0xfe,0x7e,0x01,0x5f,0x01,0x01
-# GFX11: v_cvt_f16_f32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x14,0x0a,0x7e,0x01,0x60,0x01,0x13]
-0xfa,0x14,0x0a,0x7e,0x01,0x60,0x01,0x13
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xfa,0x14,0x0a,0x7f,0x01,0x60,0x01,0x13
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x14,0x0a,0x7f,0x01,0x60,0x01,0x13]
+0xfa,0x14,0x0a,0x7f,0x01,0x60,0x01,0x13
-# GFX11: v_cvt_f16_f32_dpp v127, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x14,0xfe,0x7e,0xff,0x6f,0x3d,0x30]
-0xfa,0x14,0xfe,0x7e,0xff,0x6f,0x3d,0x30
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xfa,0x14,0xfe,0x7f,0xff,0x6f,0x3d,0x30
+# GFX11-REAL16: v_cvt_f16_f32_dpp v127.h, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x14,0xfe,0x7f,0xff,0x6f,0x3d,0x30]
+0xfa,0x14,0xfe,0x7f,0xff,0x6f,0x3d,0x30
# GFX11: v_cvt_f16_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -479,47 +493,61 @@
# GFX11: v_cvt_f16_u16_dpp v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xa0,0xfe,0x7e,0x7f,0x6f,0x0d,0x30]
0xfa,0xa0,0xfe,0x7e,0x7f,0x6f,0x0d,0x30
-# GFX11: v_cvt_f32_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x1b,0x00,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0xe4,0x00,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0xe4,0x00,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x40,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x40,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x41,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x41,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x01,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x01,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x0f,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x0f,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x11,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x11,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x1f,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x1f,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x21,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x21,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x2f,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x2f,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x50,0x01,0xff]
0xfa,0x16,0x0a,0x7e,0x01,0x50,0x01,0xff
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x5f,0x01,0x01]
-0xfa,0x16,0x0a,0x7e,0x01,0x5f,0x01,0x01
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v127 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x16,0x0a,0x7e,0x7f,0x5f,0x01,0x01]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x16,0x0a,0x7e,0x7f,0x5f,0x01,0x01]
+0xfa,0x16,0x0a,0x7e,0x7f,0x5f,0x01,0x01
-# GFX11: v_cvt_f32_f16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x16,0x0a,0x7e,0x01,0x60,0x01,0x13]
-0xfa,0x16,0x0a,0x7e,0x01,0x60,0x01,0x13
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xfa,0x16,0x0a,0x7e,0x81,0x60,0x01,0x13
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x16,0x0a,0x7e,0x81,0x60,0x01,0x13]
+0xfa,0x16,0x0a,0x7e,0x81,0x60,0x01,0x13
-# GFX11: v_cvt_f32_f16_dpp v255, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x16,0xfe,0x7f,0x7f,0x6f,0x3d,0x30]
-0xfa,0x16,0xfe,0x7f,0x7f,0x6f,0x3d,0x30
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xfa,0x16,0xfe,0x7f,0xff,0x6f,0x3d,0x30
+# GFX11-REAL16: v_cvt_f32_f16_dpp v255, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x16,0xfe,0x7f,0xff,0x6f,0x3d,0x30]
+0xfa,0x16,0xfe,0x7f,0xff,0x6f,0x3d,0x30
# GFX11: v_cvt_f32_i32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index 6b8de94465156b..7b85cc0c688480 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -60,11 +60,21 @@
# GFX11: v_ctz_i32_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x74,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x14,0x0a,0x7e,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f16_f32_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x14,0xfe,0x7e,0xff,0x00,0x00,0x00]
-0xea,0x14,0xfe,0x7e,0xff,0x00,0x00,0x00
+# GFX11-FAKE16: v_cvt_f16_f32_dpp v127, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0xfe,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f16_f32_dpp v127.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0xfe,0x7e,0x01,0x77,0x39,0x05]
+0xe9,0x14,0xfe,0x7e,0x01,0x77,0x39,0x05
+
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xe9,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05
+# GFX11-REAL16: v_cvt_f16_f32_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05]
+0xe9,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05
+
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xea,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11-REAL16: v_cvt_f16_f32_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00]
+0xea,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00
# GFX11: v_cvt_f16_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa2,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0xa2,0x0a,0x7e,0x01,0x77,0x39,0x05
@@ -78,11 +88,21 @@
# GFX11: v_cvt_f16_u16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa0,0xfe,0x7e,0x7f,0x00,0x00,0x00]
0xea,0xa0,0xfe,0x7e,0x7f,0x00,0x00,0x00
-# GFX11: v_cvt_f32_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x16,0x0a,0x7e,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f32_f16_dpp v255, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x16,0xfe,0x7f,0x7f,0x00,0x00,0x00]
-0xea,0x16,0xfe,0x7f,0x7f,0x00,0x00,0x00
+# GFX11-FAKE16: v_cvt_f32_f16_dpp v5, v127 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x16,0x0a,0x7e,0x7f,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x16,0x0a,0x7e,0x7f,0x77,0x39,0x05]
+0xe9,0x16,0x0a,0x7e,0x7f,0x77,0x39,0x05
+
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xe9,0x16,0x0a,0x7e,0x81,0x77,0x39,0x05
+# GFX11-REAL16: v_cvt_f32_f16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x16,0x0a,0x7e,0x81,0x77,0x39,0x05]
+0xe9,0x16,0x0a,0x7e,0x81,0x77,0x39,0x05
+
+# COM: GFX11-FAKE16: warning: invalid instruction encoding 0xea,0x16,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11-REAL16: v_cvt_f32_f16_dpp v255, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x16,0xfe,0x7f,0xff,0x00,0x00,0x00]
+0xea,0x16,0xfe,0x7f,0xff,0x00,0x00,0x00
# GFX11: v_cvt_f32_i32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x0a,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x0a,0x0a,0x7e,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index ac208caa58a32b..768bb68ef24997 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -363,46 +363,60 @@
# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13]
0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13
-# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.l, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -489,46 +503,60 @@
# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30]
0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13]
0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13
-# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index c39ff6fb6367af..2b99246fcfa4be 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -89,16 +89,20 @@
# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v5.l, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.l, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -125,16 +129,20 @@
# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00]
0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05]
0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v5, v1.l mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05]
0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index d7f6c8de471eac..89f4def0e85079 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -414,49 +414,64 @@
# GFX11: v_ctz_i32_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00
-# GFX11: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08
-# GFX11: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v5.l, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10]
0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10
-# GFX11: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX11-REAL16: v_cvt_f16_f32_e64 v255.l, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
# GFX11: v_cvt_f16_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00]
@@ -549,10 +564,12 @@
# GFX11: v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00]
0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00
-# GFX11: v_cvt_f32_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00]
+# GFX11-REAL16: v_cvt_f32_f16_e64 v5, v1.l ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00
-# GFX11: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX11-REAL16: v_cvt_f32_f16_e64 v5, v255.l ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00
# GFX11: v_cvt_f32_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00]
More information about the llvm-commits
mailing list