[llvm] 6cb3866 - Revert "[AMDGPU] Introduce real and keep fake True16 instructions."
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 07:40:36 PDT 2023
Author: Ivan Kosarev
Date: 2023-09-22T15:40:26+01:00
New Revision: 6cb3866b1ce9d835402e414049478cea82427cf1
URL: https://github.com/llvm/llvm-project/commit/6cb3866b1ce9d835402e414049478cea82427cf1
DIFF: https://github.com/llvm/llvm-project/commit/6cb3866b1ce9d835402e414049478cea82427cf1.diff
LOG: Revert "[AMDGPU] Introduce real and keep fake True16 instructions."
This reverts commit 0f864c7b8bc9323293ec3d85f4bd5322f8f61b16 due to
failures on expensive checks.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOP1Instructions.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir
llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index efa1cc0696d2f7c..e0b40199b774585 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1693,15 +1693,6 @@ def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;
-// Control use of True16 instructions. The real True16 instructions are
-// True16 instructions as they are defined in the ISA. Fake True16
-// instructions have the same encoding as real ones but syntactically
-// only allow 32-bit registers in operands and use low halves thereof.
-def UseRealTrue16Insts : Predicate<"Subtarget->useRealTrue16Insts()">,
- AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
-def UseFakeTrue16Insts : Predicate<"Subtarget->hasTrue16BitInsts() && "
- "!Subtarget->useRealTrue16Insts()">;
-
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 54dce2dbcbffc26..4c76e19abc2a8f2 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -420,14 +420,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- Res =
- tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
- MI, DecW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
- MI, DecW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
convertVOP3PDPPInst(MI);
@@ -466,8 +463,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP8GFX1164,
- DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
@@ -475,8 +471,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
- MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI);
@@ -537,8 +532,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
- Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
if (Res) break;
if (Bytes.size() < 4) break;
@@ -568,8 +562,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
- Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
if (Res)
break;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 70b99fab2631a51..4e14219ffc80d8c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -144,17 +144,6 @@ class AMDGPUDisassembler : public MCDisassembler {
return MCDisassembler::Fail;
}
- template <typename InsnType>
- DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
- MCInst &MI, InsnType Inst, uint64_t Address,
- raw_ostream &Comments) const {
- for (const uint8_t *T : {Table1, Table2}) {
- if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
- return Res;
- }
- return MCDisassembler::Fail;
- }
-
std::optional<DecodeStatus>
onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &CStream) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1ebfa297f4fc339..1032f7a95d791d6 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1408,7 +1408,6 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
case AMDGPU::V_MAX_F32_e64:
case AMDGPU::V_MAX_F16_e64:
case AMDGPU::V_MAX_F16_t16_e64:
- case AMDGPU::V_MAX_F16_fake16_e64:
case AMDGPU::V_MAX_F64_e64:
case AMDGPU::V_PK_MAX_F16: {
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
@@ -1504,8 +1503,7 @@ static int getOModValue(unsigned Opc, int64_t Val) {
}
}
case AMDGPU::V_MUL_F16_e64:
- case AMDGPU::V_MUL_F16_t16_e64:
- case AMDGPU::V_MUL_F16_fake16_e64: {
+ case AMDGPU::V_MUL_F16_t16_e64: {
switch (static_cast<uint16_t>(Val)) {
case 0x3800: // 0.5
return SIOutMods::DIV2;
@@ -1532,14 +1530,12 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_MUL_F64_e64:
case AMDGPU::V_MUL_F32_e64:
case AMDGPU::V_MUL_F16_t16_e64:
- case AMDGPU::V_MUL_F16_fake16_e64:
case AMDGPU::V_MUL_F16_e64: {
// If output denormals are enabled, omod is ignored.
if ((Op == AMDGPU::V_MUL_F32_e64 &&
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
- Op == AMDGPU::V_MUL_F16_t16_e64 ||
- Op == AMDGPU::V_MUL_F16_fake16_e64) &&
+ Op == AMDGPU::V_MUL_F16_t16_e64) &&
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
return std::pair(nullptr, SIOutMods::NONE);
@@ -1569,14 +1565,12 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_ADD_F64_e64:
case AMDGPU::V_ADD_F32_e64:
case AMDGPU::V_ADD_F16_e64:
- case AMDGPU::V_ADD_F16_t16_e64:
- case AMDGPU::V_ADD_F16_fake16_e64: {
+ case AMDGPU::V_ADD_F16_t16_e64: {
// If output denormals are enabled, omod is ignored.
if ((Op == AMDGPU::V_ADD_F32_e64 &&
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
- Op == AMDGPU::V_ADD_F16_t16_e64 ||
- Op == AMDGPU::V_ADD_F16_fake16_e64) &&
+ Op == AMDGPU::V_ADD_F16_t16_e64) &&
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
return std::pair(nullptr, SIOutMods::NONE);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a34bb02d9ae4dce..9eb341212554f62 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2262,7 +2262,6 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field list<ValueType> ArgVT = _ArgVT;
field bit EnableClamp = _EnableClamp;
field bit IsTrue16 = 0;
- field bit IsRealTrue16 = 0;
field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
@@ -2454,21 +2453,6 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
// class, so copy changes to this class in those profiles
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
- let IsTrue16 = 1;
- let IsRealTrue16 = 1;
- // Most DstVT are 16-bit, but not all.
- let DstRC = getVALUDstForVT_t16<DstVT>.ret;
- let DstRC64 = getVALUDstForVT<DstVT>.ret;
- let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
- let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
- let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
- let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
-}
-
-class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
let IsTrue16 = 1;
// Most DstVT are 16-bit, but not all
let DstRC = getVALUDstForVT_t16<DstVT>.ret;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 245808fc22a9c99..5549652aae90bcb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1641,10 +1641,8 @@ def : ClampPat<V_MAX_F32_e64, f32>;
def : ClampPat<V_MAX_F64_e64, f64>;
let SubtargetPredicate = NotHasTrue16BitInsts in
def : ClampPat<V_MAX_F16_e64, f16>;
-let SubtargetPredicate = UseRealTrue16Insts in
+let SubtargetPredicate = HasTrue16BitInsts in
def : ClampPat<V_MAX_F16_t16_e64, f16>;
-let SubtargetPredicate = UseFakeTrue16Insts in
-def : ClampPat<V_MAX_F16_fake16_e64, f16>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
@@ -2698,12 +2696,12 @@ def : GCNPat<
let OtherPredicates = [HasTrue16BitInsts] in {
def : GCNPat<
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
- (V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
+ (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
>;
def : GCNPat<
(fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
- (V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
+ (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
>;
} // End OtherPredicates
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 00e4701e33bf521..7684e6a7a9b10b4 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -152,7 +152,7 @@ multiclass VOP1Inst_t16<string opName,
defm NAME : VOP1Inst<opName, P, node>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
- defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
+ defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
}
}
@@ -170,7 +170,7 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
}
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
- VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
+ VOPProfile_True16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
@@ -199,7 +199,7 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
-def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
+def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
let HasOMod = 1;
}
@@ -292,13 +292,13 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
let OtherPredicates = [HasTrue16BitInsts] in
- defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
+ defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0
let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [HasTrue16BitInsts] in
-defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
+defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 8d2bae626d285cf..83df7a3cca4cbe2 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -194,12 +194,9 @@ multiclass VOP2Inst_t16<string opName,
let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
}
- let SubtargetPredicate = UseRealTrue16Insts in {
+ let SubtargetPredicate = HasTrue16BitInsts in {
defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
}
- let SubtargetPredicate = UseFakeTrue16Insts in {
- defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16", GFX9Renamed>;
- }
}
// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
@@ -215,7 +212,7 @@ multiclass VOP2Inst_e64_t16<string opName,
defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
}
let SubtargetPredicate = HasTrue16BitInsts in {
- defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16", GFX9Renamed>;
+ defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
}
}
@@ -877,7 +874,7 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
let HasSrc1FloatMods = 0;
let Src1ModSDWA = Int16SDWAInputMods;
}
-def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
+def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
let Src1DPP = VGPR_32_Lo128;
let Src1ModDPP = IntT16VRegInputMods;
@@ -928,9 +925,9 @@ def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
let SubtargetPredicate = isGFX11Plus in {
let isCommutable = 1 in {
- defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
- defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
- defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
+ defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
+ defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
+ defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus
@@ -1310,8 +1307,6 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
string asmName, bit single = 0> {
defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "GFX11", "GFX11_FAKE16"),
- AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in
def _e32_gfx11 :
VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>,
VOP2e<op{5-0}, ps.Pfl> {
@@ -1336,8 +1331,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
SIEncodingFamily.GFX11> {
let AsmString = asmName # ps.Pfl.AsmDPP16;
- let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPPGFX11", "DPPGFX11_FAKE16");
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only);
+ let DecoderNamespace = "DPPGFX11";
}
}
multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
@@ -1346,8 +1340,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
if ps.Pfl.HasExtDPP then
def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
let AsmString = asmName # ps.Pfl.AsmDPP8;
- let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPP8GFX11", "DPP8GFX11_FAKE16");
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only);
+ let DecoderNamespace = "DPP8GFX11";
}
}
@@ -1498,19 +1491,13 @@ defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f,
defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;
defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
-defm V_ADD_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
-defm V_SUB_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
-defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
-defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
-defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
-defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 395bb01f867cf3a..90ba6e298429f4c 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1358,8 +1358,6 @@ let AssemblerPredicate = isGFX11Only,
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3OpSel_gfx11<op, ps.Pfl>;
if !not(ps.Pfl.HasOpSel) then
- let DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "GFX11", "GFX11_FAKE16"),
- AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in
def _e64_gfx11 :
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3e_gfx11<op, ps.Pfl>;
@@ -1390,9 +1388,7 @@ let AssemblerPredicate = isGFX11Only,
multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- let AsmString = asmName # ps.Pfl.AsmVOP3DPP16,
- DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPPGFX11", "DPPGFX11_FAKE16"),
- AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in {
+ let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, DecoderNamespace = "DPPGFX11" in {
defm NAME : VOP3_Real_dpp_Base_gfx11<op, opName>;
}
}
@@ -1415,9 +1411,7 @@ let AssemblerPredicate = isGFX11Only,
multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
- let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
- DecoderNamespace = !if(ps.Pfl.IsRealTrue16, "DPP8GFX11", "DPP8GFX11_FAKE16"),
- AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, isGFX11Only) in {
+ let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8GFX11" in {
defm NAME : VOP3_Real_dpp8_Base_gfx11<op, opName>;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir
index c2c6b235cf5b210..ec50b56bebc76a2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir
@@ -18,16 +18,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
- ;
+ ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %4
; GFX11-LABEL: name: fmaxnum_ieee_f16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]]
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -50,16 +49,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
- ;
+ ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %5
; GFX11-LABEL: name: fmaxnum_ieee_f16_v_fneg_v
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]]
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir
index 8c658a6c2820152..27f275b58eceb2b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir
@@ -18,16 +18,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
- ;
+ ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %4
; GFX11-LABEL: name: fmaxnum_f16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]]
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -50,16 +49,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]]
- ;
+ ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %5
; GFX11-LABEL: name: fmaxnum_f16_v_fneg_v
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MAX_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_fake16_e64_]]
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir
index 0878d894645bfec..17687956044cc2a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir
@@ -18,16 +18,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]]
- ;
+ ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %4
; GFX11-LABEL: name: fminnum_ieee_f16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]]
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -50,16 +49,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]]
- ;
+ ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %5
; GFX11-LABEL: name: fminnum_ieee_f16_v_fneg_v
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]]
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir
index 5eb4d263a111d78..34b50f60f7ed75e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir
@@ -18,16 +18,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]]
- ;
+ ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %4
; GFX11-LABEL: name: fminnum_f16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]]
+ ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %4
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -50,16 +49,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]]
- ;
+ ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit %5
; GFX11-LABEL: name: fminnum_f16_v_fneg_v
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_MIN_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_fake16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_fake16_e64_]]
+ ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit %5
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
index 3f4b039a976ed26..3628c6cc8a52031 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll
@@ -534,9 +534,9 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_half(half inre
; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: [[V_ADD_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_fake16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GISEL-GFX11-NEXT: [[V_ADD_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: FLAT_STORE_SHORT [[COPY3]], [[V_ADD_F16_fake16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
+ ; GISEL-GFX11-NEXT: FLAT_STORE_SHORT [[COPY3]], [[V_ADD_F16_t16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
; GISEL-GFX11-NEXT: S_ENDPGM 0
;
; GISEL-GFX10-LABEL: name: amdgpu_cs_chain_preserve_cc_half
@@ -558,10 +558,10 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_half(half inre
; DAGISEL-GFX11-NEXT: {{ $}}
; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; DAGISEL-GFX11-NEXT: [[V_ADD_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_fake16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; DAGISEL-GFX11-NEXT: [[V_ADD_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_t16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; DAGISEL-GFX11-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_F16_fake16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
+ ; DAGISEL-GFX11-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_F16_t16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
;
; DAGISEL-GFX10-LABEL: name: amdgpu_cs_chain_preserve_cc_half
@@ -589,9 +589,9 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a,
; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: [[V_ADD_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_fake16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GISEL-GFX11-NEXT: [[V_ADD_F16_t16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]]
- ; GISEL-GFX11-NEXT: FLAT_STORE_SHORT [[COPY3]], [[V_ADD_F16_fake16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
+ ; GISEL-GFX11-NEXT: FLAT_STORE_SHORT [[COPY3]], [[V_ADD_F16_t16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`)
; GISEL-GFX11-NEXT: S_ENDPGM 0
;
; GISEL-GFX10-LABEL: name: amdgpu_cs_chain_cc_bfloat
More information about the llvm-commits
mailing list