[llvm] [AMDGPU][True16][MC] true16 for v_cmp_lt_f16 (PR #122499)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 12:47:02 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
Add True16 format support for v_cmp_lt_f16. Update the VOPC t16 and fake16 pseudos.
---
Patch is 323.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122499.diff
33 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+17-2)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h (+1)
- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+116-62)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir (+4-3)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s (+69-54)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vopc.s (+35-20)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s (+28-8)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc.s (+100-60)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s (+160-56)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s (+36-12)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s (+108-42)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s (+110-44)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s (+30-10)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s (+77-62)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s (+43-28)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc.s (+92-60)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s (+152-56)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s (+28-12)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s (+108-42)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s (+142-54)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vopc.txt (+72-31)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vopc.txt (+36-13)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vopc.txt (+24-8)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt (+114-34)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt (+78-32)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt (+30-8)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c.txt (+25-8)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt (+76-33)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt (+40-15)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt (+88-34)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt (+72-32)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt (+25-8)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index d2363274965a3c..31f47b6125bd74 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -668,9 +668,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
convertVOP3PDPPInst(MI);
- else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
- AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI); // Special VOP3 case
+ else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ convertVOPC64DPPInst(MI); // Special VOP3 case
else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
-1)
convertDPP8Inst(MI);
@@ -1254,6 +1255,20 @@ void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
AMDGPU::OpName::src1_modifiers);
}
+void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
+ unsigned Opc = MI.getOpcode();
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+
+ convertTrue16OpSel(MI);
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
+ auto Mods = collectVOPModifiers(MI);
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
+ AMDGPU::OpName::op_sel);
+ }
+}
+
void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 9a06cc3dc8c782..29452166e21a00 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -210,6 +210,7 @@ class AMDGPUDisassembler : public MCDisassembler {
void convertVOP3DPPInst(MCInst &MI) const;
void convertVOP3PDPPInst(MCInst &MI) const;
void convertVOPCDPPInst(MCInst &MI) const;
+ void convertVOPC64DPPInst(MCInst &MI) const;
void convertMacDPPInst(MCInst &MI) const;
void convertTrue16OpSel(MCInst &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 8589d598f58702..842647a2a2b75d 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -89,23 +89,57 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
def _t16 : VOPC_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
- let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let HasOpSel = 1;
+ let HasModifiers = 1; // All instructions at least have OpSel
+ let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_16;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
+
+ let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
}
def _fake16: VOPC_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_32;
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
+
+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}
@@ -283,7 +317,9 @@ class getVOPCPat64 <SDPatternOperator cond, VOPProfile P> : LetDummies {
(setcc (P.Src0VT
!if(P.HasOMod,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
- (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
+ !if(P.HasClamp,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)))),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
@@ -324,6 +360,10 @@ multiclass VOPC_Pseudos <string opName,
let SchedRW = P.Schedule;
let isCompare = 1;
let isCommutable = 1;
+ let AsmMatchConverter =
+ !if (P.HasOpSel, "cvtVOP3OpSel",
+ !if (!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), "cvtVOP3",
+ ""));
}
if P.HasExtSDWA then
@@ -1344,29 +1384,9 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
// VOPC64
-class VOPC64_DPP_Base<bits<10> op, string OpName, VOPProfile P>
- : VOP3_DPP_Base<OpName, P, 1>, VOP3_DPPe_Common<op, P> {
+class VOPC64_DPP<VOP_DPP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP_Base<opName, ps.Pfl, 1> {
Instruction Opcode = !cast<Instruction>(NAME);
-
- bits<8> src0;
- bits<9> dpp_ctrl;
- bits<1> bound_ctrl;
- bits<4> bank_mask;
- bits<4> row_mask;
- bit fi;
-
- let Inst{40-32} = 0xfa;
- let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
- let Inst{80-72} = dpp_ctrl;
- let Inst{82} = fi;
- let Inst{83} = bound_ctrl;
- // Inst{87-84} ignored by hw
- let Inst{91-88} = bank_mask;
- let Inst{95-92} = row_mask;
-}
-
-class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
let True16Predicate = ps.True16Predicate;
@@ -1380,32 +1400,28 @@ class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
class VOPC64_DPP16_Dst<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
- : VOPC64_DPP16<op, ps, opName> {
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc<op, ps.Pfl, 1> {
bits<8> sdst;
let Inst{7-0} = sdst;
}
class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
- : VOPC64_DPP16<op, ps, opName> {
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc<op, ps.Pfl, 1> {
let Inst{7-0} = ? ;
}
-class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P>
- : VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P> {
- Instruction Opcode = !cast<Instruction>(NAME);
-
- bits<8> src0;
- bits<24> dpp8;
- bits<9> fi;
-
- let Inst{40-32} = fi;
- let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
- let Inst{95-72} = dpp8{23-0};
+class VOPC64_DPP16_Dst_t16<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ let Inst{14} = 0;
}
-class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP8_Base<op, opName, ps.Pfl> {
+class VOPC64_DPP8<VOP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP8_Base<opName, ps.Pfl> {
+ Instruction Opcode = !cast<Instruction>(NAME);
// Note ps is the non-dpp pseudo
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
@@ -1416,18 +1432,26 @@ class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
}
class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP8<op, ps, opName> {
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl> {
bits<8> sdst;
let Inst{7-0} = sdst;
let Constraints = "";
}
class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
- : VOPC64_DPP8<op, ps, opName> {
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl> {
let Inst{7-0} = ? ;
let Constraints = "";
}
+class VOPC64_DPP8_Dst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ let Inst{14} = 0;
+ let Constraints = "";
+}
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
@@ -1442,7 +1466,7 @@ multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
def _e32#Gen.Suffix : VOPC_Real<ps32, Gen.Subtarget>,
VOPCe<op{7-0}>;
- def _e64#Gen.Suffix : VOP3_Real<ps64, Gen.Subtarget>,
+ def _e64#Gen.Suffix : VOP3_Real_Gen<ps64, Gen>,
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
@@ -1508,13 +1532,25 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
// the destination-less 32bit forms add it to the asmString here.
VOPC_Real<ps32, Gen.Subtarget, asm_name#"_e32">,
VOPCe<op{7-0}>;
- def _e64#Gen.Suffix :
- VOP3_Real_Gen<ps64, Gen, asm_name>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
- // Encoding used for VOPC instructions encoded as VOP3 differs from
- // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
- bits<8> sdst;
- let Inst{7-0} = sdst;
+ if ps64.Pfl.IsRealTrue16 then {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps64, Gen, asm_name>,
+ VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ let Inst{14} = 0;
+ }
+ } else {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps64, Gen, asm_name>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
}
defm : VOPCInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
@@ -1554,9 +1590,15 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
- def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
+ if ps64.Pfl.IsRealTrue16 then {
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst_t16<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst_t16<{0, op}, ps64, asm_name>;
+ } else {
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
+ }
} // end if ps64.Pfl.HasExtVOP3DPP
} // End DecoderNamespace
} // End AssemblerPredicate
@@ -1693,11 +1735,23 @@ multiclass VOPC_Real_t16_gfx11<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
+multiclass VOPC_Real_t16_and_fake16_gfx11<bits <9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> {
+ defm _t16: VOPC_Real_t16_gfx11<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+ defm _fake16: VOPC_Real_t16_gfx11<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
multiclass VOPC_Real_t16_gfx11_gfx12<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPC_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
+multiclass VOPC_Real_t16_and_fake16_gfx11_gfx12<bits <9> op, string asm_name,
+ string OpName = NAME, string pseudo_mnemonic = ""> {
+ defm _t16: VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
+ defm _fake16: VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
+}
+
multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
@@ -1708,7 +1762,7 @@ multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
+defm V_CMP_LT_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
defm V_CMP_LE_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
defm V_CMP_GT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 930ed9a5e2d0b3..3b5358b737aa4c 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -2033,7 +2033,7 @@ def VOP2InfoTable : VOPInfoTable<"VOP2">;
def VOP3InfoTable : VOPInfoTable<"VOP3">;
class VOPC64Table <string Format> : GenericTable {
- let FilterClass = "VOPC64_" # Format # "_Base";
+ let FilterClass = "VOPC64_" # Format;
let CppTypeName = "VOPC64DPPInfo";
let Fields = ["Opcode"];
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
index 4604518d71c961..7df17cfd586ca2 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
-#
+# XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+# FIXME. reenable after fix-sgpr-copies is updated for true16 flow
---
name: cmp_f16
@@ -11,7 +11,8 @@ body: |
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_U16_t16_e64_]]
- ; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, killed [[COPY]], 0, [[DEF1]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY killed [[COPY]]
+ ; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[COPY1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
%0:vgpr_16 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s
index 1b9092d30b1b70..798616cef66398 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vopc.s
@@ -3428,112 +3428,127 @@ v_cmp_lg_f32_e64_dpp ttmp[14:15], -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0
v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cmp_lg_f32_e64_dpp null, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0x7c,0x83,0x15,0xd4,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cmp_lt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:22: error: invalid operand for instruction
-v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
-// W32: v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cmp_lt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3]
+// W32: v_cmp_lt_f16_e64_dpp s5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:22: error: invalid oper...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/122499
More information about the llvm-commits mailing list