[llvm] [AMDGPU] Update VOP instructions for GFX12 (PR #74853)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 8 07:41:24 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-mc
Author: Mariusz Sikora (mariusz-sikora-at-amd)
<details>
<summary>Changes</summary>
---
The patch is 2.34 MiB and has been truncated to 20.00 KiB below. Full version: https://github.com/llvm/llvm-project/pull/74853.diff
33 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+37-2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+21)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+3-1)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+5-1)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+238-172)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+414-245)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+149-99)
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+100-65)
- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+316-273)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+254-145)
- (added) llvm/test/CodeGen/AMDGPU/move-to-valu-lshlrev.mir (+32)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop1.s (+3545)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s (+2816)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s (+605)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop2.s (+2560)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s (+19)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s (+2006)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s (+433)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+5983)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop3_aliases.s (+49)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+4695)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+2968)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1.txt (+3302)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt (+2606)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt (+374)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt (+2228)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt (+1696)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt (+244)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+5497)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+4112)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+2631)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3c9f9cfd834fa..cbfcab4c97330 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -201,6 +201,8 @@ unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHLREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHLREV_B64_e32_gfx12:
+ case AMDGPU::V_LSHLREV_B64_e64_gfx12:
case AMDGPU::V_LSHL_B64_e64:
case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 092845d391a3b..b5ea3376cc024 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3411,6 +3411,8 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
case AMDGPU::V_LSHLREV_B64_e64_gfx11:
+ case AMDGPU::V_LSHLREV_B64_e32_gfx12:
+ case AMDGPU::V_LSHLREV_B64_e64_gfx12:
case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
case AMDGPU::V_LSHRREV_B64_e64_gfx11:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1f11beb71101b..fc276d337bc62 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -493,6 +493,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
+ Res =
+ tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
MI, DecW, Address, CS);
if (Res) {
@@ -506,6 +512,19 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
break;
}
+ Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,
+ MI, DecW, Address, CS);
+ if (Res) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
+ convertVOP3PDPPInst(MI);
+ else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ convertVOPCDPPInst(MI); // Special VOP3 case
+ else {
+ assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
+ convertVOP3DPPInst(MI); // Regular VOP3 case
+ }
+ break;
+ }
Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
if (Res)
break;
@@ -543,6 +562,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
MI = MCInst(); // clear
+ Res = tryDecodeInst(DecoderTableDPP8GFX1264,
+ DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
+
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;
@@ -554,6 +579,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
}
+ Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,
+ MI, QW, Address, CS);
+ if (Res) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
+ convertVOPCDPPInst(MI);
+ break;
+ }
+
Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
if (Res) { IsSDWA = true; break; }
@@ -612,7 +645,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1232, MI, DW, Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+ Address, CS);
if (Res)
break;
@@ -643,7 +677,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1264, MI, QW, Address, CS);
+ Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+ Address, CS);
if (Res)
break;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 7ba015cdea241..57f74ae08b35c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -416,6 +416,15 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
printDefaultVccOperand(false, STI, O);
break;
}
@@ -807,6 +816,18 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_CNDMASK_B32_e32_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
+ case AMDGPU::V_CNDMASK_B32_dpp_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
+ case AMDGPU::V_CNDMASK_B32_dpp8_gfx12:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
case AMDGPU::V_CNDMASK_B32_e32_vi:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0a06fa88b6b10..dda54a0929c19 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6869,7 +6869,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
break;
case AMDGPU::S_LSHL_B64:
if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHLREV_B64_e64;
+ NewOpcode = ST.getGeneration() >= AMDGPUSubtarget::GFX12
+ ? AMDGPU::V_LSHLREV_B64_pseudo_e64
+ : AMDGPU::V_LSHLREV_B64_e64;
swapOperands(Inst);
}
break;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 68d561a0d9f78..f9b452ccdf449 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -470,11 +470,13 @@ bool isMAC(unsigned Opc) {
Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
+ Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
+ Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
@@ -485,7 +487,9 @@ bool isPermlane16(unsigned Opc) {
return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
- Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
+ Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
+ Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
+ Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12;
}
bool isGenericAtomic(unsigned Opc) {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 53b0513c85d88..27a7c29cb1ac9 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -88,6 +88,12 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
let TRANS = ps.TRANS;
}
+class VOP1_Real_Gen <VOP1_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
+ VOP1_Real <ps, Gen.Subtarget, real_name> {
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = Gen.DecoderNamespace;
+}
+
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
VOP_SDWA_Pseudo <OpName, P, pattern> {
let AsmMatchConverter = "cvtSdwaVOP1";
@@ -688,6 +694,13 @@ class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = p
let SubtargetPredicate = HasDPP16;
}
+class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
+ VOP1_DPP16 <op, ps, Gen.Subtarget, p> {
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
+}
+
+
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
VOP_DPP8<ps.OpName, p> {
let hasSideEffects = ps.hasSideEffects;
@@ -702,138 +715,173 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
let Inst{31-25} = 0x3f;
}
+class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
+ VOP1_DPP8<op, ps, p> {
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+}
+
//===----------------------------------------------------------------------===//
-// GFX11.
+// GFX11, GFX12
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
- multiclass VOP1Only_Real_gfx11<bits<9> op> {
- let IsSingle = 1 in
- def _gfx11 :
- VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX11>,
- VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
- }
- multiclass VOP1_Real_e32_gfx11<bits<9> op, string opName = NAME> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- def _e32_gfx11 :
- VOP1_Real<ps, SIEncodingFamily.GFX11>,
- VOP1e<op{7-0}, ps.Pfl>;
- }
- multiclass VOP1_Real_e32_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.AsmOperands in {
- defm NAME : VOP1_Real_e32_gfx11<op, opName>;
- }
- }
- multiclass VOP1_Real_e64_gfx11<bits<9> op> {
- def _e64_gfx11 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
- VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
- }
- multiclass VOP1_Real_dpp_gfx11<bits<9> op, string opName = NAME> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- def _dpp_gfx11 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11> {
- let DecoderNamespace = "DPPGFX11";
- }
- }
- multiclass VOP1_Real_dpp_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in {
- defm NAME : VOP1_Real_dpp_gfx11<op, opName>;
- }
+multiclass VOP1Only_Real<GFXGen Gen, bits<9> op> {
+ let IsSingle = 1 in
+ def Gen.Suffix :
+ VOP1_Real_Gen<!cast<VOP1_Pseudo>(NAME), Gen>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _e32#Gen.Suffix :
+ VOP1_Real_Gen<ps, Gen>,
+ VOP1e<op{7-0}, ps.Pfl>;
+}
+
+multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.AsmOperands in {
+ defm NAME : VOP1_Real_e32<Gen, op, opName>;
}
- multiclass VOP1_Real_dpp8_gfx11<bits<9> op, string opName = NAME> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- def _dpp8_gfx11 : VOP1_DPP8<op{7-0}, ps> {
- let DecoderNamespace = "DPP8GFX11";
- }
+}
+
+multiclass VOP1_Real_e64<GFXGen Gen, bits<9> op> {
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>,
+ VOP3e_gfx11_gfx12<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _dpp#Gen.Suffix : VOP1_DPP16_Gen<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), Gen>;
+}
+
+multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP16 in {
+ defm NAME : VOP1_Real_dpp<Gen, op, opName>;
}
- multiclass VOP1_Real_dpp8_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in {
- defm NAME : VOP1_Real_dpp8_gfx11<op, opName>;
- }
+}
+
+multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _dpp8#Gen.Suffix : VOP1_DPP8_Gen<op{7-0}, ps, Gen>;
+}
+
+multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP8 in {
+ defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
}
-} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+}
-multiclass VOP1_Realtriple_e64_gfx11<bits<9> op> {
- defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
+multiclass VOP1_Realtriple_e64<GFXGen Gen, bits<9> op> {
+ defm NAME : VOP3_Realtriple<Gen, {0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
}
-multiclass VOP1_Realtriple_e64_with_name_gfx11<bits<9> op, string opName,
+
+multiclass VOP1_Realtriple_e64_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
- defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName,
+ defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 1, op{6-0}}, opName,
asmName>;
}
-multiclass VOP1_Real_FULL_gfx11<bits<9> op> :
- VOP1_Real_e32_gfx11<op>, VOP1_Realtriple_e64_gfx11<op>,
- VOP1_Real_dpp_gfx11<op>, VOP1_Real_dpp8_gfx11<op>;
+multiclass VOP1_Real_FULL<GFXGen Gen, bits<9> op> :
+ VOP1_Real_e32<Gen, op>, VOP1_Realtriple_e64<Gen, op>,
+ VOP1_Real_dpp<Gen, op>, VOP1_Real_dpp8<Gen, op>;
multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
- string asmName> {
- defm NAME : VOP1_Real_e32_with_name_gfx11<op, opName, asmName>,
- VOP1_Real_dpp_with_name_gfx11<op, opName, asmName>,
- VOP1_Real_dpp8_with_name_gfx11<op, opName, asmName>;
+ string asmName> {
+ defm NAME : VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
def gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>,
Requires<[isGFX11Plus]>;
}
-multiclass VOP1_Real_FULL_with_name_gfx11<bits<9> op, string opName,
+multiclass VOP1_Real_NO_VOP3_with_name_gfx12<bits<9> op, string opName,
+ string asmName> {
+ defm NAME : VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_dpp_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_dpp8_with_name<GFX12Gen, op, opName, asmName>;
+}
+
+multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> :
- VOP1_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>,
- VOP1_Realtriple_e64_with_name_gfx11<op, opName, asmName>;
+ VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
+ VOP1_Real_dpp_with_name<Gen, op, opName, asmName>,
+ VOP1_Real_dpp8_with_name<Gen, op, opName, asmName>,
+ VOP1_Realtriple_e64_with_name<Gen, op, opName, asmName>;
-multiclass VOP1_Real_FULL_t16_gfx11<bits<9> op, string asmName,
- string opName = NAME> :
- VOP1_Real_FULL_with_name_gfx11<op, opName, asmName>;
+multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
+ VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;
-multiclass VOP1_Real_NO_DPP_gfx11<bits<9> op> :
- VOP1_Real_e32_gfx11<op>, VOP1_Real_e64_gfx11<op>;
+multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
+ string opName = NAME> :
+ VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
-defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c,
+multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
+ string asmName> :
+ VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
+ VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;
+
+multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> :
+ VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>;
+
+multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
+ string opName, string asmName> :
+ VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
+ VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
+
+
+defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
"V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
-defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d,
+defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d,
"V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
-defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039,
+defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x039,
"V_FFBH_U32", "v_clz_i32_u32">;
-defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a,
+defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
"V_FFBL_B32", "v_ctz_i32_b32">;
-defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b,
+defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
"V_FFBH_I32", "v_cls_i32">;
-defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>;
-defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11<0x01c, "v_mov_b16">;
-defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11<0x069, "v_not_b16">;
-defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x06a, "v_cvt_i32_i16">;
-defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x06b, "v_cvt_u32_u16">;
-
-defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x050, "v_cvt_f16_u16">;
-defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x051, "v_cvt_f16_i16">;
-defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x052, "v_cvt_u16_f16">;
-defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x053, "v_cvt_i16_f16">;
-defm V_RCP_F16_t16 ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/74853
More information about the llvm-commits mailing list