[llvm] [AMDGPU] Stop using SDWA DecoderNamespaces. NFCI. (PR #82233)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 19 02:37:03 PST 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/82233
64-bit SDWA encodings have to be checked before the 32-bit encodings because
their first 32 bits are a special case of the corresponding 32-bit non-SDWA
encoding of the same instruction. But all 64-bit encodings are now checked
before the 32-bit ones, so we don't need special handling for SDWA.
From a1177d3201f18655b577db0ea2320aefafa4c758 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 16 Feb 2024 17:38:45 +0000
Subject: [PATCH 1/2] [AMDGPU] Try decoding instructions longest first. NFCI.
AMDGPUDisassembler::getInstruction tries decoding instructions using
different DecoderTables in a confusing order: first 96-bit instructions,
then some 64-bit, then 32-bit, then some more 64-bit.
This patch changes it to always try longer encodings first. The
motivation is to make getInstruction easier to understand, and to pave
the way for combining some 64-bit tables that do not need to be
separate.
---
.../Disassembler/AMDGPUDisassembler.cpp | 89 ++++++++++---------
llvm/lib/Target/AMDGPU/SOPInstructions.td | 3 +
2 files changed, 49 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 98988f881f1b44..dcd036248c4300 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -585,6 +585,52 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res)
break;
}
+
+ Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
+ Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
+ if (Res)
+ break;
+ }
+
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
+ if (Res)
+ break;
+ }
+
+ Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI,
+ QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI,
+ QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS);
+ if (Res)
+ break;
}
// Reinitialize Bytes as DPP64 could have eaten too much
@@ -624,49 +670,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Address, CS);
if (Res)
break;
-
- if (Bytes.size() < 4) break;
- const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
-
- if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
- Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
- if (Res)
- break;
- }
-
- if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
- Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
- if (Res)
- break;
- }
-
- Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
- Address, CS);
- if (Res)
- break;
-
- Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
- Address, CS);
- if (Res)
- break;
-
- Res = tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS);
- if (Res)
- break;
-
- Res = tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS);
} while (false);
if (Res && AMDGPU::isMAC(MI.getOpcode())) {
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1486df04ed950b..0fe2845f8edc31 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -2571,11 +2571,13 @@ multiclass SOPP_Real_32_gfx11_Renamed_gfx12<bits<7> op, string gfx12_name> :
multiclass SOPP_Real_With_Relaxation_gfx12<bits<7> op> {
defm "" : SOPP_Real_32_gfx12<op>;
+ let isCodeGenOnly = 1 in
defm _pad_s_nop : SOPP_Real_64_gfx12<op>;
}
multiclass SOPP_Real_With_Relaxation_gfx11<bits<7> op> {
defm "" : SOPP_Real_32_gfx11<op>;
+ let isCodeGenOnly = 1 in
defm _pad_s_nop : SOPP_Real_64_gfx11<op>;
}
@@ -2697,6 +2699,7 @@ multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> :
//relaxation for insts with no operands not implemented
multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> {
defm "" : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
+ let isCodeGenOnly = 1 in
defm _pad_s_nop : SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
}
From 7ad5baad2ec603d63e5ec0a6916b6bc08b3f025a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 19 Feb 2024 10:21:59 +0000
Subject: [PATCH 2/2] [AMDGPU] Stop using SDWA DecoderNamespaces. NFCI.
64-bit SDWA encodings have to be checked before the 32-bit encodings because
their first 32 bits are a special case of the corresponding 32-bit non-SDWA
encoding of the same instruction. But all 64-bit encodings are now checked
before the 32-bit ones, so we don't need special handling for SDWA.
---
.../Disassembler/AMDGPUDisassembler.cpp | 13 +---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 8 +--
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 70 ++++++++-----------
llvm/lib/Target/AMDGPU/VOPInstructions.td | 6 +-
4 files changed, 36 insertions(+), 61 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index dcd036248c4300..014a83b345ffd1 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -447,8 +447,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
raw_ostream &CS) const {
- bool IsSDWA = false;
-
unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -562,15 +560,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
break;
}
- Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
-
- Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
-
- Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
- if (Res) { IsSDWA = true; break; }
-
if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
if (Res)
@@ -767,7 +756,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
Res = convertVINTERPInst(MI);
- if (Res && IsSDWA)
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA))
Res = convertSDWAInst(MI);
int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 5461c645e608fe..6081e167fd939e 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -980,15 +980,11 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
- VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
- let DecoderNamespace = "SDWA10";
- }
+ VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
- def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
- let DecoderNamespace = "SDWA10";
- }
+ def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>;
}
multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index eba9bf64884ec8..4437d5f2a03338 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1740,15 +1740,11 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
- VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
- let DecoderNamespace = "SDWA10";
- }
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
- let DecoderNamespace = "SDWA10";
- }
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>;
}
multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
@@ -1777,35 +1773,33 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
let AsmString = asmName # ps.AsmOperands;
}
}
- let DecoderNamespace = "SDWA10" in {
- multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
- string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
- def _sdwa_gfx10 :
- VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
- VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
- VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
- let AsmString = asmName # ps.AsmOperands;
- }
- }
- multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
- string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
- VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP16;
+ multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # ps.AsmOperands;
}
+ }
+ multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP16;
}
- multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
- string asmName> {
- if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
- def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
- VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8";
- }
+ }
+ multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
+ def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP8;
+ let DecoderNamespace = "DPP8";
}
- } // End DecoderNamespace = "SDWA10"
+ }
//===------------------------------ VOP2be ------------------------------===//
multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
@@ -1832,7 +1826,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
- let DecoderNamespace = "SDWA10";
}
if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_w32_gfx10 :
@@ -1841,9 +1834,8 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
let isAsmParserOnly = 1;
- let DecoderNamespace = "SDWA10";
let WaveSizePredicate = isWave32;
- }
+ }
if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_w64_gfx10 :
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
@@ -1851,7 +1843,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
let AsmString = asmName # Ps.AsmOperands;
let isAsmParserOnly = 1;
- let DecoderNamespace = "SDWA10";
let WaveSizePredicate = isWave64;
}
}
@@ -1861,7 +1852,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
let AsmString = asmName # !subst(", vcc", "", AsmDPP);
- let DecoderNamespace = "SDWA10";
}
if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp_w32_gfx10 :
@@ -2305,7 +2295,7 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
}
}
@@ -2329,7 +2319,7 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
}
}
@@ -2489,7 +2479,7 @@ let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
def _dpp_gfx90a :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
}
}
} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f2bb58ed4c3b56..4d10f1a8ec9410 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -616,7 +616,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let AssemblerPredicate = HasSDWA;
let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA";
+ let DecoderNamespace = "GFX8";
VOPProfile Pfl = P;
}
@@ -672,7 +672,7 @@ class Base_VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let AssemblerPredicate = HasSDWA9;
let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
AMDGPUAsmVariants.Disable);
- let DecoderNamespace = "SDWA9";
+ let DecoderNamespace = "GFX9";
// Copy relevant pseudo op flags
let AsmMatchConverter = ps.AsmMatchConverter;
@@ -693,7 +693,7 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
class Base_VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> : Base_VOP_SDWA9_Real<ps> {
let SubtargetPredicate = HasSDWA10;
let AssemblerPredicate = HasSDWA10;
- let DecoderNamespace = "SDWA10";
+ let DecoderNamespace = "GFX10";
}
class VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> :
More information about the llvm-commits
mailing list