[llvm] [AMDGPU] Only try DecoderTables for the current subtarget. NFCI. (PR #82992)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 26 04:09:11 PST 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/82992
Speed up disassembly by only calling tryDecodeInst for DecoderTables
that make sense for the current subtarget.
This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my
Release+Asserts build.
From 9d4151d7dff8af7854fb1052024b34afacf91cbe Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 22 Feb 2024 11:16:53 +0000
Subject: [PATCH] [AMDGPU] Only try DecoderTables for the current subtarget. NFCI.
Speed up disassembly by only calling tryDecodeInst for DecoderTables
that make sense for the current subtarget.
This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my
Release+Asserts build.
---
.../Disassembler/AMDGPUDisassembler.cpp | 45 ++++++++++++-------
.../AMDGPU/Disassembler/AMDGPUDisassembler.h | 1 +
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 6 +--
3 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index e1cca17bdbf432..8c42304ce0bee5 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -466,15 +466,18 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- if (tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
DecW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
DecW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
break;
}
@@ -507,27 +510,32 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
+ if ((isVI() || isGFX9()) &&
+ tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
+ if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
+ if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
break;
}
@@ -538,13 +546,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Bytes.size() >= 4) {
const uint32_t DW = eatBytes<uint32_t>(Bytes);
- if (tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
+ if ((isVI() || isGFX9()) &&
+ tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
break;
if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
+ if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
break;
if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
@@ -555,14 +564,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
+ if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
+ if (isGFX11() &&
+ tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
Address, CS))
break;
- if (tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+ if (isGFX12() &&
+ tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
Address, CS))
break;
}
@@ -1750,6 +1761,10 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
return AMDGPU::isGFX11Plus(STI);
}
+bool AMDGPUDisassembler::isGFX12() const {
+ return STI.hasFeature(AMDGPU::FeatureGFX12);
+}
+
bool AMDGPUDisassembler::isGFX12Plus() const {
return AMDGPU::isGFX12Plus(STI);
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 2e1b6fb1c740b7..6a4cb120872089 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -275,6 +275,7 @@ class AMDGPUDisassembler : public MCDisassembler {
bool isGFX10Plus() const;
bool isGFX11() const;
bool isGFX11Plus() const;
+ bool isGFX12() const;
bool isGFX12Plus() const;
bool hasArchitectedFlatScratch() const;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index fe4db0ebb0262d..cc374fbae7cc56 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -527,7 +527,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
let ssamp = 0 in {
if op.HAS_GFX10M then {
def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
- !if(enableDisasm, "GFX10", "")>;
+ !if(enableDisasm, "GFX8", "")>;
if !not(ExtendedImageInst) then
def _V1_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VGPR_32,
!if(enableDisasm, "GFX90A", "")>;
@@ -754,7 +754,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
let ssamp = 0 in {
if op.HAS_GFX10M then {
def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
- !if(enableDisasm, "GFX10", "")>;
+ !if(enableDisasm, "GFX8", "")>;
let hasPostISelHook = 1 in
def _V1_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VGPR_32,
!if(enableDisasm, "GFX90A", "")>;
@@ -1298,7 +1298,7 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
if op.HAS_GFX10M then {
def _V # addr.NumWords
: MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
- !if(!and(enableDisasm, addr.Disassemble), "GFX10", "")>;
+ !if(!and(enableDisasm, addr.Disassemble), "GFX8", "")>;
if !not(ExtendedImageInst) then
def _V # addr.NumWords # _gfx90a
: MIMG_Sampler_gfx90a <op, asm, dst_rc, addr.RegClass,
More information about the llvm-commits mailing list