[llvm] [AMDGPU] Only try DecoderTables for the current subtarget. NFCI. (PR #82992)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 26 04:09:11 PST 2024


https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/82992

Speed up disassembly by only calling tryDecodeInst for DecoderTables
that make sense for the current subtarget.

This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my
Release+Asserts build.

>From 9d4151d7dff8af7854fb1052024b34afacf91cbe Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 22 Feb 2024 11:16:53 +0000
Subject: [PATCH] [AMDGPU] Only try DecoderTables for the current subtarget.
 NFCI.

Speed up disassembly by only calling tryDecodeInst for DecoderTables
that make sense for the current subtarget.

This gives a 1.3x speed-up on check-llvm-mc-disassembler-amdgpu in my
Release+Asserts build.
---
 .../Disassembler/AMDGPUDisassembler.cpp       | 45 ++++++++++++-------
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h  |  1 +
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  6 +--
 3 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index e1cca17bdbf432..8c42304ce0bee5 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -466,15 +466,18 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     if (isGFX11Plus() && Bytes.size() >= 12 ) {
       DecoderUInt128 DecW = eat12Bytes(Bytes);
 
-      if (tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                         DecW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                         DecW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
         break;
     }
 
@@ -507,27 +510,32 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
           tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
+      if ((isVI() || isGFX9()) &&
+          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
+      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
+      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                         Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                         Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
         break;
     }
 
@@ -538,13 +546,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     if (Bytes.size() >= 4) {
       const uint32_t DW = eatBytes<uint32_t>(Bytes);
 
-      if (tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
+      if ((isVI() || isGFX9()) &&
+          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
         break;
 
       if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
+      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
         break;
 
       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
@@ -555,14 +564,16 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
           tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
+      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
+      if (isGFX11() &&
+          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                         Address, CS))
         break;
 
-      if (tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+      if (isGFX12() &&
+          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                         Address, CS))
         break;
     }
@@ -1750,6 +1761,10 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
   return AMDGPU::isGFX11Plus(STI);
 }
 
+bool AMDGPUDisassembler::isGFX12() const {
+  return STI.hasFeature(AMDGPU::FeatureGFX12);
+}
+
 bool AMDGPUDisassembler::isGFX12Plus() const {
   return AMDGPU::isGFX12Plus(STI);
 }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 2e1b6fb1c740b7..6a4cb120872089 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -275,6 +275,7 @@ class AMDGPUDisassembler : public MCDisassembler {
   bool isGFX10Plus() const;
   bool isGFX11() const;
   bool isGFX11Plus() const;
+  bool isGFX12() const;
   bool isGFX12Plus() const;
 
   bool hasArchitectedFlatScratch() const;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index fe4db0ebb0262d..cc374fbae7cc56 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -527,7 +527,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
     let ssamp = 0 in {
       if op.HAS_GFX10M then {
         def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
-                                         !if(enableDisasm, "GFX10", "")>;
+                                         !if(enableDisasm, "GFX8", "")>;
         if !not(ExtendedImageInst) then
         def _V1_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VGPR_32,
                                        !if(enableDisasm, "GFX90A", "")>;
@@ -754,7 +754,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
       let ssamp = 0 in {
         if op.HAS_GFX10M then {
           def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
-                                      !if(enableDisasm, "GFX10", "")>;
+                                      !if(enableDisasm, "GFX8", "")>;
           let hasPostISelHook = 1 in
           def _V1_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VGPR_32,
                                       !if(enableDisasm, "GFX90A", "")>;
@@ -1298,7 +1298,7 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
       if op.HAS_GFX10M then {
         def _V # addr.NumWords
           : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
-                                 !if(!and(enableDisasm, addr.Disassemble), "GFX10", "")>;
+                                 !if(!and(enableDisasm, addr.Disassemble), "GFX8", "")>;
         if !not(ExtendedImageInst) then
         def _V # addr.NumWords # _gfx90a
           : MIMG_Sampler_gfx90a <op, asm, dst_rc, addr.RegClass,



More information about the llvm-commits mailing list