[llvm] [AMDGPU] Reduce duplication in FLAT atomic definitions (PR #85383)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 15 04:35:52 PDT 2024


https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/85383

This simplifies the case where the tablegen name of the defm for the
Real is the same as the name of the Pseudo.


>From c2a8862cfe2dfacc9d09cec493e0974edcfce1c4 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 14 Mar 2024 16:44:32 +0000
Subject: [PATCH] [AMDGPU] Reduce duplication in FLAT atomic definitions

This simplifies the case where the tablegen name of the defm for the
Real is the same as the name of the Pseudo.
---
 llvm/lib/Target/AMDGPU/FLATInstructions.td | 143 +++++++++++----------
 1 file changed, 73 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index f42d4ae416bd76..3ed74354242664 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1787,45 +1787,46 @@ def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
 def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
 def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
 
-multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
+multiclass FLAT_Real_Atomics_ci <bits<7> op> {
+  defvar ps = !cast<FLAT_Pseudo>(NAME);
   def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
   def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
 }
 
-defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
-defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
-defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
-defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
-defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
-defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
-defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
-defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
-defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
-defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
-defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
-defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
-defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
-defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
-defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
-defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
-defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
-defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
-defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
-defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
-defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
-defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
-defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
-defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
-defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
-defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
+defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30>;
+defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31>;
+defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32>;
+defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33>;
+defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35>;
+defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36>;
+defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37>;
+defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38>;
+defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39>;
+defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a>;
+defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b>;
+defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c>;
+defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d>;
+defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50>;
+defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51>;
+defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52>;
+defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53>;
+defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55>;
+defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56>;
+defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57>;
+defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58>;
+defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59>;
+defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a>;
+defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b>;
+defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c>;
+defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d>;
 
 // CI Only flat instructions
-defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
-defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
-defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
-defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
-defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
-defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
+defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e>;
+defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f>;
+defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40>;
+defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e>;
+defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f>;
+defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60>;
 
 
 //===----------------------------------------------------------------------===//
@@ -1925,8 +1926,9 @@ def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
 def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
 def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
 
-multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
+multiclass FLAT_Real_Atomics_vi <bits<7> op,
   bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+  defvar ps = !cast<FLAT_Pseudo>(NAME);
   def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
   def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
 }
@@ -1939,32 +1941,32 @@ multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
 }
 
 
-defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
-defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
-defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
-defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
-defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
-defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
-defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
-defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
-defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
-defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
-defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
-defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
-defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
-defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
-defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
-defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
-defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
-defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
-defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
-defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
-defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
-defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
-defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
-defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
-defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
-defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
+defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40>;
+defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41>;
+defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42>;
+defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43>;
+defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44>;
+defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45>;
+defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46>;
+defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47>;
+defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48>;
+defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49>;
+defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a>;
+defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b>;
+defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c>;
+defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61>;
+defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62>;
+defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63>;
+defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64>;
+defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65>;
+defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66>;
+defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67>;
+defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68>;
+defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69>;
+defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a>;
+defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b>;
+defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c>;
 
 defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
 defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
@@ -2060,9 +2062,9 @@ let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
 }
 
 let SubtargetPredicate = isGFX90AOnly in {
-  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
-  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
-  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
+  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, 0>;
+  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, 0>;
+  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, 0>;
   defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
   defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
   defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
@@ -2073,7 +2075,8 @@ multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
   def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
 }
 
-multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
+multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> {
+  defvar ps = !cast<FLAT_Pseudo>(NAME);
   def _gfx940     : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
   def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
 }
@@ -2089,15 +2092,15 @@ let SubtargetPredicate = isGFX940Plus in {
   defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
   defm GLOBAL_ATOMIC_PK_ADD_F16  : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
 
-  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
-  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
-  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
+  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f>;
+  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50>;
+  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51>;
   defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
   defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
   defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
-  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
-  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
-  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
+  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d>;
+  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e>;
+  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52>;
   defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
 } // End SubtargetPredicate = isGFX940Plus
 



More information about the llvm-commits mailing list