[llvm] AMDGPU: Start fixing inconsistencies in usage of SubtargetPredicate (PR #96337)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 10:11:06 PDT 2024


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96337

SubtargetPredicate should be the primary "does this instruction exist"
predicate, with OtherPredicates used for other side pieces of information.

Changes like 856d1c4410 were backwards. The problematic usage is how
GFX12 is using HasRestrictedOffset. The multiclasses for buffers
should probably be split up instead of hiding OtherPredicates inside
the buffer atomic multiclasses. The two cases are mutually exclusive
and really need a negated predicate for the not-gfx12 case.

It's pretty terrible we have to manage this in the first place.
TableGen should be able to figure out the required predicates
from any instructions that appear in the pattern output.

>From dc0fa1e2d5d0660c9b629da76f09a371e4be6c6e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 21 Jun 2024 16:25:36 +0200
Subject: [PATCH] AMDGPU: Start fixing inconsistencies in usage of
 SubtargetPredicate

SubtargetPredicate should be the primary "does this instruction exist"
predicate, with OtherPredicates used for other side pieces of information.

Changes like 856d1c4410 were backwards. The problematic usage is how
GFX12 is using HasRestrictedOffset. The multiclasses for buffers
should probably be split up instead of hiding OtherPredicates inside
the buffer atomic multiclasses. The two cases are mutually exclusive
and really need a negated predicate for the not-gfx12 case.

It's pretty terrible we have to manage this in the first place.
TableGen should be able to figure out the required predicates
from any instructions that appear in the pattern output.
---
 llvm/lib/Target/AMDGPU/BUFInstructions.td | 66 ++++++++++-------------
 1 file changed, 29 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 21335e9b64647..639f071cafe50 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -532,7 +532,7 @@ multiclass MUBUF_Pseudo_Load_Pats_Common<string BaseInst, ValueType load_vt = i3
 }
 
 multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag>{
-  let SubtargetPredicate = HasUnrestrictedSOffset in {
+  let OtherPredicates = [HasUnrestrictedSOffset] in {
     defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst, load_vt, ld>;
   }
   defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst # "_VBUFFER", load_vt, ld>;
@@ -629,7 +629,7 @@ multiclass MUBUF_Pseudo_Store_Pats_Common<string BaseInst, ValueType store_vt =
 }
 
 multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
-  let SubtargetPredicate = HasUnrestrictedSOffset in {
+  let OtherPredicates = [HasUnrestrictedSOffset] in {
     defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst, store_vt, st>;
   }
   defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst # "_VBUFFER", store_vt, st>;
@@ -1227,12 +1227,12 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
   "buffer_atomic_pk_add_f16", VGPR_32, v2f16
 >;
 
-let OtherPredicates = [HasAtomicFaddRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddRtnInsts in
 defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN<
   "buffer_atomic_add_f32", VGPR_32, f32, null_frag
 >;
 
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
 defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
   "buffer_atomic_pk_add_f16", VGPR_32, v2f16, null_frag
 >;
@@ -1699,9 +1699,11 @@ multiclass SIBufferAtomicPat_Common<string OpPrefix, ValueType vt, string Inst,
 
 multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
                              list<string> RtnModes = ["ret", "noret"]> {
-  let SubtargetPredicate = HasUnrestrictedSOffset in {
+  let OtherPredicates = [HasUnrestrictedSOffset] in {
     defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst, RtnModes>;
   }
+
+  // FIXME: This needs a !HasUnrestrictedSOffset predicate
   defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst # "_VBUFFER", RtnModes>;
 }
 
@@ -1732,18 +1734,19 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
 defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
 
-let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
+let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
 defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
 
 let SubtargetPredicate = isGFX12Plus in {
   defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
   defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;
+}
 
-  let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
-  defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
+let SubtargetPredicate = HasAtomicCSubNoRtnInsts in {
+defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
 }
 
-let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
+let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
 }
@@ -1803,29 +1806,21 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
   defm : BufferAtomicPatterns_NO_RTN_Common<name, vt, opcode # "_VBUFFER">;
 }
 
-let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["noret"]>;
 
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
-  let SubtargetPredicate = isGFX9Only in
-  defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
-
-  let SubtargetPredicate = isGFX12Plus in
-  defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["noret"]>;
-} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts]
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
+  defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
+} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts
 
-let OtherPredicates = [HasAtomicFaddRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddRtnInsts in
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["ret"]>;
 
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
-  let SubtargetPredicate = isGFX9Only in
-  defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
-
-  let SubtargetPredicate = isGFX12Plus in
-  defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["ret"]>;
-} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts]
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
+  defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
+} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts
 
-let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
+let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
   defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
@@ -1901,7 +1896,7 @@ multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, stri
 }
 
 multiclass SIBufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
-  let SubtargetPredicate = HasUnrestrictedSOffset in {
+  let OtherPredicates = [HasUnrestrictedSOffset] in {
     defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst>;
   }
   defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst # "_VBUFFER">;
@@ -1952,7 +1947,7 @@ multiclass MUBUFLoad_PatternOffset_Common <string Instr, ValueType vt,
 
 multiclass MUBUFLoad_PatternOffset <string Instr, ValueType vt,
                                     PatFrag ld> {
-  let SubtargetPredicate = HasUnrestrictedSOffset in {
+  let OtherPredicates = [HasUnrestrictedSOffset] in {
     defm : MUBUFLoad_PatternOffset_Common<Instr, vt, ld>;
   }
   defm : MUBUFLoad_PatternOffset_Common<Instr # "_VBUFFER", vt, ld>;
@@ -2193,7 +2188,7 @@ multiclass MTBUF_LoadIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
 
 multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
                                   string opcode, ValueType memoryVt = vt> {
-  let SubtargetPredicate = HasUnrestrictedSOffset in {
+  let OtherPredicates = [HasUnrestrictedSOffset] in {
     defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode, memoryVt>;
   }
   defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
@@ -2208,7 +2203,7 @@ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
 
-let OtherPredicates = [HasUnpackedD16VMem] in {
+let SubtargetPredicate = HasUnpackedD16VMem in {
   defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, f16,   "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
   defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, i32,   "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
   defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
@@ -2216,7 +2211,7 @@ let OtherPredicates = [HasUnpackedD16VMem] in {
   defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
 } // End HasUnpackedD16VMem.
 
-let OtherPredicates = [HasPackedD16VMem] in {
+let SubtargetPredicate = HasPackedD16VMem in {
   defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16,   "TBUFFER_LOAD_FORMAT_D16_X">;
   defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32,   "TBUFFER_LOAD_FORMAT_D16_X">;
   defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
@@ -2265,7 +2260,7 @@ multiclass MTBUF_StoreIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
 
 multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
                                   string opcode, ValueType memoryVt = vt> {
-  let SubtargetPredicate = HasUnrestrictedSOffset in {
+  let OtherPredicates = [HasUnrestrictedSOffset] in {
     defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode, memoryVt>;
   }
   defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
@@ -2280,7 +2275,7 @@ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY"
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
 
-let OtherPredicates = [HasUnpackedD16VMem] in {
+let SubtargetPredicate = HasUnpackedD16VMem in {
   defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, f16,   "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
   defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, i32,   "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
   defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
@@ -2288,7 +2283,7 @@ let OtherPredicates = [HasUnpackedD16VMem] in {
   defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
 } // End HasUnpackedD16VMem.
 
-let OtherPredicates = [HasPackedD16VMem] in {
+let SubtargetPredicate = HasPackedD16VMem in {
   defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16,   "TBUFFER_STORE_FORMAT_D16_X">;
   defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32,   "TBUFFER_STORE_FORMAT_D16_X">;
   defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
@@ -3267,10 +3262,7 @@ defm BUFFER_WBINVL1_VOL         : MUBUF_Real_vi <0x3f>;
 
 
 defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
-
-let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
 defm BUFFER_ATOMIC_ADD_F32    : MUBUF_Real_Atomic_vi <0x4d>;
-} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts
 
 let SubtargetPredicate = isGFX90APlus in {
   defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;



More information about the llvm-commits mailing list