[llvm-branch-commits] [llvm] AMDGPU: Create pseudo to real mapping for flat/buffer atomic fmin/fmax (PR #95591)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 14 12:35:49 PDT 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/95591
The global/flat/buffer atomic fmin/fmax situation is a mess. These
instructions have been renamed 3 times. We currently have
separate pseudos defined for the same opcodes under different names
(e.g. GLOBAL_ATOMIC_MIN_F64 from gfx90a and GLOBAL_ATOMIC_FMIN_X2 from gfx10).
Use the _FMIN versions as the canonical name for the f32 versions. Use the
_MIN_F64 style as the canonical name for the f64 case. This is because
gfx90a has the most sensible names, but does not have the f32 versions.
Wire through the pseudo to use for the instruction properties separately from the
assembly name, as in other cases. This will simplify directly selecting these
from atomicrmw.
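As a rough illustration of the pattern applied here (hypothetical class and def
names, not the actual AMDGPU TableGen definitions), the real encoding takes the
pseudo for its instruction properties plus a separate asmName, so one canonical
pseudo can be printed under a subtarget-specific mnemonic:

// Hypothetical, simplified sketch -- not the real AMDGPU classes.
class ExamplePseudo<string mnemonic> {
  string Mnemonic = mnemonic;        // canonical name of the pseudo
}

class ExampleReal<bits<7> op, ExamplePseudo ps,
                  string asmName = ps.Mnemonic> {
  bits<7> Op = op;
  string PseudoSource = ps.Mnemonic; // properties come from the pseudo
  string AsmString = asmName;        // printed mnemonic may differ per subtarget
}

// One canonical f64 pseudo ...
def EX_FLAT_ATOMIC_MIN_F64 : ExamplePseudo<"flat_atomic_min_f64">;

// ... emitted under the old gfx7/gfx10 spelling.
def EX_FLAT_ATOMIC_MIN_F64_gfx10
    : ExampleReal<0x5f, EX_FLAT_ATOMIC_MIN_F64, "flat_atomic_fmin_x2">;

Defaulting asmName to the pseudo's mnemonic keeps the common case unchanged;
only the handful of renamed atomics need to pass an explicit override.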
From b00ad0dab49ad96c160a16c062f35d7788bd77c8 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 13 Jun 2024 18:33:11 +0200
Subject: [PATCH] AMDGPU: Create pseudo to real mapping for flat/buffer atomic
fmin/fmax
The global/flat/buffer atomic fmin/fmax situation is a mess. These
instructions have been renamed 3 times. We currently have
separate pseudos defined for the same opcodes under different names
(e.g. GLOBAL_ATOMIC_MIN_F64 from gfx90a and GLOBAL_ATOMIC_FMIN_X2 from gfx10).
Use the _FMIN versions as the canonical name for the f32 versions. Use the
_MIN_F64 style as the canonical name for the f64 case. This is because
gfx90a has the most sensible names, but does not have the f32 versions.
Wire through the pseudo to use for the instruction properties separately from the
assembly name, as in other cases. This will simplify directly selecting these
from atomicrmw.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 4 +-
llvm/lib/Target/AMDGPU/BUFInstructions.td | 103 +++++++++--------
llvm/lib/Target/AMDGPU/FLATInstructions.td | 107 +++++++++---------
.../AMDGPU/fp-atomic-to-s_denormmode.mir | 40 +++----
4 files changed, 129 insertions(+), 125 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d0d7a9dc17247..0a1550ccb53c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1864,7 +1864,9 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
def HasBufferFlatGlobalAtomicsF64 :
Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
- AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
+  // FIXME: This is too coarse, and works around using the pseudo's predicates on real instructions.
+ AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
+
def HasLdsAtomicAddF64 :
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 43e5434ea2700..9d21f93a957cc 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1163,12 +1163,6 @@ let SubtargetPredicate = isGFX6GFX7GFX10 in {
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
>;
-defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_fmin_x2", VReg_64, f64, null_frag
->;
-defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_fmax_x2", VReg_64, f64, null_frag
->;
}
@@ -1318,6 +1312,9 @@ let SubtargetPredicate = isGFX90APlus in {
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
+
+  // Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2
+  // on some subtargets.
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
@@ -1763,8 +1760,8 @@ let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
}
let SubtargetPredicate = isGFX6GFX7GFX10 in {
- defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_FMIN_X2">;
- defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_FMAX_X2">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
}
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
@@ -2315,6 +2312,12 @@ let OtherPredicates = [HasPackedD16VMem] in {
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
+// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
+// specific pseudo (OFFSET in this case) since any of them will work.
+class get_BUF_ps<string name> {
+ string Mnemonic = !cast<BUF_Pseudo>(name # "_OFFSET").Mnemonic;
+}
+
//===----------------------------------------------------------------------===//
// Base ENC_MUBUF for GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
@@ -2346,8 +2349,8 @@ multiclass MUBUF_Real_gfx11<bits<8> op, string real_name = !cast<MUBUF_Pseudo>(N
}
}
-class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
- Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> {
+class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef, string asmName> :
+ Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef, asmName> {
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
@@ -2357,9 +2360,10 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
let Inst{55} = ps.tfe;
}
-multiclass MUBUF_Real_gfx10<bits<8> op> {
- defvar ps = !cast<MUBUF_Pseudo>(NAME);
- def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
+multiclass MUBUF_Real_gfx10<bits<8> op, string psName = NAME,
+ string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
+ defvar ps = !cast<MUBUF_Pseudo>(psName);
+ def _gfx10 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10, asmName> {
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
let Inst{25} = op{7};
let AssemblerPredicate = isGFX10Only;
@@ -2367,9 +2371,10 @@ multiclass MUBUF_Real_gfx10<bits<8> op> {
}
}
-multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {
- defvar ps = !cast<MUBUF_Pseudo>(NAME);
- def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
+multiclass MUBUF_Real_gfx6_gfx7<bits<8> op, string psName = NAME,
+ string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
+ defvar ps = !cast<MUBUF_Pseudo>(psName);
+ def _gfx6_gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, asmName> {
let Inst{15} = ps.addr64;
let AssemblerPredicate = isGFX6GFX7;
let DecoderNamespace = "GFX6GFX7";
@@ -2378,7 +2383,7 @@ multiclass MUBUF_Real_gfx6_gfx7<bits<8> op> {
multiclass MUBUF_Real_gfx6<bits<8> op> {
defvar ps = !cast<MUBUF_Pseudo>(NAME);
- def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
+ def _gfx6 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
let Inst{15} = ps.addr64;
let AssemblerPredicate = isGFX6;
let DecoderNamespace = "GFX6";
@@ -2387,7 +2392,7 @@ multiclass MUBUF_Real_gfx6<bits<8> op> {
multiclass MUBUF_Real_gfx7<bits<8> op> {
defvar ps = !cast<MUBUF_Pseudo>(NAME);
- def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
+ def _gfx7 : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI, ps.Mnemonic> {
let Inst{15} = ps.addr64;
let AssemblerPredicate = isGFX7Only;
let DecoderNamespace = "GFX7";
@@ -2488,12 +2493,6 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> {
// MUBUF - GFX11, GFX12.
//===----------------------------------------------------------------------===//
-// Shortcut to default Mnemonic from BUF_Pseudo. Hides the cast to the
-// specific pseudo (bothen in this case) since any of them will work.
-class get_BUF_ps<string name> {
- string Mnemonic = !cast<BUF_Pseudo>(name # "_BOTHEN").Mnemonic;
-}
-
// gfx11 instruction that accept both old and new assembler name.
class Mnem_gfx11_gfx12 <string mnemonic, string real_name> :
AMDGPUMnemonicAlias<mnemonic, real_name> {
@@ -2715,18 +2714,20 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op, bit isTFE = 0> {
defm _LDS_BOTHEN : MUBUF_Real_gfx10<op>;
}
}
-multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
- defm _BOTHEN_RTN : MUBUF_Real_gfx10<op>;
- defm _IDXEN_RTN : MUBUF_Real_gfx10<op>;
- defm _OFFEN_RTN : MUBUF_Real_gfx10<op>;
- defm _OFFSET_RTN : MUBUF_Real_gfx10<op>;
+multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op, string psName = NAME,
+ string asmName = !cast<MUBUF_Pseudo>(psName).Mnemonic> {
+ defm _BOTHEN_RTN : MUBUF_Real_gfx10<op, psName#"_BOTHEN_RTN", asmName>;
+ defm _IDXEN_RTN : MUBUF_Real_gfx10<op, psName#"_IDXEN_RTN", asmName>;
+ defm _OFFEN_RTN : MUBUF_Real_gfx10<op, psName#"_OFFEN_RTN", asmName>;
+ defm _OFFSET_RTN : MUBUF_Real_gfx10<op, psName#"_OFFSET_RTN", asmName>;
}
-multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
- MUBUF_Real_Atomics_RTN_gfx10<op> {
- defm _BOTHEN : MUBUF_Real_gfx10<op>;
- defm _IDXEN : MUBUF_Real_gfx10<op>;
- defm _OFFEN : MUBUF_Real_gfx10<op>;
- defm _OFFSET : MUBUF_Real_gfx10<op>;
+multiclass MUBUF_Real_Atomics_gfx10<bits<8> op, string psName = NAME,
+ string asmName = get_BUF_ps<psName>.Mnemonic> :
+ MUBUF_Real_Atomics_RTN_gfx10<op, psName, asmName> {
+ defm _BOTHEN : MUBUF_Real_gfx10<op, psName#"_BOTHEN", asmName>;
+ defm _IDXEN : MUBUF_Real_gfx10<op, psName#"_IDXEN", asmName>;
+ defm _OFFEN : MUBUF_Real_gfx10<op, psName#"_OFFEN", asmName>;
+ defm _OFFSET : MUBUF_Real_gfx10<op, psName#"_OFFSET", asmName>;
}
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
@@ -2781,18 +2782,18 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op, bit isTFE = 0> {
defm _LDS_BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
}
}
-multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
- defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op>;
- defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op>;
- defm _IDXEN : MUBUF_Real_gfx6_gfx7<op>;
- defm _OFFEN : MUBUF_Real_gfx6_gfx7<op>;
- defm _OFFSET : MUBUF_Real_gfx6_gfx7<op>;
+multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op, string psName, string asmName> {
+ defm _ADDR64 : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64", asmName>;
+ defm _BOTHEN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN", asmName>;
+ defm _IDXEN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN", asmName>;
+ defm _OFFEN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN", asmName>;
+ defm _OFFSET : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET", asmName>;
- defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op>;
- defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
- defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
- defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op>;
- defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op>;
+ defm _ADDR64_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_ADDR64_RTN", asmName>;
+ defm _BOTHEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_BOTHEN_RTN", asmName>;
+ defm _IDXEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_IDXEN_RTN", asmName>;
+ defm _OFFEN_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFEN_RTN", asmName>;
+ defm _OFFSET_RTN : MUBUF_Real_gfx6_gfx7<op, psName#"_OFFSET_RTN", asmName>;
}
multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
@@ -2807,8 +2808,10 @@ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> {
defm _TFE : MUBUF_Real_AllAddr_Lds_Helper_gfx6_gfx7_gfx10<op, 1>;
}
-multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
- MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
+multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op, string psName = NAME,
+ string asmName = get_BUF_ps<psName>.Mnemonic> :
+ MUBUF_Real_Atomics_gfx6_gfx7<op, psName, asmName>,
+ MUBUF_Real_Atomics_gfx10<op, psName, asmName>;
// FIXME-GFX6: Following instructions are available only on GFX6.
//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
@@ -2868,8 +2871,8 @@ defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
-defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
-defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
+defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f, "BUFFER_ATOMIC_MIN_F64", "buffer_atomic_fmin_x2">;
+defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060, "BUFFER_ATOMIC_MAX_F64", "buffer_atomic_fmax_x2">;
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 818cbde592432..a1388b41db428 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -756,12 +756,6 @@ let SubtargetPredicate = isGFX7GFX10 in {
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
VReg_64, f64, v2f64, VReg_128>;
-defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
- VReg_64, f64>;
-
-defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
- VReg_64, f64>;
-
} // End SubtargetPredicate = isGFX7GFX10
let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
@@ -995,10 +989,6 @@ let SubtargetPredicate = isGFX10Plus in {
FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
- defm GLOBAL_ATOMIC_FMIN_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
- defm GLOBAL_ATOMIC_FMAX_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
@@ -1608,14 +1598,14 @@ defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax
}
let OtherPredicates = [isGFX10Only] in {
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
-defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
-defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
-defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN_X2", "atomic_load_fmin_flat", f64>;
-defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX_X2", "atomic_load_fmax_flat", f64>;
-defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_fmin", f64>;
-defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}
let OtherPredicates = [isGFX12Only] in {
@@ -1745,8 +1735,8 @@ defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f
// CI
//===----------------------------------------------------------------------===//
-class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
- FLAT_Real <op, ps>,
+class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps, string asmName = ps.Mnemonic> :
+ FLAT_Real <op, ps, asmName>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
let AssemblerPredicate = isGFX7Only;
let DecoderNamespace="GFX7";
@@ -1768,10 +1758,13 @@ def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
-multiclass FLAT_Real_Atomics_ci <bits<7> op> {
- defvar ps = !cast<FLAT_Pseudo>(NAME);
- def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
- def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+multiclass FLAT_Real_Atomics_ci <bits<7> op, string opName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(opName).Mnemonic> {
+ defvar ps = !cast<FLAT_Pseudo>(opName);
+ defvar ps_rtn = !cast<FLAT_Pseudo>(opName#"_RTN");
+
+ def _ci : FLAT_Real_ci<op, ps, asmName>;
+ def _RTN_ci : FLAT_Real_ci<op, ps_rtn, asmName>;
}
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30>;
@@ -1806,8 +1799,8 @@ defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e>;
defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f>;
defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e>;
-defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f>;
-defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">;
//===----------------------------------------------------------------------===//
@@ -2089,8 +2082,8 @@ let SubtargetPredicate = isGFX940Plus in {
// GFX10.
//===----------------------------------------------------------------------===//
-class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
- FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
+class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
+ FLAT_Real<op, ps, opName>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
let AssemblerPredicate = isGFX10Only;
let DecoderNamespace = "GFX10";
@@ -2102,25 +2095,28 @@ class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
let Inst{55} = 0;
}
-
-multiclass FLAT_Real_Base_gfx10<bits<7> op> {
+multiclass FLAT_Real_Base_gfx10<bits<7> op, string psName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> {
def _gfx10 :
- FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName), asmName>;
}
-multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
+multiclass FLAT_Real_RTN_gfx10<bits<7> op, string psName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> {
def _RTN_gfx10 :
- FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_RTN"), asmName>;
}
-multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
+multiclass FLAT_Real_SADDR_gfx10<bits<7> op, string psName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR").Mnemonic> {
def _SADDR_gfx10 :
- FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR"), asmName>;
}
-multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
+multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op, string psName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR_RTN").Mnemonic> {
def _SADDR_RTN_gfx10 :
- FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR_RTN"), asmName>;
}
multiclass FLAT_Real_ST_gfx10<bits<7> op> {
@@ -2128,22 +2124,25 @@ multiclass FLAT_Real_ST_gfx10<bits<7> op> {
FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
-multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
- FLAT_Real_Base_gfx10<op>,
- FLAT_Real_SADDR_gfx10<op>;
+multiclass FLAT_Real_AllAddr_gfx10<bits<7> op, string OpName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> :
+ FLAT_Real_Base_gfx10<op, OpName, asmName>,
+ FLAT_Real_SADDR_gfx10<op, OpName, asmName>;
-multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
- FLAT_Real_Base_gfx10<op>,
- FLAT_Real_RTN_gfx10<op>;
+multiclass FLAT_Real_Atomics_gfx10<bits<7> op, string OpName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> :
+ FLAT_Real_Base_gfx10<op, OpName, asmName>,
+ FLAT_Real_RTN_gfx10<op, OpName, asmName>;
-multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
- FLAT_Real_AllAddr_gfx10<op>,
- FLAT_Real_RTN_gfx10<op>,
- FLAT_Real_SADDR_RTN_gfx10<op>;
+multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op, string OpName = NAME,
+ string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> :
+ FLAT_Real_AllAddr_gfx10<op, OpName, asmName>,
+ FLAT_Real_RTN_gfx10<op, OpName, asmName>,
+ FLAT_Real_SADDR_RTN_gfx10<op, OpName, asmName>;
-multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
- FLAT_Real_RTN_gfx10<op>,
- FLAT_Real_SADDR_RTN_gfx10<op>;
+multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op, string OpName = NAME> :
+ FLAT_Real_RTN_gfx10<op, OpName>,
+ FLAT_Real_SADDR_RTN_gfx10<op, OpName>;
multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
FLAT_Real_Base_gfx10<op>,
@@ -2220,8 +2219,8 @@ defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
-defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>;
-defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">;
// ENC_FLAT_GLBL.
@@ -2278,8 +2277,8 @@ defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
-defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
-defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
+defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f, "GLOBAL_ATOMIC_MIN_F64", "global_atomic_fmin_x2">;
+defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060, "GLOBAL_ATOMIC_MAX_F64", "global_atomic_fmax_x2">;
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;
diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
index b654c28653711..a2d1c0a87ab39 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
@@ -38,14 +38,14 @@ body: |
...
# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
-# GCN: FLAT_ATOMIC_FMAX_X2
+# GCN: FLAT_ATOMIC_MAX_F64
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ FLAT_ATOMIC_MAX_F64 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -62,14 +62,14 @@ body: |
...
# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
-# GCN: FLAT_ATOMIC_FMIN_X2
+# GCN: FLAT_ATOMIC_MIN_F64
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_to_s_denorm_mode
body: |
bb.0:
- FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ FLAT_ATOMIC_MIN_F64 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -98,14 +98,14 @@ body: |
...
# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
-# GCN: FLAT_ATOMIC_FMAX_X2_RTN
+# GCN: FLAT_ATOMIC_MAX_F64_RTN
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ %2:vreg_64 = FLAT_ATOMIC_MAX_F64_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -122,14 +122,14 @@ body: |
...
# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
-# GCN: FLAT_ATOMIC_FMIN_X2_RTN
+# GCN: FLAT_ATOMIC_MIN_F64_RTN
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ %2:vreg_64 = FLAT_ATOMIC_MIN_F64_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -182,14 +182,14 @@ body: |
...
# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
-# GCN: GLOBAL_ATOMIC_FMAX_X2
+# GCN: GLOBAL_ATOMIC_MAX_F64
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ GLOBAL_ATOMIC_MAX_F64 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -206,14 +206,14 @@ body: |
...
# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
-# GCN: GLOBAL_ATOMIC_FMIN_X2
+# GCN: GLOBAL_ATOMIC_MIN_F64
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_to_s_denorm_mode
body: |
bb.0:
- GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ GLOBAL_ATOMIC_MIN_F64 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -254,14 +254,14 @@ body: |
...
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
-# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
+# GCN: GLOBAL_ATOMIC_MAX_F64_RTN
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_MAX_F64_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -278,14 +278,14 @@ body: |
...
# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
-# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
+# GCN: GLOBAL_ATOMIC_MIN_F64_RTN
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_MIN_F64_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -326,14 +326,14 @@ body: |
...
# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
-# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
+# GCN: GLOBAL_ATOMIC_MAX_F64_SADDR_RTN
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_MAX_F64_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@@ -350,14 +350,14 @@ body: |
...
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
-# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
+# GCN: GLOBAL_ATOMIC_MIN_F64_SADDR_RTN
# GFX10-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
- %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
+ %2:vreg_64 = GLOBAL_ATOMIC_MIN_F64_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `ptr addrspace(1) undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...