[llvm] AMDGPU: Fix mis-selecting saddr flat atomics on gfx9 (PR #156860)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 04:34:51 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This would select the pseudo and then crash when lowering it to an MC instruction. I believe this has been broken since 9912ccb0b4d17.
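
For context on the failure mode: the saddr form is selected when a flat atomic's base address is uniform (an SGPR pair) plus a 32-bit VGPR offset. A minimal IR sketch of that addressing shape, with hypothetical function and value names (the updated flat-saddr-atomics.ll covers the real cases):

```llvm
; Hypothetical reproducer sketch: a flat atomicrmw addressed as a uniform
; (SGPR) base plus a zero-extended VGPR offset, the shape that matches the
; GlobalSAddr complex pattern and selects a FLAT_ATOMIC_*_SADDR pseudo.
; On a gfx9 subtarget, which has no saddr encoding for flat_* instructions,
; selecting that pseudo would crash once it reached MC lowering.
define i32 @flat_atomic_add_saddr(ptr inreg %sbase, i32 %voffset) {
  %zext.offset = zext i32 %voffset to i64
  %gep = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
  %old = atomicrmw add ptr %gep, i32 42 seq_cst
  ret i32 %old
}
```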
---
Patch is 437.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156860.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+12-6)
- (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+55-35)
- (modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+13-19)
- (modified) llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll (+7748-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0e0b84f7e3374..a366db1c580ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -68,13 +68,15 @@ def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets",
def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts",
"FlatGlobalInsts",
"true",
- "Have global_* flat memory instructions"
+ "Have global_* flat memory instructions",
+ [FeatureFlatAddressSpace]
>;
def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
"FlatScratchInsts",
"true",
- "Have scratch_* flat memory instructions"
+ "Have scratch_* flat memory instructions",
+ [FeatureFlatAddressSpace]
>;
def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
@@ -92,7 +94,8 @@ def FeatureEnableFlatScratch : SubtargetFeature<"enable-flat-scratch",
def FeatureFlatGVSMode : SubtargetFeature<"flat-gvs-mode",
"FlatGVSMode",
"true",
- "Have GVS addressing mode with flat_* instructions"
+ "Have GVS addressing mode with flat_* instructions",
+ [FeatureFlatAddressSpace]
>;
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
@@ -934,13 +937,15 @@ def FeatureAtomicFMinFMaxF64GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-glo
def FeatureAtomicFMinFMaxF32FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f32",
"HasAtomicFMinFMaxF32FlatInsts",
"true",
- "Has flat memory instructions for atomicrmw fmin/fmax for float"
+ "Has flat memory instructions for atomicrmw fmin/fmax for float",
+ [FeatureFlatAddressSpace]
>;
def FeatureAtomicFMinFMaxF64FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f64",
"HasAtomicFMinFMaxF64FlatInsts",
"true",
- "Has flat memory instructions for atomicrmw fmin/fmax for double"
+ "Has flat memory instructions for atomicrmw fmin/fmax for double",
+ [FeatureFlatAddressSpace]
>;
def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
@@ -992,7 +997,8 @@ def FeatureFlatAtomicFaddF32Inst
: SubtargetFeature<"flat-atomic-fadd-f32-inst",
"HasFlatAtomicFaddF32Inst",
"true",
- "Has flat_atomic_add_f32 instruction"
+ "Has flat_atomic_add_f32 instruction",
+ [FeatureFlatAddressSpace]
>;
def FeatureFlatBufferGlobalAtomicFaddF64Inst
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 1617f7954a5ee..9056d4cad19ea 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -297,7 +297,7 @@ multiclass FLAT_Flat_Store_Pseudo_t16<string opName> {
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterOperand regClass = AVLdSt_32,
bit HasTiedInput = 0> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
@@ -347,7 +347,7 @@ multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterOperand regClas
}
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterOperand regClass = AVLdSt_32> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
@@ -1296,19 +1296,19 @@ let SubtargetPredicate = isGFX10Plus in {
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", AVLdSt_64, f64, v2f64, AVLdSt_128>;
} // End SubtargetPredicate = isGFX10Plus
-let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_add_f32", AVLdSt_32, f32
>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_pk_add_f16", AVLdSt_32, v2f16
>;
-let OtherPredicates = [HasAtomicFaddRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddRtnInsts in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
"global_atomic_add_f32", AVLdSt_32, f32
>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
"global_atomic_pk_add_f16", AVLdSt_32, v2f16
>;
@@ -1442,8 +1442,10 @@ class FlatStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
class FlatAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ComplexPattern pat,
ValueType vt, ValueType data_vt = vt> : GCNPat <
(vt (node (pat (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), data_vt:$data)),
- (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset, $cpol)
->;
+ (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset, $cpol)> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
+}
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
@@ -1476,12 +1478,13 @@ multiclass FlatAtomicNoRtnPatBase <string inst, string node, ValueType vt,
let AddedComplexity = 1 in
def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
- (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+ (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node),
GlobalSAddr, vt, data_vt> {
let AddedComplexity = 9;
- let SubtargetPredicate = HasFlatGVSMode;
}
}
@@ -1500,11 +1503,12 @@ multiclass FlatAtomicRtnPatBase <string inst, string node, ValueType vt,
defvar rtnNode = !cast<SDPatternOperator>(node);
def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
- (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+ (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, GlobalSAddrGLC, vt, data_vt> {
let AddedComplexity = 8;
- let SubtargetPredicate = HasFlatGVSMode;
}
}
@@ -1540,8 +1544,10 @@ multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt,
class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
(vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
- (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
->;
+ (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
+}
multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
ValueType data_vt = vt, int complexity = 0,
@@ -1651,30 +1657,42 @@ multiclass GlobalStoreLDSPats<FLAT_Pseudo inst, SDPatternOperator node> {
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat <inst, node, vt> {
let AddedComplexity = 10;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 11;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
}
multiclass GlobalFLATLoadPats_M0<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat_M0 <inst, node, vt> {
let AddedComplexity = 10;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
def : GlobalLoadSaddrPat_M0<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 11;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
}
multiclass GlobalFLATLoadPats_CPOL<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat_CPOL<inst, node, vt> {
let AddedComplexity = 10;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
def : GlobalLoadSaddrPat_CPOL<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 11;
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
}
}
@@ -1701,10 +1719,14 @@ multiclass GlobalFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Value
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> {
def : FlatStoreSignedPat <inst, node, vt> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
let AddedComplexity = 10;
}
def : FlatStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
+ let SubtargetPredicate = inst.SubtargetPredicate;
+ let OtherPredicates = inst.OtherPredicates;
let AddedComplexity = 11;
}
}
@@ -1849,7 +1871,9 @@ multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, Valu
}
multiclass FlatLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
- def : FlatLoadPat <inst, node, vt>;
+ def : FlatLoadPat <inst, node, vt> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 9;
@@ -1876,7 +1900,9 @@ multiclass FlatLoadPats_D16_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueT
}
multiclass FlatStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
- def : FlatStorePat <inst, node, vt>;
+ def : FlatStorePat <inst, node, vt> {
+ let OtherPredicates = [HasFlatAddressSpace];
+ }
def : FlatStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 9;
@@ -1893,8 +1919,6 @@ multiclass FlatStorePats_t16<FLAT_Pseudo inst, SDPatternOperator node, ValueType
}
}
-let OtherPredicates = [HasFlatAddressSpace] in {
-
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_aext_8_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>;
defm : FlatLoadPats <FLAT_LOAD_USHORT, atomic_load_aext_16_flat, i32>;
@@ -2018,8 +2042,6 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
defm : FlatStorePats <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
defm : FlatStorePats <FLAT_STORE_SHORT, store_flat, i16>;
-} // End OtherPredicates = [HasFlatAddressSpace]
-
let OtherPredicates = [isGFX12Plus] in
defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
@@ -2048,8 +2070,6 @@ defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
defm : FlatLoadPats_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
}
-let OtherPredicates = [HasFlatGlobalInsts] in {
-
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_aext_8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_aext_16_global, i32>;
@@ -2174,7 +2194,7 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_glo
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
-let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
+let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
@@ -2194,7 +2214,7 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i
let SubtargetPredicate = isGFX12Plus in {
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>;
- let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
+ let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>;
}
@@ -2249,7 +2269,7 @@ let OtherPredicates = [isGFX1250Plus] in {
defm : GlobalStoreLDSPats <GLOBAL_STORE_ASYNC_FROM_LDS_B128, int_amdgcn_global_store_async_from_lds_b128>;
}
-let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
+let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
}
@@ -2267,44 +2287,44 @@ let OtherPredicates = [isGFX12Only] in {
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
}
-let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
}
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
}
-let OtherPredicates = [HasAtomicFaddRtnInsts] in {
+let SubtargetPredicate = HasAtomicFaddRtnInsts in {
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
}
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
}
-let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
+let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
}
-let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
+let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
}
-let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
+let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
}
-let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
+let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_flat", v2f16>;
defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>;
}
-let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in
+let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_global", v2bf16>;
-} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
+} // End SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, AddedComplexity = 10
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {
diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
index 3266fde10f9fb..8d56c17fbc0c1 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
@@ -11,18 +11,16 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v0, v1, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
-; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v0, v1, v0, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr inbounds i32, ptr %addr, i32 -4
@@ -35,18 +33,16 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
+; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v0, v1, s[0:1] offset:-16
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32_forced:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
-; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
+; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v1, v0, s[0:1] offset:-16
; GFX12-GISEL-NEXT: s_endpgm
entry:
%gep = getelementptr inbounds i32, ptr %addr, i32 -4
@@ -57,13 +53,12 @@ entry:
define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr %use) {
; GFX12-SDAG-LABEL: flat_atomic_cond_sub_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
-; GFX12-SDAG-NEXT: s_clause 0x1
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
-; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v2, v0, v1, s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT: flat_store_b32 v[0:1], v2
@@ -75,9 +70,8 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
-; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v...
[truncated]
``````````
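The recurring change in the diff is one idiom: each selection pattern copies its predicates from the pseudo it emits, so the pattern cannot fire on a subtarget that lacks the instruction. A minimal sketch of the shape (class name hypothetical, modeled on FlatAtomicSaddrPat above):

```tablegen
// Hypothetical sketch of the idiom: the pattern inherits the predicates of
// the instruction it selects, so enabling the pattern on a subtarget where
// the pseudo is unavailable becomes impossible by construction.
class PredicatedSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                          ValueType vt> : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset),
                         i32:$offset, CPol:$cpol), vt:$data)),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset, $cpol)> {
  // Copy the gates from the instruction definition instead of restating them.
  let SubtargetPredicate = inst.SubtargetPredicate;
  let OtherPredicates = inst.OtherPredicates;
}
```

This is also what lets the patch drop the blanket `let OtherPredicates = [HasFlatAddressSpace]` and `[HasFlatGlobalInsts]` regions: each pattern now carries the correct gate itself.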
</details>
https://github.com/llvm/llvm-project/pull/156860
More information about the llvm-commits mailing list