[llvm] r367505 - AMDGPU: Correct FP atomic patterns
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 20:22:40 PDT 2019
Author: arsenm
Date: Wed Jul 31 20:22:40 2019
New Revision: 367505
URL: http://llvm.org/viewvc/llvm-project?rev=367505&view=rev
Log:
AMDGPU: Correct FP atomic patterns
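The no-return FP atomic add instructions need to use an fadd pattern
fragment, not an (integer) add. Also rename the fragments with a
"_noret" suffix so the no-return part is clear in the name.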
Modified:
llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=367505&r1=367504&r2=367505&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Wed Jul 31 20:22:40 2019
@@ -1043,10 +1043,10 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"
let SubtargetPredicate = HasAtomicFaddInsts in {
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_add_f32", VGPR_32, f32, atomic_add_global
+ "buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+ "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
>;
} // End SubtargetPredicate = HasAtomicFaddInsts
Modified: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td?rev=367505&r1=367504&r2=367505&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td Wed Jul 31 20:22:40 2019
@@ -686,10 +686,10 @@ let SubtargetPredicate = isGFX10Plus, is
let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
- "global_atomic_add_f32", VGPR_32, f32, atomic_add_global
+ "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
- "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+ "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
>;
} // End SubtargetPredicate = HasAtomicFaddInsts
@@ -847,9 +847,6 @@ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D
} // End OtherPredicates = [HasFlatAddressSpace]
-def atomic_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_fadd>;
-def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>;
-
let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
@@ -930,8 +927,8 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global, f32>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global, v2f16>;
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;
} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=367505&r1=367504&r2=367505&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Jul 31 20:22:40 2019
@@ -309,6 +309,10 @@ def atomic_load_fadd_local : local_binar
def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>;
def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>;
+def atomic_fadd_global_noret : global_binary_atomic_op_frag<SIglobal_atomic_fadd>;
+def atomic_pk_fadd_global_noret : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>;
+
+
//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
More information about the llvm-commits
mailing list