[PATCH] D86014: AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 14 19:20:23 PDT 2020
arsenm created this revision.
arsenm added reviewers: rampitec, kzhuravl.
Herald added subscribers: kerbowa, jfb, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely.
Herald added a project: LLVM.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
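Global instructions take signed immediate offsets, so the global atomic fadd patterns need to be matched with FLATOffsetSigned (through a new FlatSignedAtomicPatNoRtn class) rather than with FlatAtomicPatNoRtn. Tests using a negative offset (offset:-4) are added for both the f32 and v2f16 variants.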
https://reviews.llvm.org/D86014
Files:
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
@@ -54,6 +54,15 @@
ret void
}
+; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
+; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
+main_body:
+ %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
+ call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
+ ret void
+}
+
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off
define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
@@ -71,6 +80,15 @@
ret void
}
+; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
+; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+main_body:
+ %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
+ call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+ ret void
+}
+
; Make sure this artificially selects with an incorrect subtarget, but
; the feature set.
; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -783,6 +783,11 @@
(inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
+class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
+ (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
+>;
+
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : GCNPat <
(vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
@@ -971,8 +976,8 @@
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_fadd_global_noret, v2f16>;
+def : FlatSignedAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;
+def : FlatSignedAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_fadd_global_noret, v2f16>;
} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D86014.285808.patch
Type: text/x-patch
Size: 2936 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200815/1ad80898/attachment.bin>
More information about the llvm-commits mailing list