[llvm] [AMDGPU] Overload image atomic swap to allow float as well. (PR #107283)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 4 11:22:49 PDT 2024
https://github.com/sstipanovic created https://github.com/llvm/llvm-project/pull/107283
LLPC can generate the llvm.amdgcn.image.atomic.swap intrinsic with a float data argument and a float return type. This went unnoticed until CreateIntrinsic with implicit mangling was used.
From b1f072558cca2995849b03f8e9d66b1c52557d7b Mon Sep 17 00:00:00 2001
From: Stefan Stipanovic <Stefan.Stipanovic at amd.com>
Date: Wed, 4 Sep 2024 20:15:29 +0200
Subject: [PATCH] [AMDGPU] Overload image atomic swap to allow float as well.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 21 ++++++++++++++-----
.../AMDGPU/llvm.amdgcn.image.atomic.dim.ll | 13 ++++++++++++
2 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index dc13a35c66f9ab..4cf967ed77d642 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -811,6 +811,12 @@ class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
let RetTypes = [llvm_anyfloat_ty];
}
+class AMDGPUDimAtomicAnyProfile<string opmod, AMDGPUDimProps dim,
+ list<AMDGPUArg> dataargs>
+ : AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
+ let RetTypes = [llvm_any_ty];
+}
+
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
: AMDGPUDimProfile<"GET_RESINFO", dim> {
let RetTypes = [llvm_anyfloat_ty];
@@ -1023,26 +1029,31 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
- int isFloat = 0> {
+ int isFloat = 0, int isAny = 0> {
foreach dim = AMDGPUDims.All in {
def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
!if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
- AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
+ !if (isAny, AMDGPUDimAtomicAnyProfile<opmod, dim, dataargs>,
+ AMDGPUDimAtomicProfile<opmod, dim, dataargs>)),
[], [SDNPMemOperand]>;
}
}
- multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
+ multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0, int isAny = 0 > {
defm ""
: AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
- isFloat>;
+ isFloat, isAny>;
}
multiclass AMDGPUImageDimFloatAtomic<string opmod> {
defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
}
- defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
+ multiclass AMDGPUImageDimAnyAtomic<string opmod> {
+ defm "" : AMDGPUImageDimAtomic<opmod, 0 /*isFloat*/, 1 /*isAny*/>;
+ }
+
+ defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAnyAtomic<"ATOMIC_SWAP">;
defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
index f13b897971707a..a661730ba2d1b5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
@@ -30,6 +30,17 @@ main_body:
ret <2 x float> %out
}
+; GCN-LABEL: {{^}}atomic_swap_1d_float:
+; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX90A: image_atomic_swap v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
+; GFX12: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN ;
+define amdgpu_ps float @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) {
+main_body:
+ %v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret float %v
+}
+
; GCN-LABEL: {{^}}atomic_add_1d:
; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}}
@@ -299,6 +310,8 @@ declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32
declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0
declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0
+declare float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float, i32, <8 x i32>, i32, i32) #0
+
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
More information about the llvm-commits mailing list