[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jun 27 02:12:24 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/96874.diff
3 Files Affected:
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+6-11)
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl (+4-2)
- (modified) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl (+2-1)
``````````diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2a1861e4413fd..54e363d6fd0e8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18657,10 +18657,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
Intrinsic::ID IID;
llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
switch (BuiltinID) {
@@ -18670,19 +18668,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
IID = Intrinsic::amdgcn_global_atomic_fmax;
break;
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
- IID = Intrinsic::amdgcn_flat_atomic_fadd;
- break;
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
IID = Intrinsic::amdgcn_flat_atomic_fmin;
break;
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
IID = Intrinsic::amdgcn_flat_atomic_fmax;
break;
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
- ArgTy = llvm::Type::getFloatTy(getLLVMContext());
- IID = Intrinsic::amdgcn_flat_atomic_fadd;
- break;
}
llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19075,7 +19066,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
llvm::AtomicRMWInst::BinOp BinOp;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19095,6 +19088,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
BinOp = llvm::AtomicRMWInst::FAdd;
break;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index 4980c44215743..60a3033a36c17 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){
}
// CHECK-LABEL: test_flat_add_local_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8{{$}}
+
// GFX90A-LABEL: test_flat_add_local_f64$local
// GFX90A: ds_add_rtn_f64
void test_flat_add_local_f64(__local double *addr, double x){
@@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){
}
// CHECK-LABEL: test_flat_global_add_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
+
// GFX90A-LABEL: test_flat_global_add_f64$local
// GFX90A: global_atomic_add_f64
void test_flat_global_add_f64(__global double *addr, double x){
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
index a4f438bea33a6..2618e2809fbbf 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
@@ -10,7 +10,8 @@ typedef half __attribute__((ext_vector_type(2))) half2;
typedef short __attribute__((ext_vector_type(2))) short2;
// CHECK-LABEL: test_flat_add_f32
-// CHECK: call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %{{.*}}, float %{{.*}})
+// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+
// GFX940-LABEL: test_flat_add_f32
// GFX940: flat_atomic_add_f32
half2 test_flat_add_f32(__generic float *addr, float x) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/96874
More information about the llvm-branch-commits mailing list