[clang] 69061f9 - [AMDGPU] Add clang builtin for __builtin_amdgcn_ds_atomic_fadd_v2f16

Fri Mar 24 08:28:02 PDT 2023

Author: Mariusz Sikora
Date: 2023-03-24T16:27:44+01:00
New Revision: 69061f96275c3053623a8699ce641c0f0ac61aed

URL: https://github.com/llvm/llvm-project/commit/69061f96275c3053623a8699ce641c0f0ac61aed
DIFF: https://github.com/llvm/llvm-project/commit/69061f96275c3053623a8699ce641c0f0ac61aed.diff

LOG: [AMDGPU] Add clang builtin for __builtin_amdgcn_ds_atomic_fadd_v2f16

Differential Revision: https://reviews.llvm.org/D146808

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsAMDGPU.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
    clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ed75b58ddbf96..965bd97a97d79 100644

--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -231,6 +231,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "ato
 TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
 TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts")
 
 //===----------------------------------------------------------------------===//
 // Deep learning builtins.

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b3aea13878c1c..c8112b0ea0ec0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17213,7 +17213,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {Addr, Val});
   }
   case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
     Intrinsic::ID IID;
     llvm::Type *ArgTy;
     switch (BuiltinID) {
@@ -17225,6 +17226,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
       ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
       IID = Intrinsic::amdgcn_ds_fadd;
       break;
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
+      ArgTy = llvm::FixedVectorType::get(
+          llvm::Type::getHalfTy(getLLVMContext()), 2);
+      IID = Intrinsic::amdgcn_ds_fadd;
+      break;
     }
     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
     llvm::Value *Val = EmitScalarExpr(E->getArg(1));

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
index 3044fdedca36b..39191322ca6e4 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx11-err.cl
@@ -15,4 +15,5 @@ void test_atomic_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
   __builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}}
   __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}}
   __builtin_amdgcn_ds_atomic_fadd_v2bf16(addrs2l, xs2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2bf16' needs target feature atomic-ds-pk-add-16-insts}}
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
index fd813ac029eab..0548b825a7265 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
@@ -4,9 +4,9 @@
 
 typedef half __attribute__((ext_vector_type(2))) half2;
 
-void test_global_add_2f16(__global half2 *addrh2, half2 xh2,
-                          __global float *addrf, float xf,
-                          __global double *addr, double x) {
+void test_global_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
+                      __global float *addrf, float xf,
+                      __global double *addr, double x) {
   half2 *half_rtn;
   float *fp_rtn;
   double *rtn;
@@ -18,4 +18,5 @@ void test_global_add_2f16(__global half2 *addrh2, half2 xh2,
   *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmin_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_flat_atomic_fmax_f64' needs target feature gfx90a-insts}}
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
index e3b4df540246f..f651ce349e206 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
@@ -6,7 +6,7 @@
 typedef half  __attribute__((ext_vector_type(2))) half2;
 typedef short __attribute__((ext_vector_type(2))) short2;
 
-void test_atomic_fadd(__global half2 *addrh2, half2 xh2,
+void test_atomic_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
                       __global short2 *addrs2, __local short2 *addrs2l, short2 xs2,
                       __global float *addrf, float xf) {
   __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx940-insts}}
@@ -14,4 +14,5 @@ void test_atomic_fadd(__global half2 *addrh2, half2 xh2,
   __builtin_amdgcn_flat_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2bf16' needs target feature atomic-flat-pk-add-16-insts}}
   __builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}}
   __builtin_amdgcn_ds_atomic_fadd_v2bf16(addrs2l, xs2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2bf16' needs target feature atomic-ds-pk-add-16-insts}}
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addrh2l, xh2); // expected-error{{'__builtin_amdgcn_ds_atomic_fadd_v2f16' needs target feature atomic-ds-pk-add-16-insts}}
 }

diff  --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
index 12b85937b9063..d8fbc5fb4d724 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
@@ -48,3 +48,19 @@ short2 test_global_add_2bf16(__global short2 *addr, short2 x) {
 short2 test_local_add_2bf16(__local short2 *addr, short2 x) {
   return __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
 }
+
+// CHECK-LABEL: test_local_add_2f16
+// CHECK: call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %{{.*}}, <2 x half> %
+// GFX940-LABEL:  test_local_add_2f16
+// GFX940: ds_pk_add_rtn_f16
+half2 test_local_add_2f16(__local half2 *addr, half2 x) {
+  return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
+}
+
+// CHECK-LABEL: test_local_add_2f16_noret
+// CHECK: call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %{{.*}}, <2 x half> %
+// GFX940-LABEL:  test_local_add_2f16_noret
+// GFX940: ds_pk_add_f16
+void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
+  __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
+}