[PATCH] D146840: [AMDGPU] Replace target feature for global fadd32

Anshil Gandhi via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 28 10:24:08 PDT 2023


gandhi21299 updated this revision to Diff 509067.
gandhi21299 added a comment.

- Removed builtins-amdgcn-gfx908.cl from the patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146840/new/

https://reviews.llvm.org/D146840

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
  clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl


Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -43,3 +43,9 @@
 void test_s_wait_event_export_ready() {
   __builtin_amdgcn_s_wait_event_export_ready();
 }
+
+// CHECK-LABEL: @test_global_add_f32
+// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}})
+void test_global_add_f32(float *rtn, global float *addr, float x) {
+  *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
+}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl
@@ -11,7 +11,7 @@
   float *fp_rtn;
   double *rtn;
   *half_rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}}
-  *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature gfx90a-insts}}
+  *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature atomic-fadd-rtn-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmax_f64' needs target feature gfx90a-insts}}
   *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmin_f64' needs target feature gfx90a-insts}}
Index: clang/test/CodeGenOpenCL/amdgpu-features.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -72,9 +72,9 @@
 // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
@@ -86,10 +86,10 @@
 // GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
 // GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1103-W64: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
+// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
 
 kernel void test() {}
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -206,6 +206,7 @@
       Features["gfx10-insts"] = true;
       Features["gfx10-3-insts"] = true;
       Features["gfx11-insts"] = true;
+      Features["atomic-fadd-rtn-insts"] = true;
       break;
     case GK_GFX1036:
     case GK_GFX1035:
@@ -264,6 +265,7 @@
     case GK_GFX90A:
       Features["gfx90a-insts"] = true;
       Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+      Features["atomic-fadd-rtn-insts"] = true;
       [[fallthrough]];
     case GK_GFX908:
       Features["dot3-insts"] = true;
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -214,7 +214,7 @@
 TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
 
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts")
 TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts")


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D146840.509067.patch
Type: text/x-patch
Size: 9635 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20230328/8e337f29/attachment-0001.bin>


More information about the cfe-commits mailing list