[flang-commits] [flang] b8a9c50 - [AMDGPU] Add target feature gws to clang

Yaxun Liu via flang-commits flang-commits at lists.llvm.org
Fri Aug 25 08:50:59 PDT 2023


Author: Yaxun (Sam) Liu
Date: 2023-08-25T11:50:47-04:00
New Revision: b8a9c50f22947284577a0a49038ea8ae5d45d6fa

URL: https://github.com/llvm/llvm-project/commit/b8a9c50f22947284577a0a49038ea8ae5d45d6fa
DIFF: https://github.com/llvm/llvm-project/commit/b8a9c50f22947284577a0a49038ea8ae5d45d6fa.diff

LOG: [AMDGPU] Add target feature gws to clang

Reviewed by: Matt Arsenault

Differential Revision: https://reviews.llvm.org/D158367

Added: 
    clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl

Modified: 
    clang/include/clang/Basic/BuiltinsAMDGPU.def
    clang/lib/Basic/Targets/AMDGPU.cpp
    flang/test/Lower/OpenMP/target_cpu_features.f90
    llvm/lib/TargetParser/TargetParser.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 29aa9ca7552ed6..532a91fd903e87 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -67,11 +67,6 @@ BUILTIN(__builtin_amdgcn_sched_group_barrier, "vIiIiIi", "n")
 BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
 BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
-BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n")
-BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n")
-BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n")
-BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
-BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
 BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
 BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n")
 
@@ -172,6 +167,15 @@ BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
 BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc")
 BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc")
 
+//===----------------------------------------------------------------------===//
+// GWS builtins.
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n", "gws")
+TARGET_BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n", "gws")
+TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n", "gws")
+TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n", "gws")
+TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n", "gws")
+
 //===----------------------------------------------------------------------===//
 // CI+ only builtins.
 //===----------------------------------------------------------------------===//

diff  --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 0567fcdf080d4f..409ae32ab42421 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -244,7 +244,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
 
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
   CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
-  ReadOnlyFeatures.insert("image-insts");
+  for (auto F : {"image-insts", "gws"})
+    ReadOnlyFeatures.insert(F);
   HalfArgsAndReturns = true;
 }
 

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl
new file mode 100644
index 00000000000000..d23e6f29483261
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple amdgcn -target-feature +gws -o /dev/null %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=GWS %s
+
+// GWS: warning: feature flag '+gws' is ignored since the feature is read only [-Winvalid-command-line-argument]
+
+kernel void test() {}

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl
new file mode 100644
index 00000000000000..0f59b31202882e
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx803 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @test_builtins_amdgcn_gws_insts
+// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.amdgcn.ds.gws.init(i32 [[A]], i32 [[B]])
+// CHECK-NEXT:    tail call void @llvm.amdgcn.ds.gws.barrier(i32 [[A]], i32 [[B]])
+// CHECK-NEXT:    tail call void @llvm.amdgcn.ds.gws.sema.v(i32 [[A]])
+// CHECK-NEXT:    tail call void @llvm.amdgcn.ds.gws.sema.br(i32 [[A]], i32 [[B]])
+// CHECK-NEXT:    tail call void @llvm.amdgcn.ds.gws.sema.p(i32 [[A]])
+// CHECK-NEXT:    ret void
+//
+kernel void test_builtins_amdgcn_gws_insts(uint a, uint b) {
+  __builtin_amdgcn_ds_gws_init(a, b);
+  __builtin_amdgcn_ds_gws_barrier(a, b);
+  __builtin_amdgcn_ds_gws_sema_v(a);
+  __builtin_amdgcn_ds_gws_sema_br(a, b);
+  __builtin_amdgcn_ds_gws_sema_p(a);
+}

diff  --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90
index 08aee02febb263..c6159342c023aa 100644
--- a/flang/test/Lower/OpenMP/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -8,7 +8,7 @@
 !CHECK: omp.target = #omp.target<target_cpu = "gfx908",
 !CHECK-SAME: target_features = "+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,
 !CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
-!CHECK-SAME: +gfx8-insts,+gfx9-insts,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
+!CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
 !CHECK-SAME: +wavefrontsize64">
 !CHECK-LABEL: func.func @_QPomp_target_simple()
 subroutine omp_target_simple

diff  --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 7faa992e472ec5..fb7ede1b37e609 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -286,6 +286,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["gfx11-insts"] = true;
       Features["atomic-fadd-rtn-insts"] = true;
       Features["image-insts"] = true;
+      Features["gws"] = true;
       break;
     case GK_GFX1036:
     case GK_GFX1035:
@@ -311,6 +312,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["image-insts"] = true;
       Features["s-memrealtime"] = true;
       Features["s-memtime-inst"] = true;
+      Features["gws"] = true;
       break;
     case GK_GFX1012:
     case GK_GFX1011:
@@ -333,6 +335,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["image-insts"] = true;
       Features["s-memrealtime"] = true;
       Features["s-memtime-inst"] = true;
+      Features["gws"] = true;
       break;
     case GK_GFX942:
     case GK_GFX941:
@@ -362,6 +365,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
       Features["s-memrealtime"] = true;
       Features["ci-insts"] = true;
       Features["s-memtime-inst"] = true;
+      Features["gws"] = true;
       break;
     case GK_GFX90A:
       Features["gfx90a-insts"] = true;
@@ -412,6 +416,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
     case GK_GFX600:
       Features["image-insts"] = true;
       Features["s-memtime-inst"] = true;
+      Features["gws"] = true;
       break;
     case GK_NONE:
       break;


        


More information about the flang-commits mailing list