[clang] 2650375 - [OpenMP] Add amdgpu-num-work-groups attribute to OpenMP kernels (#87695)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 5 05:39:04 PDT 2024
Author: Joseph Huber
Date: 2024-04-05T07:38:01-05:00
New Revision: 2650375b3beeb60596ca38e2e06685e48e8ed01f
URL: https://github.com/llvm/llvm-project/commit/2650375b3beeb60596ca38e2e06685e48e8ed01f
DIFF: https://github.com/llvm/llvm-project/commit/2650375b3beeb60596ca38e2e06685e48e8ed01f.diff
LOG: [OpenMP] Add amdgpu-num-work-groups attribute to OpenMP kernels (#87695)
Summary:
This new attribute was introduced recently. We already do this for NVPTX
kernels so we should apply this for AMDGPU as well. This patch simply
applies this metadata in cases where a lower bound is known
Added:
clang/test/OpenMP/thread_limit_amdgpu.c
Modified:
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Removed:
################################################################################
diff --git a/clang/test/OpenMP/thread_limit_amdgpu.c b/clang/test/OpenMP/thread_limit_amdgpu.c
new file mode 100644
index 00000000000000..f884eeb73c3ff1
--- /dev/null
+++ b/clang/test/OpenMP/thread_limit_amdgpu.c
@@ -0,0 +1,34 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+void foo(int N) {
+#pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < N; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd thread_limit(4)
+ for (int i = 0; i < N; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42))))
+ for (int i = 0; i < N; ++i)
+ ;
+#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) num_threads(22)
+ for (int i = 0; i < N; ++i)
+ ;
+}
+
+#endif
+
+// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l10({{.*}}) #[[ATTR1:.+]] {
+// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l13({{.*}}) #[[ATTR2:.+]] {
+// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l16({{.*}}) #[[ATTR3:.+]] {
+// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l19({{.*}}) #[[ATTR4:.+]] {
+
+// CHECK: attributes #[[ATTR1]] = { {{.*}} "amdgpu-flat-work-group-size"="1,256" {{.*}} }
+// CHECK: attributes #[[ATTR2]] = { {{.*}} "amdgpu-flat-work-group-size"="1,4" {{.*}} }
+// CHECK: attributes #[[ATTR3]] = { {{.*}} "amdgpu-flat-work-group-size"="1,42" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} }
+// CHECK: attributes #[[ATTR4]] = { {{.*}} "amdgpu-flat-work-group-size"="1,22" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} }
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 16507a69ea8502..7fd8474c2ec890 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4791,6 +4791,9 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
updateNVPTXMetadata(Kernel, "minctasm", LB, false);
}
+ if (T.isAMDGPU())
+ Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");
+
Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB));
}
More information about the cfe-commits
mailing list