[clang] 5e4369e - [OpenMP][5.1] Support `thread_limit` on `omp target`
Johannes Doerfert via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 11 22:38:17 PST 2023
Author: Johannes Doerfert
Date: 2023-01-11T22:24:23-08:00
New Revision: 5e4369e53d3c33b3ec69c0b7ec180db8851e792a
URL: https://github.com/llvm/llvm-project/commit/5e4369e53d3c33b3ec69c0b7ec180db8851e792a
DIFF: https://github.com/llvm/llvm-project/commit/5e4369e53d3c33b3ec69c0b7ec180db8851e792a.diff
LOG: [OpenMP][5.1] Support `thread_limit` on `omp target`
It is unclear to me what happens if we have two thread_limit clauses to
choose from. I will recommend to the standards committee to disallow
that. For now, we pick the teams one.
Fixes https://github.com/llvm/llvm-project/issues/59940
Differential Revision: https://reviews.llvm.org/D141540
Added:
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/Sema/SemaOpenMP.cpp
clang/test/OpenMP/target_ast_print.cpp
clang/test/OpenMP/target_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c2328d28ec50a..2b99b302c38d0 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6429,10 +6429,8 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
}
if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
return CGF.Builder.getInt32(1);
- return DefaultThreadLimitVal;
}
- return DefaultThreadLimitVal ? DefaultThreadLimitVal
- : CGF.Builder.getInt32(0);
+ return DefaultThreadLimitVal;
}
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
@@ -6575,12 +6573,14 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
return NumThreads;
const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
CGF.getContext(), CS->getCapturedStmt());
+ // TODO: The standard is not clear how to resolve two thread limit clauses,
+ // let's pick the teams one if it's present, otherwise the target one.
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
- if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
+ if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
+ ThreadLimitClause = TLC;
CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- const auto *ThreadLimitClause =
- Dir->getSingleClause<OMPThreadLimitClause>();
CodeGenFunction::LexicalScope Scope(
CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
if (const auto *PreInit =
@@ -6595,11 +6595,15 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
}
}
}
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
- ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
- ThreadLimitVal =
- Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
}
+ }
+ if (ThreadLimitClause) {
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
+ ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
+ ThreadLimitVal =
+ Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
+ }
+ if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
!isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
CS = Dir->getInnermostCapturedStmt();
@@ -6650,7 +6654,10 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
ThreadLimitVal =
Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
}
- return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
+ if (llvm::Value *NumThreads =
+ getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
+ return NumThreads;
+ return Bld.getInt32(0);
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index ed7d8998a6ede..e6035578a9cde 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15635,6 +15635,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
break;
case OMPC_thread_limit:
switch (DKind) {
+ case OMPD_target:
case OMPD_target_teams:
case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
@@ -15676,7 +15677,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_parallel_for:
case OMPD_parallel_for_simd:
case OMPD_parallel_loop:
- case OMPD_target:
case OMPD_target_simd:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
diff --git a/clang/test/OpenMP/target_ast_print.cpp b/clang/test/OpenMP/target_ast_print.cpp
index aae5fb681ace7..c1f37fa5b1726 100644
--- a/clang/test/OpenMP/target_ast_print.cpp
+++ b/clang/test/OpenMP/target_ast_print.cpp
@@ -1108,6 +1108,8 @@ T tmain(T argc, T *argv) {
foo();
#pragma omp target defaultmap(present: pointer)
foo();
+ #pragma omp target thread_limit(C)
+ foo();
return 0;
}
@@ -1119,6 +1121,8 @@ T tmain(T argc, T *argv) {
// OMP51-NEXT: foo()
// OMP51-NEXT: #pragma omp target defaultmap(present: pointer)
// OMP51-NEXT: foo()
+// OMP51-NEXT: #pragma omp target thread_limit(C)
+// OMP51-NEXT: foo()
// OMP51-LABEL: int main(int argc, char **argv) {
int main (int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp
index a66317560494e..5a283338eaf05 100644
--- a/clang/test/OpenMP/target_codegen.cpp
+++ b/clang/test/OpenMP/target_codegen.cpp
@@ -13,6 +13,13 @@
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP51
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -fopenmp-version=51 -D_DOMP51 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -fopenmp-version=51 -D_DOMP51 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP51
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP51
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP51
+
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
@@ -902,6 +909,52 @@ class S2 {
}
};
+#ifdef _DOMP51
+void thread_limit_target(int TargetTL, int TeamsTL) {
+
+#pragma omp target
+{}
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 -1, i32 0,
+
+#pragma omp target
+#pragma omp teams
+{}
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 0,
+
+#pragma omp target thread_limit(TargetTL)
+{}
+// OMP51: [[TL:%.*]] = load {{.*}} %TargetTL.addr
+// OMP51: store {{.*}} [[TL]], {{.*}} [[CEA:%.*]]
+// OMP51: load {{.*}} [[CEA]]
+// OMP51: [[CE:%.*]] = load {{.*}} [[CEA]]
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 -1, i32 [[CE]],
+
+#pragma omp target thread_limit(TargetTL)
+#pragma omp teams
+{}
+// OMP51: [[TL:%.*]] = load {{.*}} %TargetTL.addr
+// OMP51: store {{.*}} [[TL]], {{.*}} [[CEA:%.*]]
+// OMP51: load {{.*}} [[CEA]]
+// OMP51: [[CE:%.*]] = load {{.*}} [[CEA]]
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[CE]],
+
+#pragma omp target
+#pragma omp teams thread_limit(TeamsTL)
+{}
+// OMP51: load {{.*}} %TeamsTL.addr
+// OMP51: [[TeamsL:%.*]] = load {{.*}} %TeamsTL.addr
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[TeamsL]],
+
+#pragma omp target thread_limit(TargetTL)
+#pragma omp teams thread_limit(TeamsTL)
+{}
+// OMP51: load {{.*}} %TeamsTL.addr
+// OMP51: [[TeamsL:%.*]] = load {{.*}} %TeamsTL.addr
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[TeamsL]],
+
+}
+#endif
+
// CHECK: define internal void @.omp_offloading.requires_reg()
// CHECK: call void @__tgt_register_requires(i64 1)
// CHECK: ret void
More information about the cfe-commits
mailing list