[clang] 5e4369e - [OpenMP][5.1] Support `thread_limit` on `omp target`

Johannes Doerfert via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 11 22:38:17 PST 2023


Author: Johannes Doerfert
Date: 2023-01-11T22:24:23-08:00
New Revision: 5e4369e53d3c33b3ec69c0b7ec180db8851e792a

URL: https://github.com/llvm/llvm-project/commit/5e4369e53d3c33b3ec69c0b7ec180db8851e792a
DIFF: https://github.com/llvm/llvm-project/commit/5e4369e53d3c33b3ec69c0b7ec180db8851e792a.diff

LOG: [OpenMP][5.1] Support `thread_limit` on `omp target`

It is unclear to me what happens if we have two thread_limit clauses to
choose from. I will recommend to the standards committee to disallow
that. For now, we pick the teams one.

Fixes https://github.com/llvm/llvm-project/issues/59940

Differential Revision: https://reviews.llvm.org/D141540

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/Sema/SemaOpenMP.cpp
    clang/test/OpenMP/target_ast_print.cpp
    clang/test/OpenMP/target_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c2328d28ec50a..2b99b302c38d0 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6429,10 +6429,8 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
     }
     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
       return CGF.Builder.getInt32(1);
-    return DefaultThreadLimitVal;
   }
-  return DefaultThreadLimitVal ? DefaultThreadLimitVal
-                               : CGF.Builder.getInt32(0);
+  return DefaultThreadLimitVal;
 }
 
 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
@@ -6575,12 +6573,14 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
       return NumThreads;
     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
         CGF.getContext(), CS->getCapturedStmt());
+    // TODO: The standard is not clear how to resolve two thread limit clauses,
+    //       let's pick the teams one if it's present, otherwise the target one.
+    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
-      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
+      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
+        ThreadLimitClause = TLC;
         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-        const auto *ThreadLimitClause =
-            Dir->getSingleClause<OMPThreadLimitClause>();
         CodeGenFunction::LexicalScope Scope(
             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
         if (const auto *PreInit =
@@ -6595,11 +6595,15 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
             }
           }
         }
-        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
-            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
-        ThreadLimitVal =
-            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
       }
+    }
+    if (ThreadLimitClause) {
+      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
+          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
+      ThreadLimitVal =
+          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
+    }
+    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
         CS = Dir->getInnermostCapturedStmt();
@@ -6650,7 +6654,10 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
       ThreadLimitVal =
           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
     }
-    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
+    if (llvm::Value *NumThreads =
+            getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
+      return NumThreads;
+    return Bld.getInt32(0);
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
   case OMPD_target_parallel_for_simd:

diff  --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index ed7d8998a6ede..e6035578a9cde 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15635,6 +15635,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     break;
   case OMPC_thread_limit:
     switch (DKind) {
+    case OMPD_target:
     case OMPD_target_teams:
     case OMPD_target_teams_distribute:
     case OMPD_target_teams_distribute_simd:
@@ -15676,7 +15677,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_parallel_for:
     case OMPD_parallel_for_simd:
     case OMPD_parallel_loop:
-    case OMPD_target:
     case OMPD_target_simd:
     case OMPD_target_parallel:
     case OMPD_target_parallel_for:

diff  --git a/clang/test/OpenMP/target_ast_print.cpp b/clang/test/OpenMP/target_ast_print.cpp
index aae5fb681ace7..c1f37fa5b1726 100644
--- a/clang/test/OpenMP/target_ast_print.cpp
+++ b/clang/test/OpenMP/target_ast_print.cpp
@@ -1108,6 +1108,8 @@ T tmain(T argc, T *argv) {
   foo();
   #pragma omp target defaultmap(present: pointer)
   foo();
+  #pragma omp target thread_limit(C)
+  foo();
 
   return 0;
 }
@@ -1119,6 +1121,8 @@ T tmain(T argc, T *argv) {
 // OMP51-NEXT: foo()
 // OMP51-NEXT: #pragma omp target defaultmap(present: pointer)
 // OMP51-NEXT: foo()
+// OMP51-NEXT: #pragma omp target thread_limit(C)
+// OMP51-NEXT: foo()
 
 // OMP51-LABEL: int main(int argc, char **argv) {
 int main (int argc, char **argv) {

diff  --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp
index a66317560494e..5a283338eaf05 100644
--- a/clang/test/OpenMP/target_codegen.cpp
+++ b/clang/test/OpenMP/target_codegen.cpp
@@ -13,6 +13,13 @@
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50
 
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP51
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -fopenmp-version=51 -D_DOMP51 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -fopenmp-version=51 -D_DOMP51 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP51
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP51
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -fopenmp-version=51 -D_DOMP51 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP51
+
 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
@@ -902,6 +909,52 @@ class S2 {
   }
 };
 
+#ifdef _DOMP51
+void thread_limit_target(int TargetTL, int TeamsTL) {
+
+#pragma omp target
+{}
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 -1, i32 0,
+
+#pragma omp target
+#pragma omp teams
+{}
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 0,
+
+#pragma omp target thread_limit(TargetTL)
+{}
+// OMP51: [[TL:%.*]] = load {{.*}} %TargetTL.addr
+// OMP51: store {{.*}} [[TL]], {{.*}} [[CEA:%.*]]
+// OMP51: load {{.*}} [[CEA]]
+// OMP51: [[CE:%.*]] = load {{.*}} [[CEA]]
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 -1, i32 [[CE]],
+
+#pragma omp target thread_limit(TargetTL)
+#pragma omp teams
+{}
+// OMP51: [[TL:%.*]] = load {{.*}} %TargetTL.addr
+// OMP51: store {{.*}} [[TL]], {{.*}} [[CEA:%.*]]
+// OMP51: load {{.*}} [[CEA]]
+// OMP51: [[CE:%.*]] = load {{.*}} [[CEA]]
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[CE]],
+
+#pragma omp target
+#pragma omp teams thread_limit(TeamsTL)
+{}
+// OMP51: load {{.*}} %TeamsTL.addr
+// OMP51: [[TeamsL:%.*]] = load {{.*}} %TeamsTL.addr
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[TeamsL]],
+
+#pragma omp target thread_limit(TargetTL)
+#pragma omp teams thread_limit(TeamsTL)
+{}
+// OMP51: load {{.*}} %TeamsTL.addr
+// OMP51: [[TeamsL:%.*]] = load {{.*}} %TeamsTL.addr
+// OMP51: call i32 @__tgt_target_kernel({{.*}}, i64 -1, i32 0, i32 [[TeamsL]],
+
+}
+#endif
+
 // CHECK:     define internal void @.omp_offloading.requires_reg()
 // CHECK:     call void @__tgt_register_requires(i64 1)
 // CHECK:     ret void


        


More information about the cfe-commits mailing list