[Openmp-commits] [openmp] 64549f0 - [OpenMP][5.1] Fix parallel masked is ignored #59939
Jose Manuel Monsalve Diaz via Openmp-commits
openmp-commits at lists.llvm.org
Mon Apr 3 13:36:36 PDT 2023
Author: Rafael A. Herrera Guaitero
Date: 2023-04-03T20:33:55Z
New Revision: 64549f0903e244fbe2e7f0131698334b6e45dc10
URL: https://github.com/llvm/llvm-project/commit/64549f0903e244fbe2e7f0131698334b6e45dc10
DIFF: https://github.com/llvm/llvm-project/commit/64549f0903e244fbe2e7f0131698334b6e45dc10.diff
LOG: [OpenMP][5.1] Fix parallel masked is ignored #59939
Code generation support for 'parallel masked' directive.
The `EmitOMPParallelMaskedDirective` was implemented.
In addition, the appropriate device functions were added.
Fix #59939.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D143527
Added:
clang/test/OpenMP/parallel_masked.cpp
clang/test/OpenMP/parallel_masked_target.cpp
Modified:
clang/lib/CodeGen/CGStmt.cpp
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Parse/ParseOpenMP.cpp
openmp/libomptarget/DeviceRTL/include/Interface.h
openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 4432205eac7e1..af8edbf87f94c 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -428,7 +428,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
llvm_unreachable("target parallel loop directive not supported yet.");
break;
case Stmt::OMPParallelMaskedDirectiveClass:
- llvm_unreachable("parallel masked directive not supported yet.");
+ EmitOMPParallelMaskedDirective(cast<OMPParallelMaskedDirective>(*S));
break;
}
}
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c2c441207d8af..f0f662c5c5ea3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -4489,6 +4489,33 @@ void CodeGenFunction::EmitOMPParallelMasterDirective(
checkForLastprivateConditionalUpdate(*this, S);
}
+void CodeGenFunction::EmitOMPParallelMaskedDirective(
+ const OMPParallelMaskedDirective &S) {
+ // Emit directive as a combined directive that consists of two implicit
+ // directives: 'parallel' with 'masked' directive.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ OMPPrivateScope PrivateScope(CGF);
+ emitOMPCopyinClause(CGF, S);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ emitMasked(CGF, S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
+ };
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
+ emitEmptyBoundParameters);
+ emitPostUpdateForReductionClause(*this, S,
+ [](CodeGenFunction &) { return nullptr; });
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
+}
+
void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &S) {
// Emit directive as a combined directive that consists of two implicit
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 4298eb6c2b714..dfd8b9e6e00a7 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3585,6 +3585,7 @@ class CodeGenFunction : public CodeGenTypeCache {
const OMPTargetTeamsDistributeSimdDirective &S);
void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S);
void EmitOMPInteropDirective(const OMPInteropDirective &S);
+ void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S);
/// Emit device code for the target directive.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index a31ceaeebd80a..10f0b532ebf3c 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2483,8 +2483,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
/// simd' | 'teams distribute parallel for simd' | 'teams distribute
/// parallel for' | 'target teams' | 'target teams distribute' | 'target
/// teams distribute parallel for' | 'target teams distribute parallel
-/// for simd' | 'target teams distribute simd' | 'masked' {clause}
-/// annot_pragma_openmp_end
+/// for simd' | 'target teams distribute simd' | 'masked' |
+/// 'parallel masked' {clause} annot_pragma_openmp_end
///
StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
ParsedStmtContext StmtCtx, bool ReadDirectiveWithinMetadirective) {
diff --git a/clang/test/OpenMP/parallel_masked.cpp b/clang/test/OpenMP/parallel_masked.cpp
new file mode 100644
index 0000000000000..7071e95b60097
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked.cpp
@@ -0,0 +1,109 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+void foo();
+
+void masked() {
+ #pragma omp parallel masked
+ {
+ foo();
+ }
+}
+
+void maskedFilter() {
+ const int tid = 1;
+ #pragma omp parallel masked filter(tid)
+ {
+ foo();
+ }
+}
+
+void master() {
+ #pragma omp parallel master
+ {
+ foo();
+ }
+}
+// CHECK-LABEL: define {{[^@]+}}@masked
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// CHECK: omp_if.then:
+// CHECK-NEXT: call void (...) @foo()
+// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label [[OMP_IF_END]]
+// CHECK: omp_if.end:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@maskedFilter
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 1, ptr [[TID]], align 4
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// CHECK: omp_if.then:
+// CHECK-NEXT: call void (...) @foo()
+// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label [[OMP_IF_END]]
+// CHECK: omp_if.end:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@master
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// CHECK: omp_if.then:
+// CHECK-NEXT: call void (...) @foo()
+// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label [[OMP_IF_END]]
+// CHECK: omp_if.end:
+// CHECK-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/parallel_masked_target.cpp b/clang/test/OpenMP/parallel_masked_target.cpp
new file mode 100644
index 0000000000000..fbd01d771184f
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked_target.cpp
@@ -0,0 +1,112 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -fopenmp-targets=nvptx64 -offload-device-only -x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+void foo();
+
+void masked() {
+ #pragma target
+ #pragma omp parallel masked
+ {
+ foo();
+ }
+}
+
+void maskedFilter() {
+ const int tid = 1;
+ #pragma target
+ #pragma omp parallel masked filter(tid)
+ {
+ foo();
+ }
+}
+
+void master() {
+ #pragma target
+ #pragma omp parallel master
+ {
+ foo();
+ }
+}
+// CHECK-LABEL: define {{[^@]+}}@masked
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// CHECK: omp_if.then:
+// CHECK-NEXT: call void (...) @foo()
+// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label [[OMP_IF_END]]
+// CHECK: omp_if.end:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@maskedFilter
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 1, ptr [[TID]], align 4
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// CHECK: omp_if.then:
+// CHECK-NEXT: call void (...) @foo()
+// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label [[OMP_IF_END]]
+// CHECK: omp_if.end:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@master
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// CHECK: omp_if.then:
+// CHECK-NEXT: call void (...) @foo()
+// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK-NEXT: br label [[OMP_IF_END]]
+// CHECK: omp_if.end:
+// CHECK-NEXT: ret void
+//
diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
index 02c79a5e7abd7..648da49b86f55 100644
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -260,6 +260,10 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
void __kmpc_end_master(IdentTy *Loc, int32_t TId);
+int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
+
+void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
+
int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
void __kmpc_end_single(IdentTy *Loc, int32_t TId);
diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index 90d03dd490b24..eddf37f851e73 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -520,6 +520,13 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId) {
void __kmpc_end_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
+int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter) {
+ FunctionTracingRAII();
+ return omp_get_thread_num() == Filter;
+}
+
+void __kmpc_end_masked(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
+
int32_t __kmpc_single(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
return __kmpc_master(Loc, TId);
More information about the Openmp-commits
mailing list