[clang] 3274cdc - [Clang][OpenMP] Remove the mandatory flush for capture for OpenMP 5.1
Shilei Tian via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 26 08:00:49 PDT 2021
Author: Shilei Tian
Date: 2021-07-26T11:00:44-04:00
New Revision: 3274cdc83ecdf2af569ad4f564d55d0e43b1072e
URL: https://github.com/llvm/llvm-project/commit/3274cdc83ecdf2af569ad4f564d55d0e43b1072e
DIFF: https://github.com/llvm/llvm-project/commit/3274cdc83ecdf2af569ad4f564d55d0e43b1072e.diff
LOG: [Clang][OpenMP] Remove the mandatory flush for capture for OpenMP 5.1
In OpenMP 5.1:
> If the `write` or `update` clause is specified, the atomic operation is not an atomic conditional update for which the comparison fails, and the effective memory ordering is `release`, `acq_rel`, or `seq_cst`, the strong flush on entry to the atomic operation is also a release flush. If the `read` or `update` clause is specified and the effective memory ordering is `acquire`, `acq_rel`, or `seq_cst` then the strong flush on exit from the atomic operation is also an acquire flush.
In OpenMP 5.0:
> If the `write`, `update`, or **`capture`** clause is specified and the `release`, `acq_rel`, or `seq_cst` clause is specified then the strong flush on entry to the atomic operation is also a release flush. If the `read` or `capture` clause is specified and the `acquire`, `acq_rel`, or `seq_cst` clause is specified then the strong flush on exit from the atomic operation is also an acquire flush.
From my understanding, in OpenMP 5.1, `capture` is removed from the requirement for flush, therefore we don't have to enforce it.
Reviewed By: ABataev
Differential Revision: https://reviews.llvm.org/D100768
Added:
Modified:
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/atomic_capture_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 486b48bca0a62..1f913590339f8 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5725,32 +5725,35 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
// Emit post-update store to 'v' of old/new 'x' value.
CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
- // OpenMP, 2.17.7, atomic Construct
- // If the write, update, or capture clause is specified and the release,
- // acq_rel, or seq_cst clause is specified then the strong flush on entry to
- // the atomic operation is also a release flush.
- // If the read or capture clause is specified and the acquire, acq_rel, or
- // seq_cst clause is specified then the strong flush on exit from the atomic
- // operation is also an acquire flush.
- switch (AO) {
- case llvm::AtomicOrdering::Release:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
- llvm::AtomicOrdering::Release);
- break;
- case llvm::AtomicOrdering::Acquire:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
- llvm::AtomicOrdering::Acquire);
- break;
- case llvm::AtomicOrdering::AcquireRelease:
- case llvm::AtomicOrdering::SequentiallyConsistent:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
- llvm::AtomicOrdering::AcquireRelease);
- break;
- case llvm::AtomicOrdering::Monotonic:
- break;
- case llvm::AtomicOrdering::NotAtomic:
- case llvm::AtomicOrdering::Unordered:
- llvm_unreachable("Unexpected ordering.");
+ // OpenMP 5.1 removes the required flush for capture clause.
+ if (CGF.CGM.getLangOpts().OpenMP < 51) {
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ // If the read or capture clause is specified and the acquire, acq_rel, or
+ // seq_cst clause is specified then the strong flush on exit from the atomic
+ // operation is also an acquire flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Acquire);
+ break;
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(
+ CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease);
+ break;
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
}
diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp
index 62e41c8f26934..1cdd5056397b6 100644
--- a/clang/test/OpenMP/atomic_capture_codegen.cpp
+++ b/clang/test/OpenMP/atomic_capture_codegen.cpp
@@ -1,11 +1,19 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-50 %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
@@ -343,7 +351,7 @@ int main() {
// CHECK: [[IM_CAST:%.+]] = fptrunc double [[NEW_IM]] to float
// CHECK: store float [[RE_CAST]], float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 0),
// CHECK: store float [[IM_CAST]], float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 1),
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
#pragma omp atomic capture seq_cst
{cdx = cdx - cdv; cfv = cdx;}
// CHECK: [[BV:%.+]] = load i8, i8* @{{.+}}
@@ -393,7 +401,7 @@ int main() {
// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
// CHECK: [[EXIT]]
// CHECK: store i8 [[NEW]], i8* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
#pragma omp atomic capture, seq_cst
{cx = cx >> ucv; cv = cx;}
// CHECK: [[SV:%.+]] = load i16, i16* @{{.+}},
@@ -436,7 +444,7 @@ int main() {
// CHECK: [[OLD:%.+]] = atomicrmw or i32* @{{.+}}, i32 [[EXPR]] seq_cst, align 4
// CHECK: [[DESIRED:%.+]] = or i32 [[EXPR]], [[OLD]]
// CHECK: store i32 [[DESIRED]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
#pragma omp atomic seq_cst, capture
{uix = iv | uix; uiv = uix;}
// CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}}
@@ -904,7 +912,7 @@ int main() {
// CHECK: [[EXIT]]
// CHECK: [[NEW_VAL:%.+]] = trunc i64 [[CONV]] to i32
// CHECK: store i32 [[NEW_VAL]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
#pragma omp atomic capture release
{bfx4.b /= ldv; iv = bfx4.b;}
// CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
@@ -937,7 +945,7 @@ int main() {
// CHECK: [[EXIT]]
// CHECK: [[NEW_VAL_I32:%.+]] = trunc i64 [[NEW_VAL]] to i32
// CHECK: store i32 [[NEW_VAL_I32]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
#pragma omp atomic capture acquire
iv = bfx4_packed.b += ldv;
// CHECK: load i64, i64*
@@ -963,7 +971,7 @@ int main() {
// CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
// CHECK: [[EXIT]]
// CHECK: store float [[X]], float* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
#pragma omp atomic capture acq_rel
{fv = float2x.x; float2x.x = ulv - float2x.x;}
// CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
@@ -973,7 +981,7 @@ int main() {
// CHECK: [[NEW_VAL:%.+]] = fptosi double [[DIV]] to i32
// CHECK: call void @llvm.write_register.i32([[REG]], i32 [[NEW_VAL]])
// CHECK: store i32 [[NEW_VAL]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
#pragma omp atomic capture seq_cst
{rix = dv / rix; iv = rix;}
// CHECK: [[OLD_VAL:%.+]] = atomicrmw xchg i32* @{{.+}}, i32 5 monotonic, align 4
More information about the cfe-commits
mailing list