[clang] 3274cdc - [Clang][OpenMP] Remove the mandatory flush for capture for OpenMP 5.1

Shilei Tian via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 26 08:00:49 PDT 2021


Author: Shilei Tian
Date: 2021-07-26T11:00:44-04:00
New Revision: 3274cdc83ecdf2af569ad4f564d55d0e43b1072e

URL: https://github.com/llvm/llvm-project/commit/3274cdc83ecdf2af569ad4f564d55d0e43b1072e
DIFF: https://github.com/llvm/llvm-project/commit/3274cdc83ecdf2af569ad4f564d55d0e43b1072e.diff

LOG: [Clang][OpenMP] Remove the mandatory flush for capture for OpenMP 5.1

In OpenMP 5.1:
> If the `write` or `update` clause is specifieded, the atomic operation is not an atomic conditional update for which the comparison fails, and the effective memory ordering is `release`, `acq_rel`, or `seq_cst`, the strong flush on entry to the atomic operation is also a release flush. If the `read` or `update` clause is specified and the effective memory ordering is `acquire`, `acq_rel`, or `seq_cst` then the strong flush on exit from the atomic operation is also an acquire flush.

In OpenMP 5.0:
> If the `write`, `update`, or **`capture`** clause is specified and the `release`, `acq_rel`, or `seq_cst` clause is specified then the strong flush on entry to the atomic operation is also a release flush. If the `read` or `capture` clause is specified and the `acquire`, `acq_rel`, or `seq_cst` clause is specified then the strong flush on exit from the atomic operation is also an acquire flush.

>From my understanding, in OpenMP 5.1, `capture` is removed from the requirement for flush, therefore we don't have to enforce it.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D100768

Added: 
    

Modified: 
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/test/OpenMP/atomic_capture_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 486b48bca0a62..1f913590339f8 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5725,32 +5725,35 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
   // Emit post-update store to 'v' of old/new 'x' value.
   CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
   CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
-  // OpenMP, 2.17.7, atomic Construct
-  // If the write, update, or capture clause is specified and the release,
-  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
-  // the atomic operation is also a release flush.
-  // If the read or capture clause is specified and the acquire, acq_rel, or
-  // seq_cst clause is specified then the strong flush on exit from the atomic
-  // operation is also an acquire flush.
-  switch (AO) {
-  case llvm::AtomicOrdering::Release:
-    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
-                                         llvm::AtomicOrdering::Release);
-    break;
-  case llvm::AtomicOrdering::Acquire:
-    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
-                                         llvm::AtomicOrdering::Acquire);
-    break;
-  case llvm::AtomicOrdering::AcquireRelease:
-  case llvm::AtomicOrdering::SequentiallyConsistent:
-    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
-                                         llvm::AtomicOrdering::AcquireRelease);
-    break;
-  case llvm::AtomicOrdering::Monotonic:
-    break;
-  case llvm::AtomicOrdering::NotAtomic:
-  case llvm::AtomicOrdering::Unordered:
-    llvm_unreachable("Unexpected ordering.");
+  // OpenMP 5.1 removes the required flush for capture clause.
+  if (CGF.CGM.getLangOpts().OpenMP < 51) {
+    // OpenMP, 2.17.7, atomic Construct
+    // If the write, update, or capture clause is specified and the release,
+    // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+    // the atomic operation is also a release flush.
+    // If the read or capture clause is specified and the acquire, acq_rel, or
+    // seq_cst clause is specified then the strong flush on exit from the atomic
+    // operation is also an acquire flush.
+    switch (AO) {
+    case llvm::AtomicOrdering::Release:
+      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+                                           llvm::AtomicOrdering::Release);
+      break;
+    case llvm::AtomicOrdering::Acquire:
+      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+                                           llvm::AtomicOrdering::Acquire);
+      break;
+    case llvm::AtomicOrdering::AcquireRelease:
+    case llvm::AtomicOrdering::SequentiallyConsistent:
+      CGF.CGM.getOpenMPRuntime().emitFlush(
+          CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease);
+      break;
+    case llvm::AtomicOrdering::Monotonic:
+      break;
+    case llvm::AtomicOrdering::NotAtomic:
+    case llvm::AtomicOrdering::Unordered:
+      llvm_unreachable("Unexpected ordering.");
+    }
   }
 }
 

diff  --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp
index 62e41c8f26934..1cdd5056397b6 100644
--- a/clang/test/OpenMP/atomic_capture_codegen.cpp
+++ b/clang/test/OpenMP/atomic_capture_codegen.cpp
@@ -1,11 +1,19 @@
 
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-50 %s
 
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 #ifndef HEADER
@@ -343,7 +351,7 @@ int main() {
 // CHECK: [[IM_CAST:%.+]] = fptrunc double [[NEW_IM]] to float
 // CHECK: store float [[RE_CAST]], float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 0),
 // CHECK: store float [[IM_CAST]], float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 1),
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
 #pragma omp atomic capture seq_cst
   {cdx = cdx - cdv; cfv = cdx;}
 // CHECK: [[BV:%.+]] = load i8, i8* @{{.+}}
@@ -393,7 +401,7 @@ int main() {
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 // CHECK: store i8 [[NEW]], i8* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
 #pragma omp atomic capture, seq_cst
   {cx = cx >> ucv; cv = cx;}
 // CHECK: [[SV:%.+]]  = load i16, i16* @{{.+}},
@@ -436,7 +444,7 @@ int main() {
 // CHECK: [[OLD:%.+]] = atomicrmw or i32* @{{.+}}, i32 [[EXPR]] seq_cst, align 4
 // CHECK: [[DESIRED:%.+]] = or i32 [[EXPR]], [[OLD]]
 // CHECK: store i32 [[DESIRED]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
 #pragma omp atomic seq_cst, capture
   {uix = iv | uix; uiv = uix;}
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}}
@@ -904,7 +912,7 @@ int main() {
 // CHECK: [[EXIT]]
 // CHECK: [[NEW_VAL:%.+]] = trunc i64 [[CONV]] to i32
 // CHECK: store i32 [[NEW_VAL]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
 #pragma omp atomic capture release
   {bfx4.b /= ldv; iv = bfx4.b;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
@@ -937,7 +945,7 @@ int main() {
 // CHECK: [[EXIT]]
 // CHECK: [[NEW_VAL_I32:%.+]] = trunc i64 [[NEW_VAL]] to i32
 // CHECK: store i32 [[NEW_VAL_I32]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
 #pragma omp atomic capture acquire
   iv = bfx4_packed.b += ldv;
 // CHECK: load i64, i64*
@@ -963,7 +971,7 @@ int main() {
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
 // CHECK: [[EXIT]]
 // CHECK: store float [[X]], float* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
 #pragma omp atomic capture acq_rel
   {fv = float2x.x; float2x.x = ulv - float2x.x;}
 // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
@@ -973,7 +981,7 @@ int main() {
 // CHECK: [[NEW_VAL:%.+]] = fptosi double [[DIV]] to i32
 // CHECK: call void @llvm.write_register.i32([[REG]], i32 [[NEW_VAL]])
 // CHECK: store i32 [[NEW_VAL]], i32* @{{.+}},
-// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK-50: call{{.*}} @__kmpc_flush(
 #pragma omp atomic capture seq_cst
   {rix = dv / rix; iv = rix;}
 // CHECK: [[OLD_VAL:%.+]] = atomicrmw xchg i32* @{{.+}}, i32 5 monotonic, align 4


        


More information about the cfe-commits mailing list