[PATCH] D64585: [OpenMP] With nested parallelism, threadprivate variables become shared on outer parallel when appearing in inner parallel copyin clause

Princeton Ferro via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 11 13:19:45 PDT 2019


Prince781 created this revision.
Prince781 added reviewers: ABataev, faisalv, malcolm.parsons, efriedma, eli.friedman, maskray0, MaskRay, tareqsiraj, rsmith.
Prince781 added projects: clang, OpenMP.
Herald added subscribers: jdoerfert, jfb, guansong.

There is a bug since at least clang 8.0.0 wherein a static threadprivate variable appearing in a copyin() clause on a parallel construct (that is nested within another parallel construct) becomes shared on the outer parallel. This happens only when the threadprivate variable is backed by TLS and does not appear in global scope. Here is an example that compiles incorrectly:

  #include <omp.h>
  #include <stdbool.h>
  #include <stdio.h>
  #include <assert.h>
  #define NT 4
   
  int main(void) {
      static int threadprivate_var = 1;
      #pragma omp threadprivate(threadprivate_var)
   
      omp_set_dynamic(false);
      omp_set_num_threads(NT);
      omp_set_nested(true);
   
      #pragma omp parallel
      {
          threadprivate_var = 1;
          printf("[B] thread %d: val %d: threadprivate @ %p\n", omp_get_thread_num(), threadprivate_var, &threadprivate_var);
   
          #pragma omp master
          {
              threadprivate_var = 2;
              #pragma omp parallel copyin(threadprivate_var)
              {
                  printf("[B] thread %d, %d: val %d: threadprivate @ %p\n", omp_get_ancestor_thread_num(1), omp_get_thread_num(), threadprivate_var, &threadprivate_var);
                  // check that copyin succeeded
                  assert(threadprivate_var == 2);
              }
          }
          #pragma omp barrier
          printf("[A] thread %d: val %d: threadprivate @ %p\n", omp_get_thread_num(), threadprivate_var, &threadprivate_var);
          if (omp_get_thread_num() != 0)  // 0 is the master thread
              // non-master threads should not have seen changes
              assert(threadprivate_var == 1);
      }
  }

The resulting IR looks something like this:

  @main.threadprivate_var = internal thread_local global i32 1, align 4
  …
  main() {
     call void __kmpc_fork_call(omp_outlined_outer_parallel_region, &main.threadprivate_var)
  }
  …
  omp_outlined_outer_parallel_region(…, i32* %threadprivate_var) {
      if (I am the master thread)
          call void __kmpc_fork_call(omp_outlined_inner_parallel_region, %threadprivate_var)
  }
  …
  omp_outlined_inner_parallel_region(…, i32* %threadprivate_var) {
  }

When it should look something like this:

  @main.threadprivate_var = internal thread_local global i32 1, align 4
  …
  main() {
     call void __kmpc_fork_call(omp_outlined_outer_parallel_region)
  }
  …
  omp_outlined_outer_parallel_region(…) {
      if (I am the master thread)
          call void __kmpc_fork_call(omp_outlined_inner_parallel_region, &main.threadprivate_var)
  }
  …
  omp_outlined_inner_parallel_region(…, i32* %threadprivate_var) {
  }

Without the copyin, the function for the outer parallel region does not have the extra parameter. For the copyin clause above to work, the inner parallel needs a reference to the thread-local variable of the encountering thread (in this case, the master thread) in an extra parameter. It does not make sense for the outer parallel function(s) to capture the thread-local variable.

I’ve made a patch that prevents TLS-backed threadprivate variables from being captured in outer scopes. I don’t know if this is the best way to go about it, so I welcome feedback from someone with much more knowledge on clang’s OpenMP backend.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D64585

Files:
  clang/lib/Sema/SemaExpr.cpp
  clang/test/OpenMP/nested_parallel_threadprivate_copyin.cpp


Index: clang/test/OpenMP/nested_parallel_threadprivate_copyin.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/nested_parallel_threadprivate_copyin.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 %s -fopenmp -emit-llvm -o - | FileCheck %s
+#define NT 4                   /* default number of threads */
+
+extern "C" {
+extern int printf(const char *, ...);
+extern void assert(int);
+extern void omp_set_dynamic(bool);
+extern void omp_set_num_threads(int);
+extern void omp_set_nested(bool);
+extern int omp_get_thread_num(void);
+extern int omp_get_ancestor_thread_num(int);
+};
+
+int main(void) {
+    static int threadprivate_var = 1;
+    #pragma omp threadprivate(threadprivate_var)
+
+    // These commands are not strictly necessary, but they make it easier to
+    // see when things go wrong.
+    omp_set_dynamic(false);
+    omp_set_num_threads(NT);
+    omp_set_nested(true);
+
+    // CHECK-NOT: call void.*@__kmpc_fork_call({{.*}}%{{\w+}}threadprivate{{.*}})
+    // CHECK-NOT: define internal void @.omp_outlined.({{.*}}%threadprivate_var{{.*}})
+    #pragma omp parallel
+    {
+        threadprivate_var = 1;
+        printf("[B] thread %d: val %d: threadprivate @ %p\n", omp_get_thread_num(), threadprivate_var, &threadprivate_var);
+
+        #pragma omp master
+        {
+            threadprivate_var = 2;
+            // CHECK: define internal void @.omp_outlined..2({{.*}}%threadprivate_var{{.*}})
+            #pragma omp parallel copyin(threadprivate_var)
+            {
+                printf("[B] thread %d, %d: val %d: threadprivate @ %p\n", omp_get_ancestor_thread_num(1), omp_get_thread_num(), threadprivate_var, &threadprivate_var);
+                // check that copyin succeeded
+                assert(threadprivate_var == 2);
+            }
+        }
+        #pragma omp barrier
+        printf("[A] thread %d: val %d: threadprivate @ %p\n", omp_get_thread_num(), threadprivate_var, &threadprivate_var);
+        if (omp_get_thread_num() != 0)  // 0 is the master thread
+            // non-master threads should not have seen changes
+            assert(threadprivate_var == 1);
+    }
+}
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -15326,6 +15326,15 @@
       return true;
     }
 
+    if (getLangOpts().OpenMP && getLangOpts().OpenMPUseTLS) {
+      // Avoid capturing TLS-backed threadprivate variables in outer scopes.
+      if (VarDC->Equals(ParentDC) && Var->hasAttr<OMPThreadPrivateDeclAttr>() &&
+          IsGlobal) {
+        FunctionScopesIndex = MaxFunctionScopesIndex - 1;
+        break;
+      }
+    }
+
     FunctionScopeInfo  *FSI = FunctionScopes[FunctionScopesIndex];
     CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FSI);
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D64585.209308.patch
Type: text/x-patch
Size: 2876 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20190711/1bc78abf/attachment.bin>


More information about the cfe-commits mailing list