[Openmp-commits] [openmp] 56223b1 - [OpenMP] Help static loop code avoid over/underflow

via Openmp-commits openmp-commits at lists.llvm.org
Mon Feb 22 11:22:26 PST 2021


Author: Peyton, Jonathan L
Date: 2021-02-22T13:22:01-06:00
New Revision: 56223b1e91ed444a054aa9bd20878d0b62c7b53b

URL: https://github.com/llvm/llvm-project/commit/56223b1e91ed444a054aa9bd20878d0b62c7b53b
DIFF: https://github.com/llvm/llvm-project/commit/56223b1e91ed444a054aa9bd20878d0b62c7b53b.diff

LOG: [OpenMP] Help static loop code avoid over/underflow

This code alleviates some pathological loop parameters (lower,
upper, stride) within calculations involved in the static loop code.  It
bounds the chunk size to the trip count if it is greater than the trip
count and also minimizes problematic code for when trip count < nth.

Differential Revision: https://reviews.llvm.org/D96426

Added: 
    openmp/runtime/test/worksharing/for/omp_for_static_large_chunk.c

Modified: 
    openmp/runtime/src/kmp_dispatch.cpp
    openmp/runtime/src/kmp_sched.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index cf0d40144b475..17b6072a532a6 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -643,9 +643,10 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
     break;
   case kmp_sch_static_chunked:
   case kmp_sch_dynamic_chunked:
-    if (pr->u.p.parm1 <= 0) {
+    if (pr->u.p.parm1 <= 0)
       pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
-    }
+    else if (pr->u.p.parm1 > tc)
+      pr->u.p.parm1 = tc;
     KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
                    "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
                    gtid));

diff  --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp
index 8da89a63b040b..7e6d6df7227f2 100644
--- a/openmp/runtime/src/kmp_sched.cpp
+++ b/openmp/runtime/src/kmp_sched.cpp
@@ -301,7 +301,8 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
       if (tid < trip_count) {
         *pupper = *plower = *plower + tid * incr;
       } else {
-        *plower = *pupper + incr;
+        // set bounds so non-active threads execute no iterations
+        *plower = *pupper + (incr > 0 ? 1 : -1);
       }
       if (plastiter != NULL)
         *plastiter = (tid == trip_count - 1);
@@ -345,15 +346,28 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
   }
   case kmp_sch_static_chunked: {
     ST span;
-    if (chunk < 1) {
+    UT nchunks;
+    if (chunk < 1)
       chunk = 1;
-    }
+    else if ((UT)chunk > trip_count)
+      chunk = trip_count;
+    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
     span = chunk * incr;
-    *pstride = span * nth;
-    *plower = *plower + (span * tid);
-    *pupper = *plower + span - incr;
+    if (nchunks < nth) {
+      *pstride = span * nchunks;
+      if (tid < nchunks) {
+        *plower = *plower + (span * tid);
+        *pupper = *plower + span - incr;
+      } else {
+        *plower = *pupper + (incr > 0 ? 1 : -1);
+      }
+    } else {
+      *pstride = span * nth;
+      *plower = *plower + (span * tid);
+      *pupper = *plower + span - incr;
+    }
     if (plastiter != NULL)
-      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
+      *plastiter = (tid == (nchunks - 1) % nth);
     break;
   }
   case kmp_sch_static_balanced_chunked: {

diff  --git a/openmp/runtime/test/worksharing/for/omp_for_static_large_chunk.c b/openmp/runtime/test/worksharing/for/omp_for_static_large_chunk.c
new file mode 100644
index 0000000000000..c0a1d7ccaf2d1
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/omp_for_static_large_chunk.c
@@ -0,0 +1,112 @@
+// RUN: %libomp-compile
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 5 5005 500 1000000000
+// It fails using gcc compilers because the gcc compiler does not use any
+// runtime interface to calculate the iterations for static loop schedule
+// Hence, the runtime is never involved.
+// XFAIL: gcc
+//
+// This test makes sure that large chunks sizes are handled correctly
+// including internal runtime calculations which incorporate the chunk size
+#include <stdio.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#ifndef DEBUG_OUTPUT
+#define DEBUG_OUTPUT 0
+#endif
+
+// Used in qsort() to compare integers
+int compare_ints(const void *v1, const void *v2) {
+  int i1 = *(const int *)v1;
+  int i2 = *(const int *)v2;
+  return i1 - i2;
+}
+
+int main(int argc, char **argv) {
+  int i, j, lb, ub, stride, nthreads, chunk;
+  int num_iters = 0;
+  int counted_iters = 0;
+  int errs = 0;
+  if (argc != 5) {
+    fprintf(stderr, "error: incorrect number of arguments\n");
+    fprintf(stderr, "usage: %s <lb> <ub> <stride> <chunk>\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  lb = atoi(argv[1]);
+  ub = atoi(argv[2]);
+  stride = atoi(argv[3]);
+  chunk = atoi(argv[4]);
+  nthreads = omp_get_max_threads();
+  if (lb >= ub) {
+    fprintf(stderr, "error: lb must be less than ub\n");
+    exit(EXIT_FAILURE);
+  }
+  if (stride <= 0) {
+    fprintf(stderr, "error: stride must be positive integer\n");
+    exit(EXIT_FAILURE);
+  }
+  if (chunk <= 0) {
+    fprintf(stderr, "error: chunk must be positive integer\n");
+    exit(EXIT_FAILURE);
+  }
+  for (i = lb; i < ub; i += stride)
+    num_iters++;
+  // Thread private record of iterations each thread performed
+  int *iters = (int *)malloc(sizeof(int) * nthreads * num_iters);
+  // This will be the list of all iteration performed by every thread
+  int *final_iters = (int *)malloc(sizeof(int) * nthreads * num_iters);
+  for (i = 0; i < nthreads * num_iters; ++i) {
+    iters[i] = -1;
+    final_iters[i] = -1;
+  }
+
+  #pragma omp parallel num_threads(nthreads)
+  {
+    int j = 0;
+    int *my_iters = iters + omp_get_thread_num() * num_iters;
+    #pragma omp for schedule(static, chunk)
+    for (i = lb; i < ub; i += stride) {
+      #pragma omp atomic
+      counted_iters++;
+      my_iters[j++] = i;
+    }
+  }
+
+  // Put all iterations into final_iters then sort it from lowest to highest
+  for (i = 0, j = 0; i < nthreads * num_iters; ++i) {
+    if (iters[i] != -1)
+      final_iters[j++] = iters[i];
+  }
+  if (j != counted_iters) {
+    fprintf(stderr, "error: wrong number of final iterations counted!\n");
+    exit(EXIT_FAILURE);
+  }
+  qsort(final_iters, j, sizeof(int), compare_ints);
+
+  // Check for the right number of iterations
+  if (counted_iters != num_iters) {
+    fprintf(stderr, "error: wrong number of iterations executed. Expected %d "
+                    "but executed %d\n",
+            num_iters, counted_iters);
+    exit(EXIT_FAILURE);
+  }
+
+#if DEBUG_OUTPUT
+  for (i = 0; i < num_iters; ++i)
+    printf("final_iters[%d] = %d\n", i, final_iters[i]);
+#endif
+
+  // Check that the iterations performed were correct
+  for (i = lb, j = 0; i < ub; i += stride, ++j) {
+    if (final_iters[j] != i) {
+      fprintf(stderr,
+              "error: iteration j=%d i=%d is incorrect. Expect %d but see %d\n",
+              j, i, i, final_iters[j]);
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  free(iters);
+  free(final_iters);
+  return EXIT_SUCCESS;
+}


        


More information about the Openmp-commits mailing list