[Openmp-commits] [openmp] add loop collapse tests (PR #86243)

Vadim Paretsky via Openmp-commits openmp-commits at lists.llvm.org
Thu Mar 21 21:35:04 PDT 2024


https://github.com/vadikp-intel created https://github.com/llvm/llvm-project/pull/86243

This PR adds loop collapse tests ported from MSVC.

>From 2b7d3459db79f6882891e6714550d0d192fe0e7d Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Fri, 15 Mar 2024 17:38:36 -0700
Subject: [PATCH 1/5] rectangular loop collapse tests

---
 .../for/collapse_many_GELTGT_int.c            | 54 +++++++++++++++++
 .../for/collapse_many_GTGEGT_int.c            | 60 +++++++++++++++++++
 .../for/collapse_many_LTLEGE_int.c            | 55 +++++++++++++++++
 .../test/worksharing/for/collapse_many_int.c  | 52 ++++++++++++++++
 .../test/worksharing/for/collapse_one_int.c   | 25 ++++++++
 5 files changed, 246 insertions(+)
 create mode 100644 openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c
 create mode 100644 openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c
 create mode 100644 openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c
 create mode 100644 openmp/runtime/test/worksharing/for/collapse_many_int.c
 create mode 100644 openmp/runtime/test/worksharing/for/collapse_one_int.c

diff --git a/openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c b/openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c
new file mode 100644
index 00000000000000..23808244db4475
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c
@@ -0,0 +1,54 @@
+// RUN: %libomp-compile-and-run
+
+// Non-rectangular loop collapsing.
+//
+// Nested loops conform to OpenMP 5.2 standard,
+// inner loops bounds may depend on outer loops induction variables.
+
+#define LOOP_TYPES int
+#define COMPARE0 >=
+#define COMPARE1 <
+#define COMPARE2 >
+#define LOOP                                                                    \
+        for (i = iLB; i COMPARE0 iUB; i += iStep)                               \
+            for (j = jA0; j COMPARE1 jB0; j += jStep)                           \
+                for (k = kA0; k COMPARE2 kB0; k += kStep)
+#include "collapse_test.inc"
+
+int main() {
+  int fail;
+
+  iLB = 3; iUB = -2; jA0 = -3; jA1 = 0; jB0 = -6; jB1 = 0; kA0 = -2; kA1 = 0; kB0 = -4; kB1 = 0; iStep = -1; jStep = -1; kStep = -4;
+  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
+         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
+         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
+  fail = (test() == 0);
+
+  if (!fail) {
+    for (iStep = -3; iStep >= -6; iStep -= 2) {
+      for (jA0 = -6; jA0 <= 6; jA0 += 3) {
+        for (jB0 = -3; jB0 <= 10; jB0 += 3) {
+          for (jStep = 1; jStep <= 10; jStep += 2) {
+            for (kA0 = -2; kA0 <= 4; ++kA0) {
+              for (kB0 = -4; kB0 <= 2; ++kB0) {
+                for (kStep = -2; kStep >= -10; kStep -= 4) {
+                  {
+                    PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
+                           "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
+                           "jStep=%d; kStep=%d;\n",
+                           iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
+                           iStep, jStep, kStep);
+                    fail = fail || (test() == 0);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+return fail;
+}
+
diff --git a/openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c b/openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c
new file mode 100644
index 00000000000000..9a10b5d01895cd
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c
@@ -0,0 +1,60 @@
+// RUN: %libomp-compile-and-run
+
+// Non-rectangular loop collapsing.
+//
+// Nested loops conform to OpenMP 5.2 standard,
+// inner loops bounds may depend on outer loops induction variables.
+
+#define LOOP_TYPES int
+#define COMPARE0 >
+#define COMPARE1 >=
+#define COMPARE2 >
+
+#define DLOOP_GT0
+#define DLOOP_GE1
+#define DLOOP_GT2
+
+#define LOOP                                                                    \
+        for (i = iLB; i COMPARE0 iUB; i += iStep)                               \
+            for (j = jA0; j COMPARE1 jB0; j += jStep)                           \
+                for (k = kA0; k COMPARE2 kB0; k += kStep)
+#include "collapse_test.inc"
+
+int main() {
+  int fail;
+
+  iLB = 3; iUB = -2; jA0 = -3; jA1 = 0; jB0 = -6; jB1 = 0; kA0 = -2; kA1 = 0; kB0 = -4; kB1 = 0; iStep = -1; jStep = -1; kStep = -4;
+  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
+         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
+         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
+  fail = (test() == 0);
+
+  if (!fail) {
+
+    for (iStep = -3; iStep >= -6; iStep -= 2) {
+      for (jA0 = -3; jA0 <= 10; jA0 += 3) {
+        for (jB0 = -6; jB0 <= 6; jB0 += 3) {
+          for (jStep = -1; jStep >= -10; jStep -= 2) {
+            for (kA0 = -2; kA0 <= 4; ++kA0) {
+              for (kB0 = -4; kB0 <= 2; ++kB0) {
+                for (kStep = -2; kStep >= -10; kStep -= 4) {
+                  {
+                    PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
+                           "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
+                           "jStep=%d; kStep=%d;\n",
+                           iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
+                           iStep, jStep, kStep);
+                    fail = fail || (test() == 0);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+return fail;
+}
+
diff --git a/openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c b/openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c
new file mode 100644
index 00000000000000..171b02daf8b24f
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c
@@ -0,0 +1,55 @@
+// RUN: %libomp-compile-and-run
+
+// Non-rectangular loop collapsing.
+//
+// Nested loops conform to OpenMP 5.2 standard,
+// inner loops bounds may depend on outer loops induction variables.
+
+#define LOOP_TYPES int
+#define COMPARE0 <
+#define COMPARE1 <=
+#define COMPARE2 >=
+#define LOOP                                                                    \
+        for (i = iLB; i COMPARE0 iUB; i += iStep)                               \
+            for (j = jA0; j COMPARE1 jB0; j += jStep)                           \
+                for (k = kA0; k COMPARE2 kB0; k += kStep)
+#include "collapse_test.inc"
+
+int main() {
+  int fail;
+
+  iLB = -2; iUB = 3; jA0 = -3; jA1 = 0; jB0 = -6; jB1 = 0; kA0 = -2; kA1 = 0; kB0 = -4; kB1 = 0; iStep = -1; jStep = -1; kStep = -4;
+  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
+         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
+         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
+  fail = (test() == 0);
+
+  if (!fail) {
+
+    for (iStep = 2; iStep <= 6; iStep += 2) {
+      for (jA0 = -6; jA0 <= 6; jA0 += 3) {
+        for (jB0 = -3; jB0 <= 10; jB0 += 3) {
+          for (jStep = 1; jStep <= 10; jStep += 2) {
+            for (kA0 = -2; kA0 <= 4; ++kA0) {
+              for (kB0 = -4; kB0 <= 2; ++kB0) {
+                for (kStep = -2; kStep >= -10; kStep -= 4) {
+                  {
+                    PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
+                           "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
+                           "jStep=%d; kStep=%d;\n",
+                           iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
+                           iStep, jStep, kStep);
+                    fail = fail || (test() == 0);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+return fail;
+}
+
diff --git a/openmp/runtime/test/worksharing/for/collapse_many_int.c b/openmp/runtime/test/worksharing/for/collapse_many_int.c
new file mode 100644
index 00000000000000..d834c5014ed5df
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/collapse_many_int.c
@@ -0,0 +1,52 @@
+// RUN: %libomp-compile-and-run
+
+// Non-rectangular loop collapsing.
+//
+// Nested loops conform to OpenMP 5.2 standard,
+// inner loops bounds may depend on outer loops induction variables.
+
+#define LOOP_TYPES int
+#define LOOP                                                                   \
+  for (i = iLB; i <= iUB; i += iStep)                                          \
+    for (j = i * jA1 + jA0; j <= i * jB1 + jB0; j += jStep)                    \
+      for (k = j * kA1 + kA0; k <= j * kB1 + kB0; k += kStep)
+#include "collapse_test.inc"
+
+int main()
+{
+    int fail = 0;
+
+    iLB = -2; iUB = 3; jA0 = -7; jA1 = -1; jB0 = 13; jB1 = 3; kA0 = -20; kA1 = -2; kB0 = 111; kB1 = -1; iStep = 5; jStep = 9; kStep = 10;
+    PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
+        iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
+    fail = fail || (test()==0);
+
+    if (!fail) {
+
+        // NOTE: if a loop on some level won't execute  for all iterations of an outer loop, it still should work. 
+        // Runtime doesn't require lower bounds to be <= upper bounds for all possible i, j, k.
+
+        iLB = -2; iUB = 3; jA0 = -7; jB0 = 5; kA0 = -13; kB0 = 37;
+
+        for (kA1 = -2; kA1 <= 2; ++kA1) {                                       // <=
+            for (kB1 = -2; kB1 <= 2; ++kB1) {
+                for (jA1 = -3; jA1 <= 3; ++jA1) {
+                    for (jB1 = -3; jB1 <= 3; ++jB1) {
+                        for (iStep = 1; iStep <= 3; ++iStep) {
+                            for (jStep = 2; jStep <= 6; jStep += 2) {
+                                for (kStep = 2; kStep <= 8; kStep += 3) {
+                                    PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
+                                         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
+                                    fail = fail || (test() == 0);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return fail;
+}
+
diff --git a/openmp/runtime/test/worksharing/for/collapse_one_int.c b/openmp/runtime/test/worksharing/for/collapse_one_int.c
new file mode 100644
index 00000000000000..122f9a5a87b160
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/collapse_one_int.c
@@ -0,0 +1,25 @@
+// RUN: %libomp-compile-and-run
+
+// Non-rectangular loop collapsing.
+//
+// Nested loops conform to OpenMP 5.2 standard,
+// inner loops bounds may depend on outer loops induction variables.
+
+#define LOOP_TYPES int
+#define LOOP                                                                   \
+  for (i = iLB; i <= iUB; i += iStep)                                          \
+    for (j = i + jA0; j <= i + jB0; j += jStep)                                \
+      for (k = j + kA0; k <= j + kB0; k += kStep)
+
+#include "collapse_test.inc"
+
+int main()
+{
+    int fail;
+    iLB = -2; iUB = 3; jA0 = -7; jB0 = 13; kA0 = -20; kB0 = 111; iStep = 5; jStep = 9; kStep = 10;
+    PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jB0=%d; kA0=%d; kB0=%d; iStep=%d; jStep=%d; kStep=%d;\n",
+        iLB, iUB, jA0, jB0, kA0, kB0, iStep, jStep, kStep);
+    fail = (test() == 0);
+    return fail;
+}
+

>From 68bfd9648c9af499ede6d74290ad2ea5f76c2ec0 Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Thu, 21 Mar 2024 21:01:09 -0700
Subject: [PATCH 2/5] missing file

---
 .../test/worksharing/for/collapse_test.inc    | 204 ++++++++++++++++++
 1 file changed, 204 insertions(+)
 create mode 100644 openmp/runtime/test/worksharing/for/collapse_test.inc

diff --git a/openmp/runtime/test/worksharing/for/collapse_test.inc b/openmp/runtime/test/worksharing/for/collapse_test.inc
new file mode 100644
index 00000000000000..8c88dc9860626e
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/collapse_test.inc
@@ -0,0 +1,204 @@
+#include <omp.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <memory.h>
+
+#define LOOP_IV_TYPE0 LOOP_TYPES
+#define LOOP_TYPE0 LOOP_TYPES
+#define LOOP_STYPE0 LOOP_TYPES
+
+#define LOOP_IV_TYPE1 LOOP_TYPES
+#define LOOP_TYPE1 LOOP_TYPES
+#define LOOP_STYPE1 LOOP_TYPES
+
+#define LOOP_IV_TYPE2 LOOP_TYPES
+#define LOOP_TYPE2 LOOP_TYPES
+#define LOOP_STYPE2 LOOP_TYPES
+
+#define MAX_THREADS 256
+
+#if defined VERBOSE
+#define PRINTF printf
+#else
+#define PRINTF
+#endif
+
+LOOP_TYPE0 iLB, iUB;
+LOOP_TYPE1 jA0, jB0;
+LOOP_TYPE2 kA0, kB0;
+
+LOOP_STYPE0 iStep;
+LOOP_STYPE1 jA1, jB1, jStep;
+LOOP_STYPE2 kA1, kB1, kStep;
+
+// We can check <=, <, >=, > (!= has different pattern)
+// Additional definition of LOOP_LEi, LOOP_LTi, etc. is helpful to build calls
+// of the test from main
+
+#if defined LOOP_LE0
+#define COMPARE0 <=
+#elif defined LOOP_LT0
+#define COMPARE0 <
+#elif defined LOOP_GE0
+#define COMPARE0 >=
+#elif defined LOOP_GT0
+#define COMPARE0 >
+#endif
+
+#if defined LOOP_LE1
+#define COMPARE1 <=
+#elif defined LOOP_LT1
+#define COMPARE1 <
+#elif defined LOOP_GE1
+#define COMPARE1 >=
+#elif defined LOOP_GT1
+#define COMPARE1 >
+#endif
+
+#if defined LOOP_LE2
+#define COMPARE2 <=
+#elif defined LOOP_LT2
+#define COMPARE2 <
+#elif defined LOOP_GE2
+#define COMPARE2 >=
+#elif defined LOOP_GT2
+#define COMPARE2 >
+#endif
+
+
+typedef struct
+{
+    LOOP_IV_TYPE0 i;
+    LOOP_IV_TYPE1 j;
+    LOOP_IV_TYPE2 k;
+} spaceType;
+
+spaceType* AllocSpace(unsigned size)
+{
+
+    spaceType *p = (spaceType*) malloc(size * sizeof(spaceType));
+    memset(p, 0, size * sizeof(spaceType));
+    return p;
+}
+
+void FreeSpace(spaceType* space)
+{
+    free(space);
+}
+
+// record an iteration
+void Set(spaceType* space, unsigned count, unsigned trueCount, LOOP_IV_TYPE0 i, LOOP_IV_TYPE1 j, LOOP_IV_TYPE0 k)
+{
+    if (count > trueCount) {
+        // number of iterations exceeded
+        // will be reported with checks
+        return;
+    }
+    space[count-1].i = i;
+    space[count-1].j = j;
+    space[count-1].k = k;
+}
+int test()
+{
+    int pass = 1;
+    LOOP_IV_TYPE0 i;
+    LOOP_IV_TYPE1 j;
+    LOOP_IV_TYPE2 k;
+
+    spaceType* openmpSpace;
+    spaceType* scalarSpace;
+
+    unsigned trueCount = 0;
+    unsigned openmpCount = 0;
+    unsigned scalarCount = 0;
+    unsigned uselessThreadsOpenMP = 0;
+    unsigned usefulThreadsOpenMP = 0;
+    unsigned chunkSizesOpenmp[MAX_THREADS] = {0};
+
+    unsigned num_threads = omp_get_max_threads();
+    if (num_threads > MAX_THREADS) num_threads = MAX_THREADS;
+    omp_set_num_threads(num_threads);
+
+    // count iterations and allocate space
+    LOOP {
+           ++trueCount;
+         }
+    
+    openmpSpace = AllocSpace(trueCount);
+    scalarSpace = AllocSpace(trueCount);
+
+    // fill the scalar (compare) space
+    LOOP {
+           ++scalarCount;
+           Set(scalarSpace, scalarCount, trueCount, i, j, k);
+         }
+
+    // test run body:
+    // perform and record OpenMP iterations and thread use
+#pragma omp parallel num_threads(num_threads)
+    {
+#pragma omp for collapse(3) private (i, j, k)
+       LOOP 
+       {
+                unsigned count;
+                unsigned gtid = omp_get_thread_num();
+#pragma omp atomic update
+                ++chunkSizesOpenmp[gtid];
+#pragma omp atomic capture
+                count = ++openmpCount;
+                Set(openmpSpace, count, trueCount, i, j, k);
+       }
+    }
+
+    // check for the right number of iterations processed
+    // (only need to check for less, greater is checked when recording)
+    if (openmpCount < trueCount) {
+        PRINTF("OpenMP FAILURE: Openmp processed fewer iterations: %d vs %d\n", openmpCount, trueCount);
+        pass = 0;
+    } else if (openmpCount > trueCount) {
+        PRINTF("OpenMP FAILURE: Openmp processed more iterations: %d vs %d\n", openmpCount, trueCount);
+        pass = 0;
+    }
+
+    // check OpenMP for iteration correctness against scalar
+    for (unsigned i = 0; i < trueCount; i++) {
+      unsigned j;
+      for (j = 0; j < openmpCount; j++) {
+        if ((scalarSpace[i].i == openmpSpace[j].i) && (scalarSpace[i].j == openmpSpace[j].j) && (scalarSpace[i].k == openmpSpace[j].k)) {
+            break;
+        }
+      }
+      if (j == openmpCount) {
+            PRINTF("OpenMP FAILURE: (%d %d %d) not processed\n", scalarSpace[i].i, scalarSpace[i].j, scalarSpace[i].k);
+            pass = 0;
+      }
+    }
+    
+    // check for efficient thread use
+    for (unsigned i = 0; i < num_threads; ++i) {
+        if (chunkSizesOpenmp[i] == 0) {
+            ++uselessThreadsOpenMP;
+        }
+    }
+
+    // a (weak) check that more than one thread was used
+    if ((uselessThreadsOpenMP == num_threads - 1) && (trueCount > 1)) {
+        PRINTF("OpenMP FAILURE: threads are not used\n");
+        pass = 0;
+    }
+
+#if 0
+    // a check to see if the load was spread more or less evenly so that
+    // when there was more work than threads each one got at least something 
+    // (stronger, but may currently fail for a general collapse case)
+    if ((trueCount >= num_threads) && (uselessThreadsOpenMP > 0)) {
+       PRINTF("OpenMP FAILURE: %d threads not used with %d iterations\n", uselessThreadsOpenMP, openmpCount);
+       pass = 0;
+    }
+#endif
+
+    // clean up space
+    FreeSpace(openmpSpace);
+    FreeSpace(scalarSpace);
+    return pass;
+}

>From 2f007182addfc760965b69948900ec3140d66b82 Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Thu, 21 Mar 2024 21:11:54 -0700
Subject: [PATCH 3/5] updated comment

---
 openmp/runtime/src/kmp_collapse.cpp | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/openmp/runtime/src/kmp_collapse.cpp b/openmp/runtime/src/kmp_collapse.cpp
index 569d2c1508319d..52e0f194e0f25a 100644
--- a/openmp/runtime/src/kmp_collapse.cpp
+++ b/openmp/runtime/src/kmp_collapse.cpp
@@ -1517,16 +1517,11 @@ void kmp_handle_upper_triangle_matrix(
   kmp_uint64 iter_with_current = iter_before_current + iter_current;
   // calculate the outer loop lower bound (lbo) which is the max outer iv value
   // that gives the number of iterations that is equal or just below the total
-  // number of iterations executed by the previous threads, for less_than
-  // (1-based) inner loops (inner_ub0 == -1) it will be i.e.
-  // lbo*(lbo-1)/2<=iter_before_current => lbo^2-lbo-2*iter_before_current<=0
-  // for less_than_equal (0-based) inner loops (inner_ub == 0) it will be:
-  // i.e. lbo*(lbo+1)/2<=iter_before_current =>
-  // lbo^2+lbo-2*iter_before_current<=0 both cases can be handled similarily
-  // using a parameter to control the equatio sign
+  // number of iterations executed by the previous threads,
+  // lbo*(lbo+1)/2<=iter_before_current =>
+  // lbo^2+lbo-2*iter_before_current<=0
   kmp_uint64 lower_bound_outer =
       (kmp_uint64)(sqrt_newton_approx(1 + 8 * iter_before_current) + 1) / 2 - 1;
-  ;
   // calculate the inner loop lower bound which is the remaining number of
   // iterations required to hit the total number of iterations executed by the
   // previous threads giving the starting point of this thread

>From ea5890cabc72fd6c4cd8f15644548111f2375065 Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Thu, 21 Mar 2024 21:13:14 -0700
Subject: [PATCH 4/5] disabled a failing test

---
 openmp/runtime/test/worksharing/for/collapse_many_int.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openmp/runtime/test/worksharing/for/collapse_many_int.c b/openmp/runtime/test/worksharing/for/collapse_many_int.c
index d834c5014ed5df..6a126e1b49b31b 100644
--- a/openmp/runtime/test/worksharing/for/collapse_many_int.c
+++ b/openmp/runtime/test/worksharing/for/collapse_many_int.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile-and-run
+// XFAIL: true
 
 // Non-rectangular loop collapsing.
 //

>From d8655f4ffed2b3e9c99c4a9e1f7e0c567ff8920f Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Thu, 21 Mar 2024 21:15:35 -0700
Subject: [PATCH 5/5] clang format

---
 openmp/runtime/src/kmp_collapse.cpp           |  2 +-
 .../for/collapse_many_GELTGT_int.c            | 25 ++++--
 .../for/collapse_many_GTGEGT_int.c            | 25 ++++--
 .../for/collapse_many_LTLEGE_int.c            | 25 ++++--
 .../test/worksharing/for/collapse_many_int.c  | 84 ++++++++++++-------
 .../test/worksharing/for/collapse_one_int.c   | 25 ++++--
 6 files changed, 123 insertions(+), 63 deletions(-)

diff --git a/openmp/runtime/src/kmp_collapse.cpp b/openmp/runtime/src/kmp_collapse.cpp
index 52e0f194e0f25a..e63a98081db9b8 100644
--- a/openmp/runtime/src/kmp_collapse.cpp
+++ b/openmp/runtime/src/kmp_collapse.cpp
@@ -1517,7 +1517,7 @@ void kmp_handle_upper_triangle_matrix(
   kmp_uint64 iter_with_current = iter_before_current + iter_current;
   // calculate the outer loop lower bound (lbo) which is the max outer iv value
   // that gives the number of iterations that is equal or just below the total
-  // number of iterations executed by the previous threads,
+  // number of iterations executed by the previous threads:
   // lbo*(lbo+1)/2<=iter_before_current =>
   // lbo^2+lbo-2*iter_before_current<=0
   kmp_uint64 lower_bound_outer =
diff --git a/openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c b/openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c
index 23808244db4475..77b2d6918d8721 100644
--- a/openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c
+++ b/openmp/runtime/test/worksharing/for/collapse_many_GELTGT_int.c
@@ -9,16 +9,28 @@
 #define COMPARE0 >=
 #define COMPARE1 <
 #define COMPARE2 >
-#define LOOP                                                                    \
-        for (i = iLB; i COMPARE0 iUB; i += iStep)                               \
-            for (j = jA0; j COMPARE1 jB0; j += jStep)                           \
-                for (k = kA0; k COMPARE2 kB0; k += kStep)
+#define LOOP                                                                   \
+  for (i = iLB; i COMPARE0 iUB; i += iStep)                                    \
+    for (j = jA0; j COMPARE1 jB0; j += jStep)                                  \
+      for (k = kA0; k COMPARE2 kB0; k += kStep)
 #include "collapse_test.inc"
 
 int main() {
   int fail;
 
-  iLB = 3; iUB = -2; jA0 = -3; jA1 = 0; jB0 = -6; jB1 = 0; kA0 = -2; kA1 = 0; kB0 = -4; kB1 = 0; iStep = -1; jStep = -1; kStep = -4;
+  iLB = 3;
+  iUB = -2;
+  jA0 = -3;
+  jA1 = 0;
+  jB0 = -6;
+  jB1 = 0;
+  kA0 = -2;
+  kA1 = 0;
+  kB0 = -4;
+  kB1 = 0;
+  iStep = -1;
+  jStep = -1;
+  kStep = -4;
   PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
          "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
          iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
@@ -49,6 +61,5 @@ int main() {
     }
   }
 
-return fail;
+  return fail;
 }
-
diff --git a/openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c b/openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c
index 9a10b5d01895cd..985211172e6283 100644
--- a/openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c
+++ b/openmp/runtime/test/worksharing/for/collapse_many_GTGEGT_int.c
@@ -14,16 +14,28 @@
 #define DLOOP_GE1
 #define DLOOP_GT2
 
-#define LOOP                                                                    \
-        for (i = iLB; i COMPARE0 iUB; i += iStep)                               \
-            for (j = jA0; j COMPARE1 jB0; j += jStep)                           \
-                for (k = kA0; k COMPARE2 kB0; k += kStep)
+#define LOOP                                                                   \
+  for (i = iLB; i COMPARE0 iUB; i += iStep)                                    \
+    for (j = jA0; j COMPARE1 jB0; j += jStep)                                  \
+      for (k = kA0; k COMPARE2 kB0; k += kStep)
 #include "collapse_test.inc"
 
 int main() {
   int fail;
 
-  iLB = 3; iUB = -2; jA0 = -3; jA1 = 0; jB0 = -6; jB1 = 0; kA0 = -2; kA1 = 0; kB0 = -4; kB1 = 0; iStep = -1; jStep = -1; kStep = -4;
+  iLB = 3;
+  iUB = -2;
+  jA0 = -3;
+  jA1 = 0;
+  jB0 = -6;
+  jB1 = 0;
+  kA0 = -2;
+  kA1 = 0;
+  kB0 = -4;
+  kB1 = 0;
+  iStep = -1;
+  jStep = -1;
+  kStep = -4;
   PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
          "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
          iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
@@ -55,6 +67,5 @@ int main() {
     }
   }
 
-return fail;
+  return fail;
 }
-
diff --git a/openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c b/openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c
index 171b02daf8b24f..47e3b42226c838 100644
--- a/openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c
+++ b/openmp/runtime/test/worksharing/for/collapse_many_LTLEGE_int.c
@@ -9,16 +9,28 @@
 #define COMPARE0 <
 #define COMPARE1 <=
 #define COMPARE2 >=
-#define LOOP                                                                    \
-        for (i = iLB; i COMPARE0 iUB; i += iStep)                               \
-            for (j = jA0; j COMPARE1 jB0; j += jStep)                           \
-                for (k = kA0; k COMPARE2 kB0; k += kStep)
+#define LOOP                                                                   \
+  for (i = iLB; i COMPARE0 iUB; i += iStep)                                    \
+    for (j = jA0; j COMPARE1 jB0; j += jStep)                                  \
+      for (k = kA0; k COMPARE2 kB0; k += kStep)
 #include "collapse_test.inc"
 
 int main() {
   int fail;
 
-  iLB = -2; iUB = 3; jA0 = -3; jA1 = 0; jB0 = -6; jB1 = 0; kA0 = -2; kA1 = 0; kB0 = -4; kB1 = 0; iStep = -1; jStep = -1; kStep = -4;
+  iLB = -2;
+  iUB = 3;
+  jA0 = -3;
+  jA1 = 0;
+  jB0 = -6;
+  jB1 = 0;
+  kA0 = -2;
+  kA1 = 0;
+  kB0 = -4;
+  kB1 = 0;
+  iStep = -1;
+  jStep = -1;
+  kStep = -4;
   PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
          "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
          iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
@@ -50,6 +62,5 @@ int main() {
     }
   }
 
-return fail;
+  return fail;
 }
-
diff --git a/openmp/runtime/test/worksharing/for/collapse_many_int.c b/openmp/runtime/test/worksharing/for/collapse_many_int.c
index 6a126e1b49b31b..4455602df8a23b 100644
--- a/openmp/runtime/test/worksharing/for/collapse_many_int.c
+++ b/openmp/runtime/test/worksharing/for/collapse_many_int.c
@@ -13,41 +13,61 @@
       for (k = j * kA1 + kA0; k <= j * kB1 + kB0; k += kStep)
 #include "collapse_test.inc"
 
-int main()
-{
-    int fail = 0;
-
-    iLB = -2; iUB = 3; jA0 = -7; jA1 = -1; jB0 = 13; jB1 = 3; kA0 = -20; kA1 = -2; kB0 = 111; kB1 = -1; iStep = 5; jStep = 9; kStep = 10;
-    PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
-        iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
-    fail = fail || (test()==0);
-
-    if (!fail) {
-
-        // NOTE: if a loop on some level won't execute  for all iterations of an outer loop, it still should work. 
-        // Runtime doesn't require lower bounds to be <= upper bounds for all possible i, j, k.
-
-        iLB = -2; iUB = 3; jA0 = -7; jB0 = 5; kA0 = -13; kB0 = 37;
-
-        for (kA1 = -2; kA1 <= 2; ++kA1) {                                       // <=
-            for (kB1 = -2; kB1 <= 2; ++kB1) {
-                for (jA1 = -3; jA1 <= 3; ++jA1) {
-                    for (jB1 = -3; jB1 <= 3; ++jB1) {
-                        for (iStep = 1; iStep <= 3; ++iStep) {
-                            for (jStep = 2; jStep <= 6; jStep += 2) {
-                                for (kStep = 2; kStep <= 8; kStep += 3) {
-                                    PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
-                                         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
-                                    fail = fail || (test() == 0);
-                                }
-                            }
-                        }
-                    }
+int main() {
+  int fail = 0;
+
+  iLB = -2;
+  iUB = 3;
+  jA0 = -7;
+  jA1 = -1;
+  jB0 = 13;
+  jB1 = 3;
+  kA0 = -20;
+  kA1 = -2;
+  kB0 = 111;
+  kB1 = -1;
+  iStep = 5;
+  jStep = 9;
+  kStep = 10;
+  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; jB1=%d; kA0=%d; "
+         "kA1=%d; kB0=%d; kB1=%d; iStep=%d; jStep=%d; kStep=%d;\n",
+         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1, iStep, jStep, kStep);
+  fail = fail || (test() == 0);
+
+  if (!fail) {
+
+    // NOTE: if a loop on some level won't execute  for all iterations of an
+    // outer loop, it still should work. Runtime doesn't require lower bounds to
+    // be <= upper bounds for all possible i, j, k.
+
+    iLB = -2;
+    iUB = 3;
+    jA0 = -7;
+    jB0 = 5;
+    kA0 = -13;
+    kB0 = 37;
+
+    for (kA1 = -2; kA1 <= 2; ++kA1) { // <=
+      for (kB1 = -2; kB1 <= 2; ++kB1) {
+        for (jA1 = -3; jA1 <= 3; ++jA1) {
+          for (jB1 = -3; jB1 <= 3; ++jB1) {
+            for (iStep = 1; iStep <= 3; ++iStep) {
+              for (jStep = 2; jStep <= 6; jStep += 2) {
+                for (kStep = 2; kStep <= 8; kStep += 3) {
+                  PRINTF("\nTrying iLB=%d; iUB=%d; jA0=%d; jA1=%d; jB0=%d; "
+                         "jB1=%d; kA0=%d; kA1=%d; kB0=%d; kB1=%d; iStep=%d; "
+                         "jStep=%d; kStep=%d;\n",
+                         iLB, iUB, jA0, jA1, jB0, jB1, kA0, kA1, kB0, kB1,
+                         iStep, jStep, kStep);
+                  fail = fail || (test() == 0);
                 }
+              }
             }
+          }
         }
+      }
     }
+  }
 
-    return fail;
+  return fail;
 }
-
diff --git a/openmp/runtime/test/worksharing/for/collapse_one_int.c b/openmp/runtime/test/worksharing/for/collapse_one_int.c
index 122f9a5a87b160..437d4bff31eb37 100644
--- a/openmp/runtime/test/worksharing/for/collapse_one_int.c
+++ b/openmp/runtime/test/worksharing/for/collapse_one_int.c
@@ -13,13 +13,20 @@
 
 #include "collapse_test.inc"
 
-int main()
-{
-    int fail;
-    iLB = -2; iUB = 3; jA0 = -7; jB0 = 13; kA0 = -20; kB0 = 111; iStep = 5; jStep = 9; kStep = 10;
-    PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jB0=%d; kA0=%d; kB0=%d; iStep=%d; jStep=%d; kStep=%d;\n",
-        iLB, iUB, jA0, jB0, kA0, kB0, iStep, jStep, kStep);
-    fail = (test() == 0);
-    return fail;
+int main() {
+  int fail;
+  iLB = -2;
+  iUB = 3;
+  jA0 = -7;
+  jB0 = 13;
+  kA0 = -20;
+  kB0 = 111;
+  iStep = 5;
+  jStep = 9;
+  kStep = 10;
+  PRINTF("\nOne off iLB=%d; iUB=%d; jA0=%d; jB0=%d; kA0=%d; kB0=%d; iStep=%d; "
+         "jStep=%d; kStep=%d;\n",
+         iLB, iUB, jA0, jB0, kA0, kB0, iStep, jStep, kStep);
+  fail = (test() == 0);
+  return fail;
 }
-



More information about the Openmp-commits mailing list