[Openmp-commits] [openmp] fix endianness dependent definitions in kmp.h for MSVC (PR #84530)
Vadim Paretsky via Openmp-commits
openmp-commits at lists.llvm.org
Fri Mar 8 11:03:17 PST 2024
https://github.com/vadikp-intel updated https://github.com/llvm/llvm-project/pull/84530
>From 75d7c576bf4e5a0f13a4fdb0fda036e3955bed89 Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Fri, 8 Mar 2024 10:17:46 -0800
Subject: [PATCH 1/3] fix endianness dependent definitions for MSVC in kmp.h
---
openmp/runtime/src/kmp.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 121e7e959129ea..59a90ea702f688 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2507,7 +2507,7 @@ typedef struct kmp_depend_info {
union {
kmp_uint8 flag; // flag as an unsigned char
struct { // flag as a set of 8 bits
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Same fields as in the #else branch, but in reverse order */
unsigned all : 1;
unsigned unused : 3;
@@ -2672,7 +2672,7 @@ typedef struct kmp_task_stack {
#endif // BUILD_TIED_TASK_STACK
typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
/* Same fields as in the #else branch, but in reverse order */
#if OMPX_TASKGRAPH
unsigned reserved31 : 6;
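
Why the defined() guard matters here: __BYTE_ORDER__ and __ORDER_BIG_ENDIAN__ are predefined by GCC and Clang but not by MSVC, and the preprocessor replaces identifiers that are not macros with 0 inside #if expressions, so the unguarded comparison evaluates as 0 == 0 and silently selects the big-endian bit-field order on little-endian Windows targets. The stand-alone sketch below is illustrative only (demo_flags and the raw-word check are hypothetical, not part of the patch) and shows the guarded pattern:

    /* Illustrative only: the guarded endianness check from the patch applied
       to a toy bit-field struct. */
    #include <stdio.h>
    #include <string.h>

    typedef struct {
    #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
      /* big-endian: declare the bit-fields in reverse order */
      unsigned rest : 31;
      unsigned in : 1;
    #else
      /* little-endian, and MSVC, where __BYTE_ORDER__ is not defined at all */
      unsigned in : 1;
      unsigned rest : 31;
    #endif
    } demo_flags;

    int main(void) {
      demo_flags f;
      unsigned raw;
      memset(&f, 0, sizeof(f));
      f.in = 1;
      memcpy(&raw, &f, sizeof(raw));
      printf("flag word: 0x%08x\n", raw); /* 0x00000001 on x86/ARM64 Windows */
      return 0;
    }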
>From 570edd0dcb4d76e1714612328fdf14ca5c57d52f Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Fri, 8 Mar 2024 10:34:46 -0800
Subject: [PATCH 2/3] fix an additional instance in kmp_lock.h
---
openmp/runtime/src/kmp_lock.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index e2a0cda01a9718..16eef38ac4fe7b 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -120,7 +120,8 @@ extern void __kmp_validate_locks(void);
struct kmp_base_tas_lock {
// KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \
+ __LP64__
// Flip the ordering of the high and low 32-bit member to be consistent
// with the memory layout of the address in 64-bit big-endian.
kmp_int32 depth_locked; // depth locked, for nested locks only
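
For context on the __LP64__ part of this check (illustration only, not taken from the patch): on a 64-bit big-endian target the low-order 32 bits of an 8-byte word sit at the higher address, so the two 32-bit lock members must be declared in the opposite order for the poll word to keep overlapping the low half. A minimal sketch with hypothetical names (pair64, poll, depth):

    #include <stdio.h>
    #include <stdint.h>

    /* Two 32-bit members overlaid on one 64-bit word, mirroring the idea in
       kmp_base_tas_lock. */
    typedef union {
      uint64_t whole;
      struct {
    #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \
        defined(__LP64__)
        int32_t depth; /* offset 0 = high half of the 64-bit word */
        int32_t poll;  /* offset 4 = low half */
    #else
        int32_t poll;  /* offset 0 = low half of the 64-bit word */
        int32_t depth; /* offset 4 = high half */
    #endif
      } parts;
    } pair64;

    int main(void) {
      pair64 p;
      p.whole = 1; /* a small value stored through the 64-bit view */
      /* With the flipped declaration order on big-endian LP64, poll still maps
         to the low-order 32 bits, so this prints poll = 1 either way. */
      printf("poll = %d, depth = %d\n", (int)p.parts.poll, (int)p.parts.depth);
      return 0;
    }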
>From 919bc9588f817401d23b2934d385e519ec44b4da Mon Sep 17 00:00:00 2001
From: Vadim Paretsky <b-vadipa at microsoft.com>
Date: Fri, 8 Mar 2024 11:03:04 -0800
Subject: [PATCH 3/3] clang format
---
openmp/runtime/src/kmp.h | 13 +-
openmp/runtime/src/kmp_lock.h | 7 +-
.../for/omp_for_schedule_static_collapse.c | 203 ++++++++++++++++++
3 files changed, 214 insertions(+), 9 deletions(-)
create mode 100644 openmp/runtime/test/worksharing/for/omp_for_schedule_static_collapse.c
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 59a90ea702f688..b5c0307b49e192 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -20,7 +20,7 @@
/* This fix replaces gettimeofday with clock_gettime for better scalability on
the Altix. Requires user code to be linked with -lrt. */
-//#define FIX_SGI_CLOCK
+// #define FIX_SGI_CLOCK
/* Defines for OpenMP 3.0 tasking and auto scheduling */
@@ -463,8 +463,9 @@ enum sched_type : kmp_int32 {
#define SCHEDULE_WITHOUT_MODIFIERS(s) \
(enum sched_type)( \
(s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
-#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
-#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
+#define SCHEDULE_HAS_MONOTONIC(s) (((s) & kmp_sch_modifier_monotonic) != 0)
+#define SCHEDULE_HAS_NONMONOTONIC(s) \
+ (((s) & kmp_sch_modifier_nonmonotonic) != 0)
#define SCHEDULE_HAS_NO_MODIFIERS(s) \
(((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
#define SCHEDULE_GET_MODIFIERS(s) \
@@ -2729,7 +2730,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
unsigned freed : 1; /* 1==freed, 0==allocated */
unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
#if OMPX_TASKGRAPH
- unsigned onced : 1; /* 1==ran once already, 0==never ran, record & replay purposes */
+ unsigned onced : 1; /* 1==ran once already, 0==never ran, record & replay
+ purposes */
unsigned reserved31 : 6; /* reserved for library use */
#else
unsigned reserved31 : 7; /* reserved for library use */
@@ -3881,7 +3883,8 @@ extern void __kmp_check_stack_overlap(kmp_info_t *thr);
extern void __kmp_expand_host_name(char *buffer, size_t size);
extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM))
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || \
+ (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM))
extern void
__kmp_initialize_system_tick(void); /* Initialize timer tick value */
#endif
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 16eef38ac4fe7b..cd8927095e7048 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -120,7 +120,7 @@ extern void __kmp_validate_locks(void);
struct kmp_base_tas_lock {
// KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
-#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \
__LP64__
// Flip the ordering of the high and low 32-bit member to be consistent
// with the memory layout of the address in 64-bit big-endian.
@@ -661,9 +661,8 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
KMP_INIT_BACKOFF(time); \
do { \
KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
- } while ( \
- lck->tas.lk.poll != 0 || \
- !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
+ } while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \
+ &lck->tas.lk.poll, 0, gtid + 1)); \
} \
KMP_FSYNC_ACQUIRED(lck); \
} else { \
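
The reflowed loop above is the usual test-and-test-and-set acquire: spin (yielding or backing off) while the lock word is non-zero, then try to claim it with an acquire-ordered compare-and-swap. A self-contained sketch of the same pattern in C11 atomics, with hypothetical names (tas_word, tas_acquire) rather than the runtime's KMP_* machinery:

    #include <stdatomic.h>

    typedef struct {
      _Atomic int poll; /* 0 == free, gtid + 1 == owning thread */
    } tas_word;

    static void tas_acquire(tas_word *lk, int gtid) {
      int expected;
      do {
        /* spin while the lock looks taken; the runtime yields/backs off here */
        while (atomic_load_explicit(&lk->poll, memory_order_relaxed) != 0)
          ;
        expected = 0;
      } while (!atomic_compare_exchange_strong_explicit(
          &lk->poll, &expected, gtid + 1, memory_order_acquire,
          memory_order_relaxed));
    }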
diff --git a/openmp/runtime/test/worksharing/for/omp_for_schedule_static_collapse.c b/openmp/runtime/test/worksharing/for/omp_for_schedule_static_collapse.c
new file mode 100644
index 00000000000000..637eb6a3e573b9
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/omp_for_schedule_static_collapse.c
@@ -0,0 +1,203 @@
+// RUN: %libomp-compile-and-run
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "omp_testsuite.h"
+
+#ifdef _MSC_VER
+#define EFFICIENCY_CHECK
+#endif
+
+int* Alloc(unsigned size) {
+ int* p = malloc(size * sizeof(int));
+ memset(p, 0, size * sizeof(int));
+ return p;
+}
+
+#define I(i,j) (i * (upper_bound + outer_eq) + j)
+char *loop_type[] = {"< ,<", "< ,<=", "<=,< ", "<=,<="};
+
+int test_omp_for_schedule_static_collapse_lower_triangle(unsigned num_threads, unsigned upper_bound, int outer_eq, int inner_eq) {
+
+ unsigned outer_iterations = upper_bound + outer_eq;
+ unsigned first_inner_element = inner_eq;
+ unsigned last_inner_element = first_inner_element + (outer_iterations - 1);
+ unsigned total_iterations =
+ (first_inner_element + last_inner_element) * outer_iterations / 2;
+ int i, j;
+
+ /* To ensure correctness, only valid iterations may execute, and each must
+ execute exactly once. execution_count stores how many times each ran. */
+ int *execution_count = Alloc(outer_iterations * outer_iterations);
+ /* Stores the number of iterations executed by each thread. */
+ int* iterations_per_thread = Alloc(num_threads);
+
+ char *loop_type[] = {"< ,<", "< ,<=", "<=,< ", "<=,<="};
+
+ omp_set_num_threads(num_threads);
+
+#ifdef VERBOSE
+ fprintf(stderr, "INFO: Using %6d threads for %6d outer iterations (%6d chunks) loop type lower triangle: %s - ", num_threads, upper_bound, total_iterations, loop_type[outer_eq*2 + inner_eq]);
+#endif
+
+#pragma omp parallel shared(iterations_per_thread, execution_count)
+ { /* begin of parallel */
+ /* Lower triangular execution_count matrix */
+ #pragma omp for schedule (static) collapse(2)
+ for(i = 0; i < upper_bound + outer_eq; i++) {
+ for (j = 0; j < i + inner_eq; j++) {
+ iterations_per_thread[omp_get_thread_num()]++;
+ execution_count[I(i, j)]++;
+ }
+ }/* end of for*/
+ }/* end of parallel */
+
+ /* check the execution_count array */
+ for (i = 0; i < upper_bound + outer_eq; i++) {
+ for (j = 0; j < i + inner_eq; j++) {
+ /* iterations with j < i + inner_eq are valid and must execute exactly once */
+ if (execution_count[I(i, j)] != 1) {
+#ifdef VERBOSE
+ fprintf(
+ stderr,
+ "ERROR: valid iteration [%i,%i]:%i not executed exactly once.\n", i,
+ j, execution_count[I(i, j)]);
+#endif
+ return 0;
+ }
+ }
+ for (j = i + inner_eq; j < upper_bound + outer_eq; j++) {
+ /* iterations with j >= i + inner_eq are invalid and must not have executed */
+ if (execution_count[I(i, j)] > 0) {
+#ifdef VERBOSE
+ fprintf(stderr, "ERROR: invalid iteration [%i,%i]:%i executed.\n", i,
+ j, execution_count[I(i, j)]);
+#endif
+ return 0;
+ }
+ }
+ }
+#ifdef EFFICIENCY_CHECK
+ /* Ensure the number of iterations executed by each thread is within bounds */
+ for(i = 0;i < num_threads; i++) {
+ if (iterations_per_thread[i] < total_iterations / num_threads ||
+ iterations_per_thread[i] > total_iterations / num_threads + 1) {
+#ifdef VERBOSE
+ fprintf(stderr, "ERROR: Inefficient Collapse thread:%i [%i,%i]:%i\n", i,
+ total_iterations / num_threads,
+ total_iterations / num_threads + 1, iterations_per_thread[i]);
+#endif
+ return 0;
+ }
+ }
+#endif
+#ifdef VERBOSE
+ fprintf(stderr, "PASSED\r");
+#endif
+
+ free(execution_count);
+ free(iterations_per_thread);
+ return 1;
+}
+
+int test_omp_for_schedule_static_collapse_upper_triangle(unsigned num_threads, unsigned upper_bound) {
+
+ int outer_eq = 0;
+ int inner_eq = 0;
+ unsigned outer_iterations = upper_bound + outer_eq;
+ unsigned last_inner_element = inner_eq;
+ unsigned first_inner_element = last_inner_element + outer_iterations + 1;
+ unsigned total_iterations =
+ (first_inner_element + last_inner_element) * outer_iterations / 2;
+ int i, j;
+
+ /* To ensure correctness, only valid iterations may execute, and each must
+ execute exactly once. execution_count stores how many times each ran. */
+ int *execution_count = Alloc(outer_iterations * outer_iterations);
+ /* Stores the number of iterations executed by each thread. */
+ int* iterations_per_thread = Alloc(num_threads);
+
+ omp_set_num_threads(num_threads);
+#ifdef VERBOSE
+ fprintf(stderr, "INFO: Using %6d threads for %6d outer iterations (%6d chunks) loop type upper triangle: %s - ", num_threads, upper_bound, total_iterations, loop_type[outer_eq*2 + inner_eq]);
+#endif
+
+#pragma omp parallel shared(iterations_per_thread, execution_count)
+ { /* begin of parallel */
+ /* Upper triangular execution_count matrix */
+ #pragma omp for schedule (static) collapse(2)
+ for(i = 0; i < upper_bound + outer_eq; i++) {
+ for (j = i; j < upper_bound + inner_eq; j++) {
+ iterations_per_thread[omp_get_thread_num()]++;
+ execution_count[I(i, j)]++;
+ }
+ }/* end of for*/
+ }/* end of parallel */
+
+ /* check the execution_count array */
+ for (i = 0; i < upper_bound + outer_eq; i++) {
+ for (j = i; j < upper_bound + inner_eq; j++) {
+ /* iterations with j >= i are valid and must be executed exactly once */
+ if (execution_count[I(i, j)] != 1) {
+#ifdef VERBOSE
+ fprintf(
+ stderr,
+ "ERROR: valid iteration [%i,%i]:%i not executed exactly once.\n", i,
+ j, execution_count[I(i, j)]);
+#endif
+ return 0;
+ }
+ }
+ for (j = 0; j < i; j++) {
+ /* iterations with j < i are invalid and must not have executed */
+ if (execution_count[I(i, j)] > 0) {
+#ifdef VERBOSE
+ fprintf(stderr, "ERROR: invalid iteration [%i,%i]:%i executed.\n", i,
+ j, execution_count[I(i, j)]);
+#endif
+ return 0;
+ }
+ }
+ }
+#ifdef EFFICIENCY_CHECK
+ /* Ensure the number of iterations executed by each thread is within bounds */
+ for(i = 0;i < num_threads; i++) {
+ if (iterations_per_thread[i] < total_iterations / num_threads ||
+ iterations_per_thread[i] > total_iterations / num_threads + 1) {
+#ifdef VERBOSE
+ fprintf(stderr, "ERROR: Inefficient Collapse thread:%i [%i,%i]:%i\n", i,
+ total_iterations / num_threads,
+ total_iterations / num_threads + 1, iterations_per_thread[i]);
+#endif
+ return 0;
+ }
+ }
+#endif
+#ifdef VERBOSE
+ fprintf(stderr, "PASSED\r");
+#endif
+
+ free(execution_count);
+ free(iterations_per_thread);
+ return 1;
+}
+
+
+int main(int narg, char* argv[]) {
+ unsigned min_threads = 1;
+ unsigned max_threads = 64;
+ unsigned min_iter = 0;
+ unsigned max_iter = 64;
+ int i, j, outer_eq, inner_eq;
+
+ for(i = min_threads; i <= max_threads; i++)
+ for (j = min_iter; j <= max_iter; j++) {
+ for (outer_eq = 0;outer_eq <= 1; outer_eq++)
+ for (inner_eq = 0; inner_eq <= 1; inner_eq++)
+ if (!test_omp_for_schedule_static_collapse_lower_triangle(i, j, outer_eq, inner_eq))
+ return 1;
+ if (!test_omp_for_schedule_static_collapse_upper_triangle(i, j))
+ return 1;
+ }
+ return 0;
+}
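
A note on the arithmetic in the new test (for the reader, not part of the patch): both test functions compute the expected trip count of the collapsed loop nest as the sum of an arithmetic progression of per-row inner trip counts,

    total_iterations = (first_inner_element + last_inner_element) * outer_iterations / 2

and the EFFICIENCY_CHECK then requires each thread's share under schedule(static) collapse(2) to be either total_iterations / num_threads or that value plus one.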