[Openmp-commits] [openmp] 18b6724 - [OpenMP][VE] Support OpenMP runtime on VE
Kazushi Marukawa via Openmp-commits
openmp-commits at lists.llvm.org
Sat Sep 9 16:30:00 PDT 2023
Author: Kazushi (Jam) Marukawa
Date: 2023-09-10T08:29:53+09:00
New Revision: 18b67243550a4f2ef12cc2d93a12f5bfecb50752
URL: https://github.com/llvm/llvm-project/commit/18b67243550a4f2ef12cc2d93a12f5bfecb50752
DIFF: https://github.com/llvm/llvm-project/commit/18b67243550a4f2ef12cc2d93a12f5bfecb50752.diff
LOG: [OpenMP][VE] Support OpenMP runtime on VE
Support OpenMP runtime library on VE. This patch makes OpenMP compilable
for VE architecture. Almost all tests run correctly on VE.
Reviewed By: tianshilei1992
Differential Revision: https://reviews.llvm.org/D159401
Added:
Modified:
openmp/runtime/CMakeLists.txt
openmp/runtime/cmake/LibompGetArchitecture.cmake
openmp/runtime/cmake/LibompUtils.cmake
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_affinity.h
openmp/runtime/src/kmp_os.h
openmp/runtime/src/kmp_platform.h
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
openmp/runtime/src/z_Linux_asm.S
openmp/runtime/src/z_Linux_util.cpp
openmp/runtime/test/ompt/callback.h
Removed:
################################################################################
diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 2b7a3eb5bfce932..4441c4babdc07c0 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD})
# If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake
libomp_get_architecture(LIBOMP_DETECTED_ARCH)
set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING
- "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).")
+ "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64/ve).")
# Should assertions be enabled? They are on by default.
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
"enable assertions?")
@@ -63,6 +63,8 @@ else() # Part of LLVM build
set(LIBOMP_ARCH riscv64)
elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64")
set(LIBOMP_ARCH loongarch64)
+ elseif(LIBOMP_NATIVE_ARCH MATCHES "ve")
+ set(LIBOMP_ARCH ve)
else()
# last ditch effort
libomp_get_architecture(LIBOMP_ARCH)
@@ -83,7 +85,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64")
endif()
endif()
-libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64)
+libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64 ve)
set(LIBOMP_LIB_TYPE normal CACHE STRING
"Performance,Profiling,Stubs library (normal/profile/stubs)")
@@ -162,6 +164,7 @@ set(MIPS64 FALSE)
set(MIPS FALSE)
set(RISCV64 FALSE)
set(LOONGARCH64 FALSE)
+set(VE FALSE)
if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture
set(IA32 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture
@@ -188,6 +191,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture
set(RISCV64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture
set(LOONGARCH64 TRUE)
+elseif("${LIBOMP_ARCH}" STREQUAL "ve") # VE architecture
+ set(VE TRUE)
endif()
# Set some flags based on build_type
diff --git a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake
index c338493bad538ec..98bfce9ae990a7b 100644
--- a/openmp/runtime/cmake/LibompGetArchitecture.cmake
+++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake
@@ -49,6 +49,8 @@ function(libomp_get_architecture return_arch)
#error ARCHITECTURE=riscv64
#elif defined(__loongarch__) && __loongarch_grlen == 64
#error ARCHITECTURE=loongarch64
+ #elif defined(__ve__)
+ #error ARCHITECTURE=ve
#else
#error ARCHITECTURE=UnknownArchitecture
#endif
diff --git a/openmp/runtime/cmake/LibompUtils.cmake b/openmp/runtime/cmake/LibompUtils.cmake
index b5ffc97fca3d217..0151ca0ea826bd7 100644
--- a/openmp/runtime/cmake/LibompUtils.cmake
+++ b/openmp/runtime/cmake/LibompUtils.cmake
@@ -111,6 +111,8 @@ function(libomp_get_legal_arch return_arch_string)
set(${return_arch_string} "RISCV64" PARENT_SCOPE)
elseif(${LOONGARCH64})
set(${return_arch_string} "LOONGARCH64" PARENT_SCOPE)
+ elseif(${VE})
+ set(${return_arch_string} "VE" PARENT_SCOPE)
else()
set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE)
libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}")
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 33895f8fbb1e36f..f6bfa242aaa55f3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1170,6 +1170,10 @@ extern void __kmp_init_target_task();
#elif KMP_ARCH_X86_64
#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
+#elif KMP_ARCH_VE
+// Minimum stack size for pthread for VE is 4MB.
+// https://www.hpc.nec/documents/veos/en/glibc/Difference_Points_glibc.htm
+#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
#else
#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
#endif
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index fbc0d400de772e0..97808b528538097 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -286,6 +286,17 @@ class KMPHwlocAffinity : public KMPAffinity {
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
+#elif KMP_ARCH_VE
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 203
+#elif __NR_sched_setaffinity != 203
+#error Wrong code for setaffinity system call.
+#endif /* __NR_sched_setaffinity */
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 204
+#elif __NR_sched_getaffinity != 204
+#error Wrong code for getaffinity system call.
+#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index fec589ab6018a9c..2c632112a8d8e35 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -178,7 +178,7 @@ typedef unsigned long long kmp_uint64;
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
+ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
#else
#error "Can't determine size_t printf format specifier."
@@ -1043,7 +1043,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
#endif /* KMP_OS_WINDOWS */
#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
+ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
#if KMP_OS_WINDOWS
#undef KMP_MB
#define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst)
diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h
index fcfd8bc5d8d9ae3..1a2197d338342ac 100644
--- a/openmp/runtime/src/kmp_platform.h
+++ b/openmp/runtime/src/kmp_platform.h
@@ -93,6 +93,7 @@
#define KMP_ARCH_MIPS64 0
#define KMP_ARCH_RISCV64 0
#define KMP_ARCH_LOONGARCH64 0
+#define KMP_ARCH_VE 0
#if KMP_OS_WINDOWS
#if defined(_M_AMD64) || defined(__x86_64)
@@ -142,6 +143,9 @@
#elif defined __loongarch__ && __loongarch_grlen == 64
#undef KMP_ARCH_LOONGARCH64
#define KMP_ARCH_LOONGARCH64 1
+#elif defined __ve__
+#undef KMP_ARCH_VE
+#define KMP_ARCH_VE 1
#endif
#endif
@@ -206,7 +210,7 @@
// TODO: Fixme - This is clever, but really fugly
#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \
KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \
- KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64)
+ KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE)
#error Unknown or unsupported architecture
#endif
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 6a1ba97311dcc68..385fb6bc49cc5c7 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -8830,7 +8830,7 @@ __kmp_determine_reduction_method(
int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
- KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
+ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index a452b7643bdb436..ff37eb4ed175e67 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -162,6 +162,10 @@
#define ITT_ARCH_ARM64 6
#endif /* ITT_ARCH_ARM64 */
+#ifndef ITT_ARCH_VE
+#define ITT_ARCH_VE 8
+#endif /* ITT_ARCH_VE */
+
#ifndef ITT_ARCH
#if defined _M_IX86 || defined __i386__
#define ITT_ARCH ITT_ARCH_IA32
@@ -175,6 +179,8 @@
#define ITT_ARCH ITT_ARCH_ARM64
#elif defined __powerpc64__
#define ITT_ARCH ITT_ARCH_PPC64
+#elif defined __ve__
+#define ITT_ARCH ITT_ARCH_VE
#endif
#endif
diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S
index 27b063f09e7a16d..2c0df6e3b08505a 100644
--- a/openmp/runtime/src/z_Linux_asm.S
+++ b/openmp/runtime/src/z_Linux_asm.S
@@ -2060,6 +2060,198 @@ __kmp_invoke_microtask:
#endif /* KMP_ARCH_LOONGARCH64 */
+#if KMP_ARCH_VE
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)(int *gtid, int *tid, ...);
+//
+// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
+// void *p_argv[]
+// #if OMPT_SUPPORT
+// ,
+// void **exit_frame_ptr
+// #endif
+// ) {
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+// (*pkfn)(&gtid, &tid, argv[0], ...);
+//
+// return 1;
+// }
+//
+// Parameters:
+// s0: pkfn
+// s1: gtid
+// s2: tid
+// s3: argc
+// s4: p_argv
+// s5: exit_frame_ptr
+//
+// Locals:
+// __gtid: gtid param pushed on stack so can pass &gtid to pkfn
+// __tid: tid param pushed on stack so can pass &tid to pkfn
+//
+// Temp. registers:
+//
+// s34: used to calculate the dynamic stack size
+// s35: used as temporary for stack placement calculation
+// s36: used as temporary for stack arguments
+// s37: used as temporary for number of remaining pkfn parms
+// s38: used to traverse p_argv array
+//
+// return: s0 (always 1/TRUE)
+//
+
+__gtid = -4
+__tid = -8
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+ .text
+ .globl __kmp_invoke_microtask
+ // A function requires 8 bytes align.
+ .p2align 3
+ .type __kmp_invoke_microtask, @function
+__kmp_invoke_microtask:
+ .cfi_startproc
+
+ // First, save fp and lr. VE stores them at caller stack frame.
+ st %fp, 0(, %sp)
+ st %lr, 8(, %sp)
+ or %fp, 0, %sp
+ .cfi_def_cfa %fp, 0
+ .cfi_offset %lr, 8
+ .cfi_offset %fp, 0
+
+ // Compute the dynamic stack size:
+ //
+ // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them
+ // by reference
+ // - We need 8 bytes for each argument. We have two + 'argc'
+ // arguments (consider &gtid and &tid). We need to reserve
+ // (argc + 2) * 8 bytes.
+ // - We need 176 bytes for RSA and others
+ //
+ // The total number of bytes is then (argc + 2) * 8 + 8 + 176.
+ //
+ // |------------------------------|
+ // | return address of callee | 8(%fp)
+ // |------------------------------|
+ // | frame pointer of callee | 0(%fp)
+ // |------------------------------| <------------------ %fp
+ // | __tid / __gtid | -8(%fp) / -4(%fp)
+ // |------------------------------|
+ // | argc+2 for arguments | 176(%sp)
+ // |------------------------------|
+ // | RSA |
+ // |------------------------------|
+ // | return address |
+ // |------------------------------|
+ // | frame pointer |
+ // |------------------------------| <------------------ %sp
+
+ adds.w.sx %s34, 2, %s3
+ sll %s34, %s34, 3
+ lea %s34, 184(, %s34)
+ subs.l %sp, %sp, %s34
+
+ // Align the stack to 16 bytes.
+ and %sp, -16, %sp
+
+ // Save pkfn.
+ or %s12, 0, %s0
+
+ // Call host to allocate stack if it is necessary.
+ brge.l %sp, %sl, .L_kmp_pass
+ ld %s61, 24(, %tp)
+ lea %s63, 0x13b
+ shm.l %s63, 0(%s61)
+ shm.l %sl, 8(%s61)
+ shm.l %sp, 16(%s61)
+ monc
+
+.L_kmp_pass:
+ lea %s35, 176(, %sp)
+ adds.w.sx %s37, 0, %s3
+ or %s38, 0, %s4
+
+#if OMPT_SUPPORT
+ // Save frame pointer into exit_frame.
+ st %fp, 0(%s5)
+#endif
+
+ // Prepare arguments for the pkfn function (first 8 using s0-s7
+ // registers, but need to store stack also because of varargs).
+
+ stl %s1, __gtid(%fp)
+ stl %s2, __tid(%fp)
+
+ adds.l %s0, __gtid, %fp
+ st %s0, 0(, %s35)
+ adds.l %s1, __tid, %fp
+ st %s1, 8(, %s35)
+
+ breq.l 0, %s37, .L_kmp_call
+ ld %s2, 0(, %s38)
+ st %s2, 16(, %s35)
+
+ breq.l 1, %s37, .L_kmp_call
+ ld %s3, 8(, %s38)
+ st %s3, 24(, %s35)
+
+ breq.l 2, %s37, .L_kmp_call
+ ld %s4, 16(, %s38)
+ st %s4, 32(, %s35)
+
+ breq.l 3, %s37, .L_kmp_call
+ ld %s5, 24(, %s38)
+ st %s5, 40(, %s35)
+
+ breq.l 4, %s37, .L_kmp_call
+ ld %s6, 32(, %s38)
+ st %s6, 48(, %s35)
+
+ breq.l 5, %s37, .L_kmp_call
+ ld %s7, 40(, %s38)
+ st %s7, 56(, %s35)
+
+ breq.l 6, %s37, .L_kmp_call
+
+ // Prepare any additional argument passed through the stack.
+ adds.l %s37, -6, %s37
+ lea %s38, 48(, %s38)
+ lea %s35, 64(, %s35)
+.L_kmp_loop:
+ ld %s36, 0(, %s38)
+ st %s36, 0(, %s35)
+ adds.l %s37, -1, %s37
+ adds.l %s38, 8, %s38
+ adds.l %s35, 8, %s35
+ brne.l 0, %s37, .L_kmp_loop
+
+.L_kmp_call:
+ // Call pkfn function.
+ bsic %lr, (, %s12)
+
+ // Return value.
+ lea %s0, 1
+
+ // Restore stack and return.
+ or %sp, 0, %fp
+ ld %lr, 8(, %sp)
+ ld %fp, 0(, %sp)
+ b.l.t (, %lr)
+.Lfunc_end0:
+ .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
+ .cfi_endproc
+
+// -- End __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_VE */
+
#if KMP_ARCH_ARM || KMP_ARCH_MIPS
.data
COMMON .gomp_critical_user_, 32, 3
@@ -2073,7 +2265,8 @@ __kmp_unnamed_critical_addr:
#endif
#endif /* KMP_ARCH_ARM */
-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
+#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \
+ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
@@ -2088,7 +2281,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
#endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
- KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */
+ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE */
#if KMP_OS_LINUX
# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index ad3cb05fc37acdb..11d9ac8dc44792f 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -2456,7 +2456,7 @@ int __kmp_get_load_balance(int max) {
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
- KMP_ARCH_ARM)
+ KMP_ARCH_ARM || KMP_ARCH_VE)
// we really only need the case with 1 argument, because CLANG always build
// a struct of pointers to shared variables referenced in the outlined function
diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h
index 8180b3d2663f769..c5266e230c26f77 100644
--- a/openmp/runtime/test/ompt/callback.h
+++ b/openmp/runtime/test/ompt/callback.h
@@ -221,6 +221,13 @@ ompt_label_##id:
printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
ompt_get_thread_data()->value, ((char *)addr) - 4, \
((char *)addr) - 8, ((char *)addr) - 12)
+#elif KMP_ARCH_VE
+// On VE the NOP instruction is 8 byte long. In addition, the compiler inserts
+// a ??? instruction for non-void runtime functions which is ? bytes long.
+#define print_possible_return_addresses(addr) \
+ printf("%" PRIu64 ": current_address=%p or %p\n", \
+ ompt_get_thread_data()->value, ((char *)addr) - 8, \
+ ((char *)addr) - 8)
#else
#error Unsupported target architecture, cannot determine address offset!
#endif
More information about the Openmp-commits
mailing list