[Openmp-commits] [openmp] r307680 - Rename z_Linux_asm.s to z_Linux_asm.S
Dimitry Andric via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jul 11 11:04:56 PDT 2017
Author: dim
Date: Tue Jul 11 11:04:56 2017
New Revision: 307680
URL: http://llvm.org/viewvc/llvm-project?rev=307680&view=rev
Log:
Rename z_Linux_asm.s to z_Linux_asm.S
Summary:
On Unix, a .S file is normally an assembly source which must be
preprocessed with a C preprocessor, while a .s file is "plain" assembly.
The former is handled by the compiler driver (cc), the latter is
directly passed to the assembler binary (as).
Because z_Linux_asm.s is supposed to be preprocessed, rename it to .S,
so it can be automatically picked up correctly by build systems.
Reviewers: AndreyChurbanov, emaste, jlpeyton
Reviewed By: AndreyChurbanov
Subscribers: mgorny, openmp-commits
Differential Revision: https://reviews.llvm.org/D35171
Added:
openmp/trunk/runtime/src/z_Linux_asm.S (contents, props changed)
- copied, changed from r307679, openmp/trunk/runtime/src/z_Linux_asm.s
Removed:
openmp/trunk/runtime/src/z_Linux_asm.s
Modified:
openmp/trunk/runtime/src/CMakeLists.txt
Modified: openmp/trunk/runtime/src/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/CMakeLists.txt?rev=307680&r1=307679&r2=307680&view=diff
==============================================================================
--- openmp/trunk/runtime/src/CMakeLists.txt (original)
+++ openmp/trunk/runtime/src/CMakeLists.txt Tue Jul 11 11:04:56 2017
@@ -93,7 +93,7 @@ else()
# Unix specific files
libomp_append(LIBOMP_CXXFILES z_Linux_util.cpp)
libomp_append(LIBOMP_CXXFILES kmp_gsupport.cpp)
- libomp_append(LIBOMP_ASMFILES z_Linux_asm.s) # Unix assembly file
+ libomp_append(LIBOMP_ASMFILES z_Linux_asm.S) # Unix assembly file
endif()
libomp_append(LIBOMP_CFILES thirdparty/ittnotify/ittnotify_static.c LIBOMP_USE_ITT_NOTIFY)
libomp_append(LIBOMP_CXXFILES kmp_debugger.cpp LIBOMP_USE_DEBUGGER)
Copied: openmp/trunk/runtime/src/z_Linux_asm.S (from r307679, openmp/trunk/runtime/src/z_Linux_asm.s)
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.S?p2=openmp/trunk/runtime/src/z_Linux_asm.S&p1=openmp/trunk/runtime/src/z_Linux_asm.s&r1=307679&r2=307680&rev=307680&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.S Tue Jul 11 11:04:56 2017
@@ -1,4 +1,4 @@
-// z_Linux_asm.s: - microtasking routines specifically
+// z_Linux_asm.S: - microtasking routines specifically
// written for Intel platforms running Linux* OS
//
@@ -681,7 +681,7 @@ KMP_LABEL(invoke_3):
// -- Machine type P
// mark_description "Intel Corporation";
.ident "Intel Corporation"
-// -- .file "z_Linux_asm.s"
+// -- .file "z_Linux_asm.S"
.data
ALIGN 4
Propchange: openmp/trunk/runtime/src/z_Linux_asm.S
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: openmp/trunk/runtime/src/z_Linux_asm.S
------------------------------------------------------------------------------
svn:keywords = Author Date Id Rev URL
Propchange: openmp/trunk/runtime/src/z_Linux_asm.S
------------------------------------------------------------------------------
svn:mime-type = text/plain
Removed: openmp/trunk/runtime/src/z_Linux_asm.s
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.s?rev=307679&view=auto
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.s (removed)
@@ -1,1730 +0,0 @@
-// z_Linux_asm.s: - microtasking routines specifically
-// written for Intel platforms running Linux* OS
-
-//
-////===----------------------------------------------------------------------===//
-////
-//// The LLVM Compiler Infrastructure
-////
-//// This file is dual licensed under the MIT and the University of Illinois Open
-//// Source Licenses. See LICENSE.txt for details.
-////
-////===----------------------------------------------------------------------===//
-//
-
-// -----------------------------------------------------------------------
-// macros
-// -----------------------------------------------------------------------
-
-#include "kmp_config.h"
-
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-# if KMP_MIC
-// the 'delay r16/r32/r64' should be used instead of the 'pause'.
-// The delay operation has the effect of removing the current thread from
-// the round-robin HT mechanism, and therefore speeds up the issue rate of
-// the other threads on the same core.
-//
-// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
-// barrier time to increase greatly for 3 or more threads per core.
-//
-// A value of 100 works pretty well for up to 4 threads per core, but isn't
-// quite as fast as 0 for 2 threads per core.
-//
-// We need to check what happens for oversubscription / > 4 threads per core.
-// It is possible that we need to pass the delay value in as a parameter
-// that the caller determines based on the total # threads / # cores.
-//
-//.macro pause_op
-// mov $100, %rax
-// delay %rax
-//.endm
-# else
-# define pause_op .byte 0xf3,0x90
-# endif // KMP_MIC
-
-# if KMP_OS_DARWIN
-# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
-# define KMP_LABEL(x) L_##x // form the name of label
-.macro KMP_CFI_DEF_OFFSET
-.endmacro
-.macro KMP_CFI_OFFSET
-.endmacro
-.macro KMP_CFI_REGISTER
-.endmacro
-.macro KMP_CFI_DEF
-.endmacro
-.macro ALIGN
- .align $0
-.endmacro
-.macro DEBUG_INFO
-/* Not sure what .size does in icc, not sure if we need to do something
- similar for OS X*.
-*/
-.endmacro
-.macro PROC
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE($0)
-KMP_PREFIX_UNDERSCORE($0):
-.endmacro
-# else // KMP_OS_DARWIN
-# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
-// Format labels so that they don't override function names in gdb's backtraces
-// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
-// on OS X*)
-# if KMP_MIC
-# define KMP_LABEL(x) L_##x // local label
-# else
-# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
-# endif // KMP_MIC
-.macro ALIGN size
- .align 1<<(\size)
-.endm
-.macro DEBUG_INFO proc
- .cfi_endproc
-// Close the routine: end its CFI region, then record symbol type and size.
- .align 16
- .type \proc,@function
- .size \proc,.-\proc
-.endm
-.macro PROC proc
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE(\proc)
-KMP_PREFIX_UNDERSCORE(\proc):
- .cfi_startproc
-.endm
-.macro KMP_CFI_DEF_OFFSET sz
- .cfi_def_cfa_offset \sz
-.endm
-.macro KMP_CFI_OFFSET reg, sz
- .cfi_offset \reg,\sz
-.endm
-.macro KMP_CFI_REGISTER reg
- .cfi_def_cfa_register \reg
-.endm
-.macro KMP_CFI_DEF reg, sz
- .cfi_def_cfa \reg,\sz
-.endm
-# endif // KMP_OS_DARWIN
-#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
-
-# if KMP_OS_DARWIN
-# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
-# define KMP_LABEL(x) L_##x // form the name of label
-
-.macro ALIGN
- .align $0
-.endmacro
-
-.macro DEBUG_INFO
-/* Not sure what .size does in icc, not sure if we need to do something
- similar for OS X*.
-*/
-.endmacro
-
-.macro PROC
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE($0)
-KMP_PREFIX_UNDERSCORE($0):
-.endmacro
-# else // KMP_OS_DARWIN
-# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
-// Format labels so that they don't override function names in gdb's backtraces
-# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
-
-.macro ALIGN size
- .align 1<<(\size)
-.endm
-
-.macro DEBUG_INFO proc
- .cfi_endproc
-// Close the routine: end its CFI region, then record symbol type and size.
- ALIGN 2
- .type \proc,@function
- .size \proc,.-\proc
-.endm
-
-.macro PROC proc
- ALIGN 2
- .globl KMP_PREFIX_UNDERSCORE(\proc)
-KMP_PREFIX_UNDERSCORE(\proc):
- .cfi_startproc
-.endm
-# endif // KMP_OS_DARWIN
-
-#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
-
-// -----------------------------------------------------------------------
-// data
-// -----------------------------------------------------------------------
-
-#ifdef KMP_GOMP_COMPAT
-
-// Support for unnamed common blocks.
-//
-// Because the symbol ".gomp_critical_user_" contains a ".", we have to
-// put this stuff in assembly.
-
-# if KMP_ARCH_X86
-# if KMP_OS_DARWIN
- .data
- .comm .gomp_critical_user_,32
- .data
- .globl ___kmp_unnamed_critical_addr
-___kmp_unnamed_critical_addr:
- .long .gomp_critical_user_
-# else /* Linux* OS */
- .data
- .comm .gomp_critical_user_,32,8 // common block: size 32, third operand 8
- .data
- ALIGN 4
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .4byte .gomp_critical_user_ // 4-byte address of the common block
- .type __kmp_unnamed_critical_addr,@object
- .size __kmp_unnamed_critical_addr,4
-# endif /* KMP_OS_DARWIN */
-# endif /* KMP_ARCH_X86 */
-
-# if KMP_ARCH_X86_64
-# if KMP_OS_DARWIN
- .data
- .comm .gomp_critical_user_,32
- .data
- .globl ___kmp_unnamed_critical_addr
-___kmp_unnamed_critical_addr:
- .quad .gomp_critical_user_
-# else /* Linux* OS */
- .data
- .comm .gomp_critical_user_,32,8 // common block: size 32, third operand 8
- .data
- ALIGN 8
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .8byte .gomp_critical_user_ // 8-byte address of the common block
- .type __kmp_unnamed_critical_addr,@object
- .size __kmp_unnamed_critical_addr,8
-# endif /* KMP_OS_DARWIN */
-# endif /* KMP_ARCH_X86_64 */
-
-#endif /* KMP_GOMP_COMPAT */
-
-
-#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
-
-// -----------------------------------------------------------------------
-// microtasking routines specifically written for IA-32 architecture
-// running Linux* OS
-// -----------------------------------------------------------------------
-
- .ident "Intel Corporation"
- .data
- ALIGN 4
-// void
-// __kmp_x86_pause( void );
-
- .text
- PROC __kmp_x86_pause
-
- pause_op
- ret
-
- DEBUG_INFO __kmp_x86_pause
-
-// void
-// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
-
- PROC __kmp_x86_cpuid
-
- pushl %ebp
- movl %esp,%ebp
- pushl %edi
- pushl %ebx
- pushl %ecx
- pushl %edx
-
- movl 8(%ebp), %eax
- movl 12(%ebp), %ecx
- cpuid // Query the CPUID for the current processor
-
- movl 16(%ebp), %edi
- movl %eax, 0(%edi)
- movl %ebx, 4(%edi)
- movl %ecx, 8(%edi)
- movl %edx, 12(%edi)
-
- popl %edx
- popl %ecx
- popl %ebx
- popl %edi
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_x86_cpuid
-
-
-# if !KMP_ASM_INTRINS
-
-//------------------------------------------------------------------------
-// kmp_int32
-// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-
- PROC __kmp_test_then_add32
-
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- lock
- xaddl %eax,(%ecx)
- ret
-
- DEBUG_INFO __kmp_test_then_add32
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_fixed8
-//
-// kmp_int8
-// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
-//
-// parameters:
-// p: 4(%esp)
-// d: 8(%esp)
-//
-// return: %al
- PROC __kmp_xchg_fixed8
-
- movl 4(%esp), %ecx // "p"
- movb 8(%esp), %al // "d"
-
- lock
- xchgb %al,(%ecx)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed8
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_fixed16
-//
-// kmp_int16
-// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
-//
-// parameters:
-// p: 4(%esp)
-// d: 8(%esp)
-// return: %ax
- PROC __kmp_xchg_fixed16
-
- movl 4(%esp), %ecx // "p"
- movw 8(%esp), %ax // "d"
-
- lock
- xchgw %ax,(%ecx)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed16
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_fixed32
-//
-// kmp_int32
-// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
-//
-// parameters:
-// p: 4(%esp)
-// d: 8(%esp)
-//
-// return: %eax
- PROC __kmp_xchg_fixed32
-
- movl 4(%esp), %ecx // "p"
- movl 8(%esp), %eax // "d"
-
- lock
- xchgl %eax,(%ecx)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed32
-
-
-// kmp_int8
-// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
- PROC __kmp_compare_and_store8
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- movl 4(%esp), %ecx // "p"
- movb 8(%esp), %al // "cv": cmpxchgb compares (%ecx) against %al
- movb 12(%esp), %dl // "sv": written to (%ecx) only when the compare matches
- lock
- cmpxchgb %dl,(%ecx)
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- and $1, %eax // clear bits 1-31 so that %eax is exactly 0 or 1
- ret
-
- DEBUG_INFO __kmp_compare_and_store8
-
-// kmp_int16
-// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
- PROC __kmp_compare_and_store16
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- movl 4(%esp), %ecx // "p"
- movw 8(%esp), %ax // "cv": cmpxchgw compares (%ecx) against %ax
- movw 12(%esp), %dx // "sv": written to (%ecx) only when the compare matches
- lock
- cmpxchgw %dx,(%ecx)
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- and $1, %eax // clear bits 1-31 so that %eax is exactly 0 or 1
- ret
-
- DEBUG_INFO __kmp_compare_and_store16
-
-// kmp_int32
-// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
- PROC __kmp_compare_and_store32
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- movl 4(%esp), %ecx // "p"
- movl 8(%esp), %eax // "cv": cmpxchgl compares (%ecx) against %eax
- movl 12(%esp), %edx // "sv": written to (%ecx) only when the compare matches
- lock
- cmpxchgl %edx,(%ecx)
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- and $1, %eax // clear bits 1-31 so that %eax is exactly 0 or 1
- ret
-
- DEBUG_INFO __kmp_compare_and_store32
-
-// kmp_int32
-// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv);
- PROC __kmp_compare_and_store64
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx // saved and restored below; holds the low half of "sv"
- pushl %edi // saved and restored below; holds "p"
- movl 8(%ebp), %edi // "p"
- movl 12(%ebp), %eax // "cv" low order word
- movl 16(%ebp), %edx // "cv" high order word
- movl 20(%ebp), %ebx // "sv" low order word
- movl 24(%ebp), %ecx // "sv" high order word
- lock
- cmpxchg8b (%edi) // compares %edx:%eax with (%edi); stores %ecx:%ebx on match
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- and $1, %eax // clear bits 1-31 so that %eax is exactly 0 or 1
- popl %edi
- popl %ebx
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_compare_and_store64
-
-// kmp_int8
-// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
- PROC __kmp_compare_and_store_ret8
-
- movl 4(%esp), %ecx
- movb 8(%esp), %al
- movb 12(%esp), %dl
- lock
- cmpxchgb %dl,(%ecx)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret8
-
-// kmp_int16
-// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
-// kmp_int16 sv);
- PROC __kmp_compare_and_store_ret16
-
- movl 4(%esp), %ecx
- movw 8(%esp), %ax
- movw 12(%esp), %dx
- lock
- cmpxchgw %dx,(%ecx)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret16
-
-// kmp_int32
-// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
-// kmp_int32 sv);
- PROC __kmp_compare_and_store_ret32
-
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- movl 12(%esp), %edx
- lock
- cmpxchgl %edx,(%ecx)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret32
-
-// kmp_int64
-// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
-// kmp_int64 sv);
- PROC __kmp_compare_and_store_ret64
-
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx
- pushl %edi
- movl 8(%ebp), %edi
- movl 12(%ebp), %eax // "cv" low order word
- movl 16(%ebp), %edx // "cv" high order word
- movl 20(%ebp), %ebx // "sv" low order word
- movl 24(%ebp), %ecx // "sv" high order word
- lock
- cmpxchg8b (%edi)
- popl %edi
- popl %ebx
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret64
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_real32
-//
-// kmp_real32
-// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
-//
-// Atomically stores "data" to *addr and returns the value that was replaced.
-// The old value is taken from the xchgl result, not from a separate load.
-//
-// parameters (offsets at entry, before %ebp is pushed):
-// addr: 4(%esp)
-// data: 8(%esp)
-//
-// return: %st(0)
- PROC __kmp_xchg_real32
-
- pushl %ebp
- movl %esp, %ebp
- subl $4, %esp // scratch slot at -4(%ebp)
- pushl %esi // callee-saved, used to hold "addr"
-
- // With the frame set up, 0(%ebp) is the saved %ebp and 4(%ebp) is
- // the return address, so the arguments start at 8(%ebp).
- movl 8(%ebp), %esi // "addr"
- movl 12(%ebp), %eax // "data" (raw 32-bit pattern)
-
- lock
- xchgl %eax, (%esi) // %eax = previous contents of *addr
-
- movl %eax, -4(%ebp) // pass the old value through memory so it
- flds -4(%ebp) // is returned as the single x87 stack entry
-
- popl %esi
- movl %ebp, %esp
- popl %ebp
- ret
-
- DEBUG_INFO __kmp_xchg_real32
-
-# endif /* !KMP_ASM_INTRINS */
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_load_x87_fpu_control_word
-//
-// void
-// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: 4(%esp)
- PROC __kmp_load_x87_fpu_control_word
-
- movl 4(%esp), %eax
- fldcw (%eax)
- ret
-
- DEBUG_INFO __kmp_load_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_store_x87_fpu_control_word
-//
-// void
-// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: 4(%esp)
- PROC __kmp_store_x87_fpu_control_word
-
- movl 4(%esp), %eax
- fstcw (%eax)
- ret
-
- DEBUG_INFO __kmp_store_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_clear_x87_fpu_status_word
-//
-// void
-// __kmp_clear_x87_fpu_status_word();
- PROC __kmp_clear_x87_fpu_status_word
-
- fnclex
- ret
-
- DEBUG_INFO __kmp_clear_x87_fpu_status_word
-
-
-//------------------------------------------------------------------------
-// typedef void (*microtask_t)( int *gtid, int *tid, ... );
-//
-// int
-// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
-// int argc, void *p_argv[] ) {
-// (*pkfn)( & gtid, & tid, argv[0], ... );
-// return 1;
-// }
-
-// -- Begin __kmp_invoke_microtask
-// mark_begin;
- PROC __kmp_invoke_microtask
-
- pushl %ebp
- KMP_CFI_DEF_OFFSET 8
- KMP_CFI_OFFSET ebp,-8
- movl %esp,%ebp // establish the base pointer for this routine.
- KMP_CFI_REGISTER ebp
- subl $8,%esp // allocate space for two local variables.
- // These variables are:
- // argv: -4(%ebp)
- // temp: -8(%ebp)
- //
- pushl %ebx // save %ebx to use during this routine
- //
-#if OMPT_SUPPORT
- movl 28(%ebp),%ebx // get exit_frame address
- movl %ebp,(%ebx) // save exit_frame
-#endif
-
- movl 20(%ebp),%ebx // Stack alignment - # args
- addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid)
- shll $2,%ebx // Number of bytes used on stack: (#args+2)*4
- movl %esp,%eax //
- subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
- movl %eax,%ebx // Save to %ebx
- andl $0xFFFFFF80,%eax // mask off 7 bits
- subl %eax,%ebx // Amount to subtract from %esp
- subl %ebx,%esp // Prepare the stack ptr --
- // now it will be aligned on 128-byte boundary at the call
-
- movl 24(%ebp),%eax // copy from p_argv[]
- movl %eax,-4(%ebp) // into the local variable *argv.
-
- movl 20(%ebp),%ebx // argc is 20(%ebp)
- shll $2,%ebx
-
-KMP_LABEL(invoke_2):
- cmpl $0,%ebx
- jg KMP_LABEL(invoke_4)
- jmp KMP_LABEL(invoke_3)
- ALIGN 2
-KMP_LABEL(invoke_4):
- movl -4(%ebp),%eax
- subl $4,%ebx // decrement argc.
- addl %ebx,%eax // index into argv.
- movl (%eax),%edx
- pushl %edx
-
- jmp KMP_LABEL(invoke_2)
- ALIGN 2
-KMP_LABEL(invoke_3):
- leal 16(%ebp),%eax // push & tid
- pushl %eax
-
- leal 12(%ebp),%eax // push & gtid
- pushl %eax
-
- movl 8(%ebp),%ebx
- call *%ebx // call (*pkfn)();
-
- movl $1,%eax // return 1;
-
- movl -12(%ebp),%ebx // restore %ebx
- leave
- KMP_CFI_DEF esp,4
- ret
-
- DEBUG_INFO __kmp_invoke_microtask
-// -- End __kmp_invoke_microtask
-
-
-// kmp_uint64
-// __kmp_hardware_timestamp(void)
- PROC __kmp_hardware_timestamp
- rdtsc
- ret
-
- DEBUG_INFO __kmp_hardware_timestamp
-// -- End __kmp_hardware_timestamp
-
-#endif /* KMP_ARCH_X86 */
-
-
-#if KMP_ARCH_X86_64
-
-// -----------------------------------------------------------------------
-// microtasking routines specifically written for IA-32 architecture and
-// Intel(R) 64 running Linux* OS
-// -----------------------------------------------------------------------
-
-// -- Machine type P
-// mark_description "Intel Corporation";
- .ident "Intel Corporation"
-// -- .file "z_Linux_asm.s"
- .data
- ALIGN 4
-
-// To prevent getting our code into .data section .text added to every routine
-// definition for x86_64.
-//------------------------------------------------------------------------
-// FUNCTION __kmp_x86_cpuid
-//
-// void
-// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
-//
-// parameters:
-// mode: %edi
-// mode2: %esi
-// cpuid_buffer: %rdx
- .text
- PROC __kmp_x86_cpuid
-
- pushq %rbp
- movq %rsp,%rbp
- pushq %rbx // callee-save register
-
- movl %esi, %ecx // "mode2"
- movl %edi, %eax // "mode"
- movq %rdx, %rsi // cpuid_buffer
- cpuid // Query the CPUID for the current processor
-
- movl %eax, 0(%rsi) // store results into buffer
- movl %ebx, 4(%rsi)
- movl %ecx, 8(%rsi)
- movl %edx, 12(%rsi)
-
- popq %rbx // callee-save register
- movq %rbp, %rsp
- popq %rbp
- ret
-
- DEBUG_INFO __kmp_x86_cpuid
-
-
-
-# if !KMP_ASM_INTRINS
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_test_then_add32
-//
-// kmp_int32
-// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-//
-// parameters:
-// p: %rdi
-// d: %esi
-//
-// return: %eax
- .text
- PROC __kmp_test_then_add32
-
- movl %esi, %eax // "d"
- lock
- xaddl %eax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_test_then_add32
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_test_then_add64
-//
-// kmp_int64
-// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
-//
-// parameters:
-// p: %rdi
-// d: %rsi
-// return: %rax
- .text
- PROC __kmp_test_then_add64
-
- movq %rsi, %rax // "d"
- lock
- xaddq %rax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_test_then_add64
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_fixed8
-//
-// kmp_int8
-// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
-//
-// parameters:
-// p: %rdi
-// d: %sil
-//
-// return: %al
- .text
- PROC __kmp_xchg_fixed8
-
- movb %sil, %al // "d"
-
- lock
- xchgb %al,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed8
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_fixed16
-//
-// kmp_int16
-// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
-//
-// parameters:
-// p: %rdi
-// d: %si
-// return: %ax
- .text
- PROC __kmp_xchg_fixed16
-
- movw %si, %ax // "d"
-
- lock
- xchgw %ax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed16
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_fixed32
-//
-// kmp_int32
-// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
-//
-// parameters:
-// p: %rdi
-// d: %esi
-//
-// return: %eax
- .text
- PROC __kmp_xchg_fixed32
-
- movl %esi, %eax // "d"
-
- lock
- xchgl %eax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed32
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_fixed64
-//
-// kmp_int64
-// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
-//
-// parameters:
-// p: %rdi
-// d: %rsi
-// return: %rax
- .text
- PROC __kmp_xchg_fixed64
-
- movq %rsi, %rax // "d"
-
- lock
- xchgq %rax,(%rdi)
- ret
-
- DEBUG_INFO __kmp_xchg_fixed64
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store8
-//
-// kmp_int8
-// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %sil
-// sv: %dl
-//
-// return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store8
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- movb %sil, %al // "cv": cmpxchgb compares (%rdi) against %al
- lock
- cmpxchgb %dl,(%rdi)
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- andq $1, %rax // clear bits 1-63 so that %rax is exactly 0 or 1
- ret
-
- DEBUG_INFO __kmp_compare_and_store8
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store16
-//
-// kmp_int16
-// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %si
-// sv: %dx
-//
-// return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store16
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- movw %si, %ax // "cv": cmpxchgw compares (%rdi) against %ax
- lock
- cmpxchgw %dx,(%rdi)
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- andq $1, %rax // clear bits 1-63 so that %rax is exactly 0 or 1
- ret
-
- DEBUG_INFO __kmp_compare_and_store16
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store32
-//
-// kmp_int32
-// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %esi
-// sv: %edx
-//
-// return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store32
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- movl %esi, %eax // "cv": cmpxchgl compares (%rdi) against %eax
- lock
- cmpxchgl %edx,(%rdi)
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- andq $1, %rax // clear bits 1-63 so that %rax is exactly 0 or 1
- ret
-
- DEBUG_INFO __kmp_compare_and_store32
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store64
-//
-// kmp_int32
-// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %rsi
-// sv: %rdx
-// return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store64
-// Atomically: if (*p == cv) { *p = sv; return 1; } else return 0;
- movq %rsi, %rax // "cv": cmpxchgq compares (%rdi) against %rax
- lock
- cmpxchgq %rdx,(%rdi)
- sete %al // %al = 1 if the compare matched (sv was stored), else 0
- andq $1, %rax // clear bits 1-63 so that %rax is exactly 0 or 1
- ret
-
- DEBUG_INFO __kmp_compare_and_store64
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store_ret8
-//
-// kmp_int8
-// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %esi
-// sv: %edx
-//
-// return: %al
- .text
- PROC __kmp_compare_and_store_ret8
-
- movb %sil, %al // "cv"
- lock
- cmpxchgb %dl,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret8
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store_ret16
-//
-// kmp_int16
-// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %si
-// sv: %dx
-//
-// return: %ax
- .text
- PROC __kmp_compare_and_store_ret16
-
- movw %si, %ax // "cv"
- lock
- cmpxchgw %dx,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret16
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store_ret32
-//
-// kmp_int32
-// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %esi
-// sv: %edx
-//
-// return: %eax
- .text
- PROC __kmp_compare_and_store_ret32
-
- movl %esi, %eax // "cv"
- lock
- cmpxchgl %edx,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret32
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_compare_and_store_ret64
-//
-// kmp_int64
-// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
-// parameters:
-// p: %rdi
-// cv: %rsi
-// sv: %rdx
-// return: %rax
- .text
- PROC __kmp_compare_and_store_ret64
-
- movq %rsi, %rax // "cv"
- lock
- cmpxchgq %rdx,(%rdi)
- ret
-
- DEBUG_INFO __kmp_compare_and_store_ret64
-
-# endif /* !KMP_ASM_INTRINS */
-
-
-# if !KMP_MIC
-
-# if !KMP_ASM_INTRINS
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_real32
-//
-// kmp_real32
-// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
-//
-// parameters:
-// addr: %rdi
-// data: %xmm0 (lower 4 bytes)
-//
-// return: %xmm0 (lower 4 bytes)
- .text
- PROC __kmp_xchg_real32
-
- movd %xmm0, %eax // load "data" to eax
-
- lock
- xchgl %eax, (%rdi)
-
- movd %eax, %xmm0 // load old value into return register
-
- ret
-
- DEBUG_INFO __kmp_xchg_real32
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_xchg_real64
-//
-// kmp_real64
-// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
-//
-// parameters:
-// addr: %rdi
-// data: %xmm0 (lower 8 bytes)
-// return: %xmm0 (lower 8 bytes)
- .text
- PROC __kmp_xchg_real64
-
- movd %xmm0, %rax // load "data" to rax
-
- lock
- xchgq %rax, (%rdi)
-
- movd %rax, %xmm0 // load old value into return register
- ret
-
- DEBUG_INFO __kmp_xchg_real64
-
-
-# endif /* !KMP_ASM_INTRINS */
-
-# endif /* !KMP_MIC */
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_load_x87_fpu_control_word
-//
-// void
-// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: %rdi
- .text
- PROC __kmp_load_x87_fpu_control_word
-
- fldcw (%rdi)
- ret
-
- DEBUG_INFO __kmp_load_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_store_x87_fpu_control_word
-//
-// void
-// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
-//
-// parameters:
-// p: %rdi
- .text
- PROC __kmp_store_x87_fpu_control_word
-
- fstcw (%rdi)
- ret
-
- DEBUG_INFO __kmp_store_x87_fpu_control_word
-
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_clear_x87_fpu_status_word
-//
-// void
-// __kmp_clear_x87_fpu_status_word();
- .text
- PROC __kmp_clear_x87_fpu_status_word
-
-#if KMP_MIC
-// TODO: remove the workaround for problem with fnclex instruction (no CQ known)
- fstenv -32(%rsp) // store FP env
- andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW
- fldenv -32(%rsp) // load FP env back
- ret
-#else
- fnclex
- ret
-#endif
-
- DEBUG_INFO __kmp_clear_x87_fpu_status_word
-
-
-//------------------------------------------------------------------------
-// typedef void (*microtask_t)( int *gtid, int *tid, ... );
-//
-// int
-// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
-// int gtid, int tid,
-// int argc, void *p_argv[] ) {
-// (*pkfn)( & gtid, & tid, argv[0], ... );
-// return 1;
-// }
-//
-// note: at call to pkfn must have %rsp 128-byte aligned for compiler
-//
-// parameters:
-// %rdi: pkfn
-// %esi: gtid
-// %edx: tid
-// %ecx: argc
-// %r8: p_argv
-// %r9: &exit_frame
-//
-// locals:
-// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
-// __tid: tid parm pushed on stack so can pass &tid to pkfn
-//
-// reg temps:
-// %rax: used all over the place
-// %rdx: used in stack pointer alignment calculation
-// %r11: used to traverse p_argv array
-// %rsi: used as temporary for stack parameters
-// used as temporary for number of pkfn parms to push
-// %rbx: used to hold pkfn address, and zero constant, callee-save
-//
-// return: %eax (always 1/TRUE)
-__gtid = -16 // %rbp-relative offset of the slot the gtid push below lands in
-__tid = -24 // %rbp-relative offset of the slot the tid push below lands in
-
-// -- Begin __kmp_invoke_microtask (x86-64): builds pkfn's argument list and calls it
-// mark_begin;
- .text
- PROC __kmp_invoke_microtask
-
- pushq %rbp // save base pointer
- KMP_CFI_DEF_OFFSET 16
- KMP_CFI_OFFSET rbp,-16
- movq %rsp,%rbp // establish the base pointer for this routine.
- KMP_CFI_REGISTER rbp
-
-#if OMPT_SUPPORT
- movq %rbp, (%r9) // save exit_frame: store %rbp through the pointer held in %r9
-#endif
-
- pushq %rbx // %rbx is callee-saved register; lands at -8(%rbp)
- pushq %rsi // Put gtid on stack so can pass &gtid to pkfn; lands at __gtid(%rbp)
- pushq %rdx // Put tid on stack so can pass &tid to pkfn; lands at __tid(%rbp)
-
- movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
- movq $0, %rbx // constant for cmovs later
- subq $4, %rax // subtract four args passed in registers to pkfn
-#if KMP_MIC
- js KMP_LABEL(kmp_0) // jump to movq
- jmp KMP_LABEL(kmp_0_exit) // jump ahead
-KMP_LABEL(kmp_0):
- movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
-KMP_LABEL(kmp_0_exit):
-#else
- cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
-#endif // KMP_MIC
-
- movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
- shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
-
- movq %rsp, %rdx //
- subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
- // without align, stack ptr would be this
- movq %rdx, %rax // Save to %rax
-
- andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
- subq %rax, %rdx // Amount to subtract from %rsp (the low 7 bits that were masked off)
- subq %rdx, %rsp // Prepare the stack ptr --
- // now %rsp will align to 128-byte boundary at call site
-
- // setup pkfn parameter reg and stack
- movq %rcx, %rax // argc -> %rax
- cmpq $0, %rsi
- je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
- shlq $3, %rcx // argc*8 -> %rcx
- movq %r8, %rdx // p_argv -> %rdx
- addq %rcx, %rdx // &p_argv[argc] -> %rdx
-
- movq %rsi, %rcx // max (0, argc-4) -> %rcx
-
-KMP_LABEL(kmp_invoke_push_parms):
- // push nth - 7th parms to pkfn on stack
- subq $8, %rdx // decrement p_argv pointer to previous parm
- movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
- pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
- subl $1, %ecx // one fewer parm left to push
-
-// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
-// if the name of the label that is an operand of this jecxz starts with a dot (".");
-// Apple's linker does not support 1-byte length relocation;
-// Resolution: replace all .labelX entries with L_labelX.
-
- jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
- jmp KMP_LABEL(kmp_invoke_push_parms) // otherwise push the next parm
- ALIGN 3
-KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
- // order here is important to avoid trashing
- // registers used for both input and output parms!
- movq %rdi, %rbx // pkfn -> %rbx
- leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
- leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
-
- movq %r8, %r11 // p_argv -> %r11
-
-#if KMP_MIC
- cmpq $4, %rax // argc >= 4?
- jns KMP_LABEL(kmp_4) // jump to movq
- jmp KMP_LABEL(kmp_4_exit) // jump ahead
-KMP_LABEL(kmp_4):
- movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
-KMP_LABEL(kmp_4_exit):
-
- cmpq $3, %rax // argc >= 3?
- jns KMP_LABEL(kmp_3) // jump to movq
- jmp KMP_LABEL(kmp_3_exit) // jump ahead
-KMP_LABEL(kmp_3):
- movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
-KMP_LABEL(kmp_3_exit):
-
- cmpq $2, %rax // argc >= 2?
- jns KMP_LABEL(kmp_2) // jump to movq
- jmp KMP_LABEL(kmp_2_exit) // jump ahead
-KMP_LABEL(kmp_2):
- movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
-KMP_LABEL(kmp_2_exit):
-
- cmpq $1, %rax // argc >= 1?
- jns KMP_LABEL(kmp_1) // jump to movq
- jmp KMP_LABEL(kmp_1_exit) // jump ahead
-KMP_LABEL(kmp_1):
- movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
-KMP_LABEL(kmp_1_exit):
-#else
- cmpq $4, %rax // argc >= 4?
- cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
-
- cmpq $3, %rax // argc >= 3?
- cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
-
- cmpq $2, %rax // argc >= 2?
- cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
-
- cmpq $1, %rax // argc >= 1?
- cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
-#endif // KMP_MIC
-
- call *%rbx // call (*pkfn)();
- movq $1, %rax // move 1 into return register;
-
- movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
- movq %rbp, %rsp // restore stack pointer (discards gtid/tid slots and pushed parms)
- popq %rbp // restore frame pointer
- KMP_CFI_DEF rsp,8
- ret
-
- DEBUG_INFO __kmp_invoke_microtask
-// -- End __kmp_invoke_microtask
-
-// kmp_uint64
-// __kmp_hardware_timestamp(void) -- returns the 64-bit value read by rdtsc in %rax
- .text
- PROC __kmp_hardware_timestamp
- rdtsc // read the time-stamp counter: low 32 bits -> %eax, high 32 bits -> %edx
- shlq $32, %rdx // move the high half into bits 63:32 of %rdx
- orq %rdx, %rax // %rax = (high << 32) | low
- ret
-
- DEBUG_INFO __kmp_hardware_timestamp
-// -- End __kmp_hardware_timestamp
-
-//------------------------------------------------------------------------
-// FUNCTION __kmp_bsr32
-// Returns in %eax the result of a bit-scan-reverse on the 32-bit argument in %edi.
-// int
-// __kmp_bsr32( int );
- .text
- PROC __kmp_bsr32
-
- bsr %edi,%eax // %eax = bit index of the most significant set bit of %edi
- ret // NOTE(review): BSR leaves its destination undefined for a zero source -- confirm callers never pass 0
-
- DEBUG_INFO __kmp_bsr32
-
-
-// -----------------------------------------------------------------------
-#endif /* KMP_ARCH_X86_64 */
-
-// '
-#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
-
-//------------------------------------------------------------------------
-//
-// typedef void (*microtask_t)( int *gtid, int *tid, ... );
-//
-// int
-// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
-// int gtid, int tid,
-// int argc, void *p_argv[] ) {
-// (*pkfn)( & gtid, & tid, argv[0], ... );
-// return 1;
-// }
-//
-// parameters:
-// x0: pkfn
-// w1: gtid
-// w2: tid
-// w3: argc
-// x4: p_argv
-// x5: &exit_frame
-//
-// locals:
-// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
-// __tid: tid parm pushed on stack so can pass &tid to pkfn
-//
-// reg temps:
-// x8: used to hold pkfn address
-// w9: used as temporary for number of pkfn parms
-// x10: used to traverse p_argv array
-// x11: used as temporary for stack placement calculation
-// x12: used as temporary for stack parameters
-// x19: used to preserve exit_frame_ptr, callee-save
-//
-// return: w0 (always 1/TRUE)
-//
-
-__gtid = 4 // gtid is spilled at [x29, #-4]
-__tid = 8 // tid is spilled at [x29, #-8]
-
-// -- Begin __kmp_invoke_microtask (AArch64): builds pkfn's argument list and calls it
-// mark_begin;
- .text
- PROC __kmp_invoke_microtask
-
- stp x29, x30, [sp, #-16]! // push frame pointer and link register
-# if OMPT_SUPPORT
- stp x19, x20, [sp, #-16]! // push x19/x20; x19 is used below to hold x5
-# endif
- mov x29, sp // x29 = frame base; sp is restored from it before returning
-
- orr w9, wzr, #1 // w9 = 1
- add w9, w9, w3, lsr #1 // w9 = 1 + argc/2
- sub sp, sp, w9, lsl #4 // reserve 16*(1 + argc/2) bytes below x29
- mov x11, sp // x11 = write cursor for parms passed on the stack
-
- mov x8, x0 // pkfn -> x8 (x0 is reused for the 1st parm)
- str w1, [x29, #-__gtid] // spill gtid so its address can be passed
- str w2, [x29, #-__tid] // spill tid so its address can be passed
- mov w9, w3 // argc -> w9 (count of parms still to place)
- mov x10, x4 // p_argv -> x10
-# if OMPT_SUPPORT
- mov x19, x5 // keep &exit_frame in x19 across the call
- str x29, [x19] // store this frame's x29 through &exit_frame
-# endif
-
- sub x0, x29, #__gtid // &gtid -> x0 (1st parm to pkfn)
- sub x1, x29, #__tid // &tid -> x1 (2nd parm to pkfn)
-
- cbz w9, KMP_LABEL(kmp_1) // argc == 0: nothing more to pass
- ldr x2, [x10] // p_argv[0] -> x2 (3rd parm to pkfn)
-
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1) // no parms left
- ldr x3, [x10, #8]! // advance x10, then p_argv[1] -> x3 (4th parm)
-
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1) // no parms left
- ldr x4, [x10, #8]! // p_argv[2] -> x4 (5th parm)
-
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1) // no parms left
- ldr x5, [x10, #8]! // p_argv[3] -> x5 (6th parm)
-
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1) // no parms left
- ldr x6, [x10, #8]! // p_argv[4] -> x6 (7th parm)
-
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1) // no parms left
- ldr x7, [x10, #8]! // p_argv[5] -> x7 (8th parm)
-
-KMP_LABEL(kmp_0): // loop: copy each remaining p_argv entry to the reserved stack area
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1) // all parms placed
- ldr x12, [x10, #8]! // advance x10, then load the next p_argv entry
- str x12, [x11], #8 // store it at the cursor, then advance the cursor by 8
- b KMP_LABEL(kmp_0)
-KMP_LABEL(kmp_1):
- blr x8 // call (*pkfn)()
- orr w0, wzr, #1 // return value: 1
- mov sp, x29 // drop the parm area and the gtid/tid slots
-# if OMPT_SUPPORT
- str xzr, [x19] // store zero through &exit_frame
- ldp x19, x20, [sp], #16 // restore x19/x20
-# endif
- ldp x29, x30, [sp], #16 // restore frame pointer and link register
- ret
-
- DEBUG_INFO __kmp_invoke_microtask
-// -- End __kmp_invoke_microtask
-
-#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */
-
-#if KMP_ARCH_PPC64
-
-//------------------------------------------------------------------------
-//
-// typedef void (*microtask_t)( int *gtid, int *tid, ... );
-//
-// int
-// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
-// int gtid, int tid,
-// int argc, void *p_argv[] ) {
-// (*pkfn)( & gtid, & tid, argv[0], ... );
-// return 1;
-// }
-//
-// parameters:
-// r3: pkfn
-// r4: gtid
-// r5: tid
-// r6: argc
-// r7: p_argv
-// r8: &exit_frame
-//
-// return: r3 (always 1/TRUE)
-//
- .text
-# if KMP_ARCH_PPC64_LE
- .abiversion 2
-# endif
- .globl __kmp_invoke_microtask
-
-# if KMP_ARCH_PPC64_LE
- .p2align 4
-# else
- .p2align 2
-# endif
-
- .type __kmp_invoke_microtask,@function
-
-# if KMP_ARCH_PPC64_LE
-__kmp_invoke_microtask:
-.Lfunc_begin0:
-.Lfunc_gep0:
-// Global entry point: derive the TOC pointer (r2) from the entry address in r12.
- addis 2, 12, .TOC.-.Lfunc_gep0@ha
- addi 2, 2, .TOC.-.Lfunc_gep0@l
-.Lfunc_lep0:
- .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
-# else
-// ELFv1: the symbol names a function descriptor in .opd
-// (entry address, TOC base, and a third doubleword that is zero here).
- .section .opd,"aw",@progbits
-__kmp_invoke_microtask:
- .p2align 3
- .quad .Lfunc_begin0
- .quad .TOC.@tocbase
- .quad 0
- .text
-.Lfunc_begin0:
-# endif
-
-// -- Begin __kmp_invoke_microtask
-// mark_begin;
-
-// We need to allocate a stack frame large enough to hold all of the parameters
-// on the stack for the microtask plus what this function needs. That's 48
-// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
-// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
-// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
-// to save r30 to hold a copy of r8.
-
- .cfi_startproc
- mflr 0 // r0 = return address
- std 31, -8(1) // save r31 just below the caller's stack pointer
- std 0, 16(1) // save the return address at 16(r1)
-
-// This is unusual because normally we'd set r31 equal to r1 after the stack
-// frame is established. In this case, however, we need to dynamically compute
-// the stack frame size, and so we keep a direct copy of r1 to access our
-// register save areas and restore the r1 value before returning.
- mr 31, 1
- .cfi_def_cfa_register r31
- .cfi_offset r31, -8
- .cfi_offset lr, 16
-
-// Compute the size necessary for the local stack frame.
-# if KMP_ARCH_PPC64_LE
- li 12, 72
-# else
- li 12, 88
-# endif
- sldi 0, 6, 3 // r0 = argc * 8
- add 12, 0, 12 // r12 = fixed part + argc * 8
- neg 12, 12 // negate: the frame grows downwards
-
-// We need to make sure that the stack frame stays aligned (to 16 bytes, except
-// under the BG/Q CNK, where it must be to 32 bytes).
-# if KMP_OS_CNK
- li 0, -32
-# else
- li 0, -16
-# endif
- and 12, 0, 12 // round the negative size down to the alignment
-
-// Establish the local stack frame.
- stdux 1, 1, 12 // store the old r1 at the new frame base and update r1
-
-# if OMPT_SUPPORT
- .cfi_offset r30, -16
- std 30, -16(31) // save r30 below the r31 save slot
- std 1, 0(8) // store the new r1 through the pointer passed in r8
- mr 30, 8 // keep that pointer in r30 across the call
-# endif
-
-// Store gtid and tid to the stack because they're passed by reference to the microtask.
- stw 4, -20(31)
- stw 5, -24(31)
-
- mr 12, 6 // argc -> r12
- mr 4, 7 // p_argv -> r4
-
-// Load up to six p_argv entries into r5..r10, stopping as soon as argc is exhausted.
- cmpwi 0, 12, 1
- blt 0, .Lcall
-
- ld 5, 0(4)
-
- cmpwi 0, 12, 2
- blt 0, .Lcall
-
- ld 6, 8(4)
-
- cmpwi 0, 12, 3
- blt 0, .Lcall
-
- ld 7, 16(4)
-
- cmpwi 0, 12, 4
- blt 0, .Lcall
-
- ld 8, 24(4)
-
- cmpwi 0, 12, 5
- blt 0, .Lcall
-
- ld 9, 32(4)
-
- cmpwi 0, 12, 6
- blt 0, .Lcall
-
- ld 10, 40(4)
-
- cmpwi 0, 12, 7
- blt 0, .Lcall
-
-// There are more than 6 microtask parameters, so we need to store the
-// remainder to the stack.
- addi 12, 12, -6
- mtctr 12 // CTR = argc - 6 = number of entries to copy
-
-// These are set to 8 bytes before the first desired store address (we're using
-// pre-increment loads and stores in the loop below). The parameter save area
-// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
-// 32 + 8*8 == 96 bytes above r1 for ELFv2.
- addi 4, 4, 40
-# if KMP_ARCH_PPC64_LE
- addi 12, 1, 88
-# else
- addi 12, 1, 104
-# endif
-
-.Lnext:
- ldu 0, 8(4) // advance r4 by 8, then load the next p_argv entry
- stdu 0, 8(12) // advance r12 by 8, then store it in the parameter save area
- bdnz .Lnext // decrement CTR and loop while it is non-zero
-
-.Lcall:
-# if KMP_ARCH_PPC64_LE
- std 2, 24(1) // save our TOC pointer; reloaded after the call
- mr 12, 3 // pkfn -> r12 (branch target)
-#else
- std 2, 40(1) // save our TOC pointer; reloaded after the call
-// For ELFv1, we need to load the actual function address from the function descriptor.
- ld 12, 0(3)
- ld 2, 8(3)
- ld 11, 16(3)
-#endif
-
- addi 3, 31, -20 // &gtid -> r3 (1st parm to the microtask)
- addi 4, 31, -24 // &tid -> r4 (2nd parm to the microtask)
-
- mtctr 12
- bctrl // call the microtask through CTR
-# if KMP_ARCH_PPC64_LE
- ld 2, 24(1)
-# else
- ld 2, 40(1)
-# endif
-
-# if OMPT_SUPPORT
- li 3, 0
- std 3, 0(30) // store zero through the pointer saved in r30
-# endif
-
- li 3, 1 // return value: 1
-
-# if OMPT_SUPPORT
- ld 30, -16(31)
-# endif
-
- mr 1, 31 // pop the dynamic frame: r1 = value it had on entry
- ld 0, 16(1)
- ld 31, -8(1)
- mtlr 0
- blr
-
- .long 0
- .quad 0
-.Lfunc_end0:
- .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
- .cfi_endproc
-
-// -- End __kmp_invoke_microtask
-
-#endif /* KMP_ARCH_PPC64 */
-
-#if KMP_ARCH_ARM || KMP_ARCH_MIPS
- .data
- .comm .gomp_critical_user_,32,8
- .data
- .align 4
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .4byte .gomp_critical_user_ // 4-byte slot holding the address of .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,4
-#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */
-
-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
- .data
- .comm .gomp_critical_user_,32,8
- .data
- .align 8
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
- .8byte .gomp_critical_user_ // 8-byte slot holding the address of .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,8
-#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 */
-
-#if KMP_OS_LINUX
-// Emit an empty .note.GNU-stack section (the GNU toolchain's marker that this
-// object does not need an executable stack).
-# if KMP_ARCH_ARM
-// ARM assembly uses '@' as a comment character, so the section type is spelled %progbits.
-.section .note.GNU-stack,"",%progbits
-# else
-.section .note.GNU-stack,"",@progbits
-# endif
-#endif
More information about the Openmp-commits
mailing list