[Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]
Hal Finkel via Openmp-commits
openmp-commits at lists.llvm.org
Wed May 25 21:48:14 PDT 2016
Author: hfinkel
Date: Wed May 25 23:48:14 2016
New Revision: 270821
URL: http://llvm.org/viewvc/llvm-project?rev=270821&view=rev
Log:
Add an assembly __kmp_invoke_microtask for ppc64[le]
Clang no longer restricts itself to generating microtasks with a small number
of arguments, and so an assembly implementation is required to prevent hitting
the parameter limit present in the C implementation. This adds an
implementation for ppc64[le].
Modified:
openmp/trunk/runtime/src/z_Linux_asm.s
openmp/trunk/runtime/src/z_Linux_util.c
Modified: openmp/trunk/runtime/src/z_Linux_asm.s
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.s?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.s Wed May 25 23:48:14 2016
@@ -1555,6 +1555,226 @@ KMP_LABEL(kmp_1):
#endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
+#if KMP_ARCH_PPC64
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
+// int gtid, int tid,
+// int argc, void *p_argv[] ) {
+// (*pkfn)( & gtid, & tid, p_argv[0], ... );
+// return 1;
+// }
+//
+// parameters:
+// r3: pkfn (ELFv1: pointer to the function descriptor; ELFv2: entry address)
+// r4: gtid (spilled to the stack; the microtask receives its address)
+// r5: tid (spilled to the stack; the microtask receives its address)
+// r6: argc (number of entries in p_argv)
+// r7: p_argv
+// r8: &exit_frame (read only when OMPT_SUPPORT is set; zeroed after the call)
+//
+// return: r3 (always 1/TRUE)
+//
+ .text
+# if KMP_ARCH_PPC64_LE
+ .abiversion 2
+# endif
+ .globl __kmp_invoke_microtask
+
+# if KMP_ARCH_PPC64_LE
+ .p2align 4
+# else
+ .p2align 2
+# endif
+
+ .type __kmp_invoke_microtask,@function
+
+# if KMP_ARCH_PPC64_LE
+__kmp_invoke_microtask:
+.Lfunc_begin0:
+.Lfunc_gep0:
+ addis 2, 12, .TOC.-.Lfunc_gep0@ha // global entry: derive our TOC (r2) from r12
+ addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+ .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
+# else
+ .section .opd,"aw",@progbits
+__kmp_invoke_microtask:
+ .p2align 3
+ .quad .Lfunc_begin0 // descriptor word 0: code entry address
+ .quad .TOC.@tocbase // descriptor word 1: TOC base
+ .quad 0 // descriptor word 2: unused here
+ .text
+.Lfunc_begin0:
+# endif
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 48
+// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
+// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
+// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
+// to save r30 to hold a copy of r8.
+
+ .cfi_startproc
+ mflr 0 // r0 = return address
+ std 31, -8(1) // save r31 just below the entry stack pointer
+ std 0, 16(1) // save the return address at entry SP + 16
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+ mr 31, 1 // r31 = entry stack pointer
+ .cfi_def_cfa_register r31
+ .cfi_offset r31, -8
+ .cfi_offset lr, 16
+
+// Compute the size necessary for the local stack frame.
+# if KMP_ARCH_PPC64_LE
+ li 12, 72 // fixed part: 32 + 8*2 + 8 + 8 + 8
+# else
+ li 12, 88 // fixed part: 48 + 8*2 + 8 + 8 + 8. NOTE(review): ELFv1 reserves >= 8 parameter doublewords; confirm small argc is safe
+# endif
+ sldi 0, 6, 3 // r0 = 8 * argc
+ add 12, 0, 12 // r12 = fixed part + 8 * argc
+ neg 12, 12 // negate: the frame is carved out below r1
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes, except
+// under the BG/Q CNK, where it must be to 32 bytes).
+# if KMP_OS_CNK
+ li 0, -32
+# else
+ li 0, -16
+# endif
+ and 12, 0, 12 // masking the negative size rounds the frame size up
+
+// Establish the local stack frame.
+ stdux 1, 1, 12 // store the old r1 at r1 + r12 and make that address the new r1
+
+# if OMPT_SUPPORT
+ .cfi_offset r30, -16
+ std 30, -16(31) // save r30 before reusing it
+ mr 30, 8 // r30 = &exit_frame, needed again after the call
+# endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+ stw 4, -20(31) // gtid slot
+ stw 5, -24(31) // tid slot
+
+ mr 12, 6 // r12 = argc (r6 is overwritten with an argument below)
+ mr 4, 7 // r4 = p_argv (r7 is overwritten with an argument below)
+
+ cmpwi 0, 12, 1
+ blt 0, .Lcall // argc < 1: nothing to load
+
+ ld 5, 0(4) // r5 = p_argv[0]
+
+ cmpwi 0, 12, 2
+ blt 0, .Lcall
+
+ ld 6, 8(4) // r6 = p_argv[1]
+
+ cmpwi 0, 12, 3
+ blt 0, .Lcall
+
+ ld 7, 16(4) // r7 = p_argv[2]
+
+ cmpwi 0, 12, 4
+ blt 0, .Lcall
+
+ ld 8, 24(4) // r8 = p_argv[3]
+
+ cmpwi 0, 12, 5
+ blt 0, .Lcall
+
+ ld 9, 32(4) // r9 = p_argv[4]
+
+ cmpwi 0, 12, 6
+ blt 0, .Lcall
+
+ ld 10, 40(4) // r10 = p_argv[5]
+
+ cmpwi 0, 12, 7
+ blt 0, .Lcall // argc == 6: everything fits in registers
+
+// There are more than 6 microtask parameters, so we need to store the
+// remainder to the stack.
+ addi 12, 12, -6 // r12 = argc - 6 = number of entries to copy
+ mtctr 12 // CTR = loop trip count
+
+// These are set to 8 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
+// 32 + 8*8 == 96 bytes above r1 for ELFv2.
+ addi 4, 4, 40 // r4 = &p_argv[5]; the first ldu reads p_argv[6]
+# if KMP_ARCH_PPC64_LE
+ addi 12, 1, 88 // 96 - 8
+# else
+ addi 12, 1, 104 // 112 - 8
+# endif
+
+.Lnext:
+ ldu 0, 8(4) // advance r4 by 8, then r0 = next p_argv entry
+ stdu 0, 8(12) // advance r12 by 8, then store it to the outgoing area
+ bdnz .Lnext // decrement CTR; loop while it is non-zero
+
+.Lcall:
+# if KMP_ARCH_PPC64_LE
+ std 2, 24(1) // save our TOC pointer; reloaded after the call
+ mr 12, 3 // r12 = pkfn, the address we branch to
+#else
+ std 2, 40(1) // save our TOC pointer; reloaded after the call
+// For ELFv1, we need to load the actual function address from the function descriptor.
+ ld 12, 0(3) // descriptor word 0: code entry address
+ ld 2, 8(3) // descriptor word 1: the callee's TOC base
+ ld 11, 16(3) // descriptor word 2
+#endif
+
+ addi 3, 31, -20 // r3 = &gtid (slot written above)
+ addi 4, 31, -24 // r4 = &tid (slot written above)
+
+ mtctr 12
+ bctrl // call the microtask through CTR
+# if KMP_ARCH_PPC64_LE
+ ld 2, 24(1) // restore our TOC pointer
+# else
+ ld 2, 40(1) // restore our TOC pointer
+# endif
+
+# if OMPT_SUPPORT
+ li 3, 0
+ std 3, 0(30) // *exit_frame = 0
+# endif
+
+ li 3, 1 // return value: always 1
+
+# if OMPT_SUPPORT
+ ld 30, -16(31) // restore r30
+# endif
+
+ mr 1, 31 // drop the frame: r1 = entry stack pointer
+ ld 0, 16(1) // reload the return address
+ ld 31, -8(1) // restore r31
+ mtlr 0
+ blr
+
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+
+// -- End __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_PPC64 */
+
#if KMP_ARCH_ARM
.data
.comm .gomp_critical_user_,32,8
Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Wed May 25 23:48:14 2016
@@ -2575,7 +2575,7 @@ __kmp_get_load_balance( int max )
#endif // USE_LOAD_BALANCE
-#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))
+#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
// we really only need the case with 1 argument, because CLANG always build
// a struct of pointers to shared variables referenced in the outlined function
More information about the Openmp-commits
mailing list