[Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]
Peyton, Jonathan L via Openmp-commits
openmp-commits at lists.llvm.org
Thu Jun 2 09:06:54 PDT 2016
Tom, Andrey,
Can this commit be merged into 3.8.1? It fixes a bug for the ppc64 architecture when invoking a microtask with many arguments.
-- Johnny
-----Original Message-----
From: Openmp-commits [mailto:openmp-commits-bounces at lists.llvm.org] On Behalf Of Hal Finkel via Openmp-commits
Sent: Wednesday, May 25, 2016 11:48 PM
To: openmp-commits at lists.llvm.org
Subject: [Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]
Author: hfinkel
Date: Wed May 25 23:48:14 2016
New Revision: 270821
URL: http://llvm.org/viewvc/llvm-project?rev=270821&view=rev
Log:
Add an assembly __kmp_invoke_microtask for ppc64[le]
Clang no longer restricts itself to generating microtasks with a small number of arguments, and so an assembly implementation is required to prevent hitting the parameter limit present in the C implementation. This adds an implementation for ppc64[le].
Modified:
openmp/trunk/runtime/src/z_Linux_asm.s
openmp/trunk/runtime/src/z_Linux_util.c
Modified: openmp/trunk/runtime/src/z_Linux_asm.s
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.s?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.s Wed May 25 23:48:14 2016
@@ -1555,6 +1555,226 @@ KMP_LABEL(kmp_1):
#endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
+#if KMP_ARCH_PPC64
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)( int *gtid, int *tid, ... );
+//
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
+// int gtid, int tid,
+// int argc, void *p_argv[] ) {
+// (*pkfn)( & gtid, & tid, argv[0], ... );
+// return 1;
+// }
+//
+// parameters:
+// r3: pkfn
+// r4: gtid
+// r5: tid
+// r6: argc
+// r7: p_argv
+// r8: &exit_frame
+//
+// return: r3 (always 1/TRUE)
+//
+ .text
+# if KMP_ARCH_PPC64_LE
+ .abiversion 2
+# endif
+ .globl __kmp_invoke_microtask
+
+# if KMP_ARCH_PPC64_LE
+ .p2align 4
+# else
+ .p2align 2
+# endif
+
+ .type __kmp_invoke_microtask,@function
+
+# if KMP_ARCH_PPC64_LE
+__kmp_invoke_microtask:
+.Lfunc_begin0:
+.Lfunc_gep0:
+ addis 2, 12, .TOC.-.Lfunc_gep0@ha
+ addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+ .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
+# else
+ .section .opd,"aw",@progbits
+__kmp_invoke_microtask:
+ .p2align 3
+ .quad .Lfunc_begin0
+ .quad .TOC.@tocbase
+ .quad 0
+ .text
+.Lfunc_begin0:
+# endif
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 48
+// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
+// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
+// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
+// to save r30 to hold a copy of r8.
+
+ .cfi_startproc
+ mflr 0
+ std 31, -8(1)
+ std 0, 16(1)
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+ mr 31, 1
+ .cfi_def_cfa_register r31
+ .cfi_offset r31, -8
+ .cfi_offset lr, 16
+
+// Compute the size necessary for the local stack frame.
+# if KMP_ARCH_PPC64_LE
+ li 12, 72
+# else
+ li 12, 88
+# endif
+ sldi 0, 6, 3
+ add 12, 0, 12
+ neg 12, 12
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes, except
+// under the BG/Q CNK, where it must be to 32 bytes).
+# if KMP_OS_CNK
+ li 0, -32
+# else
+ li 0, -16
+# endif
+ and 12, 0, 12
+
+// Establish the local stack frame.
+ stdux 1, 1, 12
+
+# if OMPT_SUPPORT
+ .cfi_offset r30, -16
+ std 30, -16(31)
+ mr 30, 8
+# endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+ stw 4, -20(31)
+ stw 5, -24(31)
+
+ mr 12, 6
+ mr 4, 7
+
+ cmpwi 0, 12, 1
+ blt 0, .Lcall
+
+ ld 5, 0(4)
+
+ cmpwi 0, 12, 2
+ blt 0, .Lcall
+
+ ld 6, 8(4)
+
+ cmpwi 0, 12, 3
+ blt 0, .Lcall
+
+ ld 7, 16(4)
+
+ cmpwi 0, 12, 4
+ blt 0, .Lcall
+
+ ld 8, 24(4)
+
+ cmpwi 0, 12, 5
+ blt 0, .Lcall
+
+ ld 9, 32(4)
+
+ cmpwi 0, 12, 6
+ blt 0, .Lcall
+
+ ld 10, 40(4)
+
+ cmpwi 0, 12, 7
+ blt 0, .Lcall
+
+// There are more than 6 microtask parameters, so we need to store the
+// remainder to the stack.
+ addi 12, 12, -6
+ mtctr 12
+
+// These are set to 8 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
+// 32 + 8*8 == 96 bytes above r1 for ELFv2.
+ addi 4, 4, 40
+# if KMP_ARCH_PPC64_LE
+ addi 12, 1, 88
+# else
+ addi 12, 1, 104
+# endif
+
+.Lnext:
+ ldu 0, 8(4)
+ stdu 0, 8(12)
+ bdnz .Lnext
+
+.Lcall:
+# if KMP_ARCH_PPC64_LE
+ std 2, 24(1)
+ mr 12, 3
+#else
+ std 2, 40(1)
+// For ELFv1, we need to load the actual function address from the function descriptor.
+ ld 12, 0(3)
+ ld 2, 8(3)
+ ld 11, 16(3)
+#endif
+
+ addi 3, 31, -20
+ addi 4, 31, -24
+
+ mtctr 12
+ bctrl
+# if KMP_ARCH_PPC64_LE
+ ld 2, 24(1)
+# else
+ ld 2, 40(1)
+# endif
+
+# if OMPT_SUPPORT
+ li 3, 0
+ std 3, 0(30)
+# endif
+
+ li 3, 1
+
+# if OMPT_SUPPORT
+ ld 30, -16(31)
+# endif
+
+ mr 1, 31
+ ld 0, 16(1)
+ ld 31, -8(1)
+ mtlr 0
+ blr
+
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+
+// -- End __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_PPC64 */
+
#if KMP_ARCH_ARM
.data
.comm .gomp_critical_user_,32,8
Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Wed May 25 23:48:14 2016
@@ -2575,7 +2575,7 @@ __kmp_get_load_balance( int max )
#endif // USE_LOAD_BALANCE
-#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))
+#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
// we really only need the case with 1 argument, because CLANG always build
// a struct of pointers to shared variables referenced in the outlined function
_______________________________________________
Openmp-commits mailing list
Openmp-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/openmp-commits
More information about the Openmp-commits
mailing list