[Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]

Peyton, Jonathan L via Openmp-commits openmp-commits at lists.llvm.org
Thu Jun 2 09:06:54 PDT 2016


Tom, Andrey,

Can this commit be merged into 3.8.1?  It fixes a bug for the ppc64 architecture when invoking a microtask with many arguments.

-- Johnny

-----Original Message-----
From: Openmp-commits [mailto:openmp-commits-bounces at lists.llvm.org] On Behalf Of Hal Finkel via Openmp-commits
Sent: Wednesday, May 25, 2016 11:48 PM
To: openmp-commits at lists.llvm.org
Subject: [Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]

Author: hfinkel
Date: Wed May 25 23:48:14 2016
New Revision: 270821

URL: http://llvm.org/viewvc/llvm-project?rev=270821&view=rev
Log:
Add an assembly __kmp_invoke_microtask for ppc64[le]

Clang no longer restricts itself to generating microtasks with a small number of arguments, and so an assembly implementation is required to prevent hitting the parameter limit present in the C implementation. This adds an implementation for ppc64[le].

Modified:
    openmp/trunk/runtime/src/z_Linux_asm.s
    openmp/trunk/runtime/src/z_Linux_util.c

Modified: openmp/trunk/runtime/src/z_Linux_asm.s
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.s?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.s Wed May 25 23:48:14 2016
@@ -1555,6 +1555,226 @@ KMP_LABEL(kmp_1):
 
 #endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
 
+#if KMP_ARCH_PPC64
+
+//------------------------------------------------------------------------
+//
+// typedef void	(*microtask_t)( int *gtid, int *tid, ... );
+//
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
+//		           int gtid, int tid,
+//                         int argc, void *p_argv[] ) {
+//    (*pkfn)( & gtid, & tid, argv[0], ... );
+//    return 1;
+// }
+//
+// parameters:
+//	r3:	pkfn
+//	r4:	gtid
+//	r5:	tid
+//	r6:	argc
+//	r7:	p_argv
+//	r8:	&exit_frame
+//
+// return:	r3	(always 1/TRUE)
+//
+	.text
+# if KMP_ARCH_PPC64_LE
+	.abiversion 2
+# endif
+	.globl	__kmp_invoke_microtask
+
+# if KMP_ARCH_PPC64_LE
+	.p2align	4
+# else
+	.p2align	2
+# endif
+
+	.type	__kmp_invoke_microtask,@function
+
+# if KMP_ARCH_PPC64_LE
+__kmp_invoke_microtask:
+.Lfunc_begin0:
+.Lfunc_gep0:
+	addis 2, 12, .TOC.-.Lfunc_gep0@ha
+	addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+	.localentry	__kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
+# else
+	.section	.opd,"aw",@progbits
+__kmp_invoke_microtask:
+	.p2align	3
+	.quad	.Lfunc_begin0
+	.quad	.TOC.@tocbase
+	.quad	0
+	.text
+.Lfunc_begin0:
+# endif
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 48
+// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
+// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
+// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
+// to save r30 to hold a copy of r8.
+
+	.cfi_startproc
+	mflr 0
+	std 31, -8(1)
+	std 0, 16(1)
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+	mr 31, 1
+	.cfi_def_cfa_register r31
+	.cfi_offset r31, -8
+	.cfi_offset lr, 16
+
+// Compute the size necessary for the local stack frame.
+# if KMP_ARCH_PPC64_LE
+	li 12, 72
+# else
+	li 12, 88
+# endif
+	sldi 0, 6, 3
+	add 12, 0, 12
+	neg 12, 12
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes, except
+// under the BG/Q CNK, where it must be to 32 bytes).
+# if KMP_OS_CNK
+	li 0, -32
+# else
+	li 0, -16
+# endif
+	and 12, 0, 12
+
+// Establish the local stack frame.
+	stdux 1, 1, 12
+
+# if OMPT_SUPPORT
+	.cfi_offset r30, -16
+	std 30, -16(31)
+	mr 30, 8
+# endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+	stw 4, -20(31)
+	stw 5, -24(31)
+
+	mr 12, 6
+	mr 4, 7
+
+	cmpwi 0, 12, 1
+	blt	 0, .Lcall
+
+	ld 5, 0(4)
+
+	cmpwi 0, 12, 2
+	blt	 0, .Lcall
+
+	ld 6, 8(4)
+
+	cmpwi 0, 12, 3
+	blt	 0, .Lcall
+
+	ld 7, 16(4)
+
+	cmpwi 0, 12, 4
+	blt	 0, .Lcall
+
+	ld 8, 24(4)
+
+	cmpwi 0, 12, 5
+	blt	 0, .Lcall
+
+	ld 9, 32(4)
+
+	cmpwi 0, 12, 6
+	blt	 0, .Lcall
+
+	ld 10, 40(4)
+
+	cmpwi 0, 12, 7
+	blt	 0, .Lcall
+
+// There are more than 6 microtask parameters, so we need to store the 
+// remainder to the stack.
+	addi 12, 12, -6
+	mtctr 12
+
+// These are set to 8 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
+// 32 + 8*8 == 96 bytes above r1 for ELFv2.
+	addi 4, 4, 40
+# if KMP_ARCH_PPC64_LE
+	addi 12, 1, 88
+# else
+	addi 12, 1, 104
+# endif
+
+.Lnext:
+	ldu 0, 8(4)
+	stdu 0, 8(12)
+	bdnz .Lnext
+
+.Lcall:
+# if KMP_ARCH_PPC64_LE
+	std 2, 24(1)
+	mr 12, 3
+#else
+	std 2, 40(1)
+// For ELFv1, we need to load the actual function address from the function descriptor.
+	ld 12, 0(3)
+	ld 2, 8(3)
+	ld 11, 16(3)
+#endif
+
+	addi 3, 31, -20
+	addi 4, 31, -24
+
+	mtctr 12
+	bctrl
+# if KMP_ARCH_PPC64_LE
+	ld 2, 24(1)
+# else
+	ld 2, 40(1)
+# endif
+
+# if OMPT_SUPPORT
+	li 3, 0
+	std 3, 0(30)
+# endif
+
+	li 3, 1
+
+# if OMPT_SUPPORT
+	ld 30, -16(31)
+# endif
+
+	mr 1, 31
+	ld 0, 16(1)
+	ld 31, -8(1)
+	mtlr 0
+	blr
+
+	.long	0
+	.quad	0
+.Lfunc_end0:
+	.size	__kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
+	.cfi_endproc
+
+// -- End  __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_PPC64 */
+
 #if KMP_ARCH_ARM
     .data
     .comm .gomp_critical_user_,32,8

Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Wed May 25 23:48:14 2016
@@ -2575,7 +2575,7 @@ __kmp_get_load_balance( int max )
 
 #endif // USE_LOAD_BALANCE
 
-#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))
+#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
 
 // we really only need the case with 1 argument, because CLANG always build
 // a struct of pointers to shared variables referenced in the outlined function


_______________________________________________
Openmp-commits mailing list
Openmp-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/openmp-commits


More information about the Openmp-commits mailing list