[Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]

Churbanov, Andrey via Openmp-commits openmp-commits at lists.llvm.org
Thu Jun 2 09:08:40 PDT 2016


OK for me.

- Andrey

-----Original Message-----
From: Peyton, Jonathan L 
Sent: Thursday, June 2, 2016 7:07 PM
To: Churbanov, Andrey <Andrey.Churbanov at intel.com>; Tom Stellard <tom at stellard.net>
Cc: openmp-commits (openmp-commits at lists.llvm.org) <openmp-commits at lists.llvm.org>; John Mellor-Crummey <johnmc at rice.edu>
Subject: RE: [Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]

Tom, Andrey,

Can this commit be merged into 3.8.1?  It fixes a bug for the ppc64 architecture when invoking a microtask with many arguments.

-- Johnny

-----Original Message-----
From: Openmp-commits [mailto:openmp-commits-bounces at lists.llvm.org] On Behalf Of Hal Finkel via Openmp-commits
Sent: Wednesday, May 25, 2016 11:48 PM
To: openmp-commits at lists.llvm.org
Subject: [Openmp-commits] [openmp] r270821 - Add an assembly __kmp_invoke_microtask for ppc64[le]

Author: hfinkel
Date: Wed May 25 23:48:14 2016
New Revision: 270821

URL: http://llvm.org/viewvc/llvm-project?rev=270821&view=rev
Log:
Add an assembly __kmp_invoke_microtask for ppc64[le]

Clang no longer restricts itself to generating microtasks with a small number of arguments, and so an assembly implementation is required to prevent hitting the parameter limit present in the C implementation. This adds an implementation for ppc64[le].

Modified:
    openmp/trunk/runtime/src/z_Linux_asm.s
    openmp/trunk/runtime/src/z_Linux_util.c

Modified: openmp/trunk/runtime/src/z_Linux_asm.s
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.s?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.s Wed May 25 23:48:14 2016
@@ -1555,6 +1555,226 @@ KMP_LABEL(kmp_1):
 
 #endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
 
+#if KMP_ARCH_PPC64
+
+//------------------------------------------------------------------------
+//
+// typedef void	(*microtask_t)( int *gtid, int *tid, ... );
+//
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
+//		           int gtid, int tid,
+//                         int argc, void *p_argv[] ) {
+//    (*pkfn)( & gtid, & tid, argv[0], ... );
+//    return 1;
+// }
+//
+// parameters:
+//	r3:	pkfn
+//	r4:	gtid
+//	r5:	tid
+//	r6:	argc
+//	r7:	p_argv
+//	r8:	&exit_frame
+//
+// return:	r3	(always 1/TRUE)
+//
+	.text
+# if KMP_ARCH_PPC64_LE
+	.abiversion 2
+# endif
+	.globl	__kmp_invoke_microtask
+
+# if KMP_ARCH_PPC64_LE
+	.p2align	4
+# else
+	.p2align	2
+# endif
+
+	.type	__kmp_invoke_microtask,@function
+
+# if KMP_ARCH_PPC64_LE
+__kmp_invoke_microtask:
+.Lfunc_begin0:
+.Lfunc_gep0:
+	addis 2, 12, .TOC.-.Lfunc_gep0@ha
+	addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+	.localentry	__kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
+# else
+	.section	.opd,"aw",@progbits
+__kmp_invoke_microtask:
+	.p2align	3
+	.quad	.Lfunc_begin0
+	.quad	.TOC.@tocbase
+	.quad	0
+	.text
+.Lfunc_begin0:
+# endif
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+
+// We need to allocate a stack frame large enough to hold all of the parameters
+// on the stack for the microtask plus what this function needs. That's 48
+// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
+// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
+// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
+// to save r30 to hold a copy of r8.
+
+	.cfi_startproc
+	mflr 0
+	std 31, -8(1)
+	std 0, 16(1)
+
+// This is unusual because normally we'd set r31 equal to r1 after the stack
+// frame is established. In this case, however, we need to dynamically compute
+// the stack frame size, and so we keep a direct copy of r1 to access our
+// register save areas and restore the r1 value before returning.
+	mr 31, 1
+	.cfi_def_cfa_register r31
+	.cfi_offset r31, -8
+	.cfi_offset lr, 16
+
+// Compute the size necessary for the local stack frame.
+# if KMP_ARCH_PPC64_LE
+	li 12, 72
+# else
+	li 12, 88
+# endif
+	sldi 0, 6, 3
+	add 12, 0, 12
+	neg 12, 12
+
+// We need to make sure that the stack frame stays aligned (to 16 bytes, except
+// under the BG/Q CNK, where it must be to 32 bytes).
+# if KMP_OS_CNK
+	li 0, -32
+# else
+	li 0, -16
+# endif
+	and 12, 0, 12
+
+// Establish the local stack frame.
+	stdux 1, 1, 12
+
+# if OMPT_SUPPORT
+	.cfi_offset r30, -16
+	std 30, -16(31)
+	mr 30, 8
+# endif
+
+// Store gtid and tid to the stack because they're passed by reference to the microtask.
+	stw 4, -20(31)
+	stw 5, -24(31)
+
+	mr 12, 6
+	mr 4, 7
+
+	cmpwi 0, 12, 1
+	blt	 0, .Lcall
+
+	ld 5, 0(4)
+
+	cmpwi 0, 12, 2
+	blt	 0, .Lcall
+
+	ld 6, 8(4)
+
+	cmpwi 0, 12, 3
+	blt	 0, .Lcall
+
+	ld 7, 16(4)
+
+	cmpwi 0, 12, 4
+	blt	 0, .Lcall
+
+	ld 8, 24(4)
+
+	cmpwi 0, 12, 5
+	blt	 0, .Lcall
+
+	ld 9, 32(4)
+
+	cmpwi 0, 12, 6
+	blt	 0, .Lcall
+
+	ld 10, 40(4)
+
+	cmpwi 0, 12, 7
+	blt	 0, .Lcall
+
+// There are more than 6 microtask parameters, so we need to store the 
+// remainder to the stack.
+	addi 12, 12, -6
+	mtctr 12
+
+// These are set to 8 bytes before the first desired store address (we're using
+// pre-increment loads and stores in the loop below). The parameter save area
+// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
+// 32 + 8*8 == 96 bytes above r1 for ELFv2.
+	addi 4, 4, 40
+# if KMP_ARCH_PPC64_LE
+	addi 12, 1, 88
+# else
+	addi 12, 1, 104
+# endif
+
+.Lnext:
+	ldu 0, 8(4)
+	stdu 0, 8(12)
+	bdnz .Lnext
+
+.Lcall:
+# if KMP_ARCH_PPC64_LE
+	std 2, 24(1)
+	mr 12, 3
+#else
+	std 2, 40(1)
+// For ELFv1, we need to load the actual function address from the function descriptor.
+	ld 12, 0(3)
+	ld 2, 8(3)
+	ld 11, 16(3)
+#endif
+
+	addi 3, 31, -20
+	addi 4, 31, -24
+
+	mtctr 12
+	bctrl
+# if KMP_ARCH_PPC64_LE
+	ld 2, 24(1)
+# else
+	ld 2, 40(1)
+# endif
+
+# if OMPT_SUPPORT
+	li 3, 0
+	std 3, 0(30)
+# endif
+
+	li 3, 1
+
+# if OMPT_SUPPORT
+	ld 30, -16(31)
+# endif
+
+	mr 1, 31
+	ld 0, 16(1)
+	ld 31, -8(1)
+	mtlr 0
+	blr
+
+	.long	0
+	.quad	0
+.Lfunc_end0:
+	.size	__kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
+	.cfi_endproc
+
+// -- End  __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_PPC64 */
+
 #if KMP_ARCH_ARM
     .data
     .comm .gomp_critical_user_,32,8

Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=270821&r1=270820&r2=270821&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Wed May 25 23:48:14 2016
@@ -2575,7 +2575,7 @@ __kmp_get_load_balance( int max )
 
 #endif // USE_LOAD_BALANCE
 
-#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))
+#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
 
 // we really only need the case with 1 argument, because CLANG always build
 // a struct of pointers to shared variables referenced in the outlined function


_______________________________________________
Openmp-commits mailing list
Openmp-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/openmp-commits

--------------------------------------------------------------------
Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park,
17 Krylatskaya Str., Bldg 4, Moscow 121614,
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.


More information about the Openmp-commits mailing list