[Openmp-commits] Openmp-commits Digest, Vol 6, Issue 1

Carlo Bertolli cbertol at us.ibm.com
Thu Jul 17 13:14:10 PDT 2014


Hi,

I have just noticed that my editor cut the lines in the patch...

Below is a good version of the patch.

-- Carlo


diff --git a/runtime/Makefile.bgq b/runtime/Makefile.bgq
new file mode 100644
index 0000000..d9aa914
--- /dev/null
+++ b/runtime/Makefile.bgq
@@ -0,0 +1,76 @@
+date := $(shell date '+%Y%m%d')
+
+FEATURE_FLAGS = -DOMP_40_ENABLED=1 -DOMP_30_ENABLED=1 -DOMP_VERSION=201107
-DKMP_VERSION_MAJOR=5 \
+		-DCACHE_LINE=64 -DKMP_ADJUST_BLOCKTIME=1 -DBUILD_I8
-DKMP_USE_ADAPTIVE_LOCKS=0 \
+		-DKMP_DEBUG_ADAPTIVE_LOCKS=0 -DINTEL_NO_ITTNOTIFY_API
-DKMP_VERSION_MINOR=0 -DKMP_VERSION_BUILD=0 \
+		-DKMP_LIBRARY_FILE=\"libiomp5.so\" -D_KMP_BUILD_TIME="\"$(date)\""
-DKMP_GOMP_COMPAT
+
+CPPFLAGS = ${FEATURE_FLAGS} -D__float128='long double'
+
+CC = powerpc64-bgq-linux-gcc
+CXX = powerpc64-bgq-linux-g++
+
+all: build/libiomp5.a build/libiomp5.so
+
+build/.dir:
+	mkdir -p build
+	touch build/.dir
+
+build/omp.h:
+	perl tools/expand-vars.pl --strict $$(echo $(FEATURE_FLAGS) | sed
's/-D/-D /g') \
+	  -D KMP_BUILD_DATE=$(date) -D Revision=Revision -D Date=Date \
+	  src/include/40/omp.h.var build/omp.h
+
+build/%.o: src/%.cpp build/kmp_i18n_id.inc build/kmp_i18n_default.inc
build/omp.h
+	${CXX} -c ${CPPFLAGS} -g -O3 -Isrc -Ibuild -o $@ $<
+
+build/%.o: src/%.c build/kmp_i18n_id.inc build/kmp_i18n_default.inc
build/omp.h
+	${CC} -x c++ -c ${CPPFLAGS} -g -O3 -Isrc -Ibuild -o $@ $<
+
+OBJS =	build/kmp_alloc.o \
+	build/kmp_atomic.o \
+	build/kmp_cancel.o \
+	build/kmp_csupport.o \
+	build/kmp_dispatch.o \
+	build/kmp_debug.o \
+	build/kmp_environment.o \
+	build/kmp_error.o \
+	build/kmp_ftn_cdecl.o \
+	build/kmp_ftn_extra.o \
+	build/kmp_ftn_stdcall.o \
+	build/kmp_global.o \
+	build/kmp_i18n.o \
+	build/kmp_io.o \
+	build/kmp_itt.o \
+	build/kmp_runtime.o \
+	build/kmp_settings.o \
+	build/kmp_sched.o \
+	build/kmp_str.o \
+	build/kmp_tasking.o \
+	build/kmp_taskq.o \
+	build/kmp_taskdeps.o \
+	build/kmp_threadprivate.o \
+	build/kmp_utility.o \
+	build/kmp_version.o \
+	build/kmp_lock.o \
+	build/z_Linux_util.o
+
+BGSYS_FLOOR=$(shell readlink /bgsys/drivers/ppcfloor)
+build/libiomp5.so: $(OBJS)
+	${CXX} -Wl,--build-id -Wl,-rpath -Wl,$
{BGSYS_FLOOR}/gnu-linux/powerpc64-bgq-linux/lib -shared -o $@.1.0 $^
-Wl,-soname,$(shell basename $@.1) -lpthread
+	(cd $(shell dirname $@) && ln -sf $(shell basename $@.1.0) $(shell
basename $@.1))
+	(cd $(shell dirname $@) && ln -sf $(shell basename $@.1) $(shell
basename $@))
+# Note: We should not need the --build-id when we switch to clang (it
should add it for us).
+
+build/libiomp5.a: $(OBJS)
+	ar cr $@ $^
+
+build/kmp_i18n_id.inc: src/i18n/en_US.txt tools/message-converter.pl
build/.dir
+	perl tools/message-converter.pl --prefix=kmp_i18n --enum=$@ $<
+
+build/kmp_i18n_default.inc: src/i18n/en_US.txt tools/message-converter.pl
build/.dir
+	perl tools/message-converter.pl --prefix=kmp_i18n --default=$@ $<
+
+clean:
+	rm -rf build
+
diff --git a/runtime/src/CMakeLists.txt b/runtime/src/CMakeLists.txt
index dd29966..4814cb4 100644
--- a/runtime/src/CMakeLists.txt
+++ b/runtime/src/CMakeLists.txt
@@ -12,6 +12,9 @@ include_directories(
   ${CMAKE_CURRENT_BINARY_DIR}
 )

+# detect architecture
+EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE
ARCH )
+
 if(WIN32)
   set(OS_GEN "win")
 elseif(APPLE)
@@ -22,12 +25,14 @@ else()
   message(FATAL_ERROR "Unsupported OS")
 endif()

-if("${ARCH}" STREQUAL "")
+if(NOT "${ARCH}" STREQUAL "ppc64")
   set(ARCH "32e")
+  set(ARCH_STR "Intel(R) 64")
+else()
+# arch already set above
+  set(ARCH_STR "PPC64")
 endif()

-set(ARCH_STR "Intel(R) 64")
-
 set(FEATURE_FLAGS "-D USE_ITT_BUILD")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D NDEBUG")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_ARCH_STR=\"\\\"$
{ARCH_STR}\\\"\"")
@@ -50,8 +55,13 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D
USE_LOAD_BALANCE")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_CBLKDATA")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D GUIDEDLL_EXPORTS")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_GOMP_COMPAT")
-set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")
-set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")
+
+#adaptive locks use x86 assembly - disable for ppc64
+if(NOT "${ARCH}" STREQUAL "ppc64")
+	set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")
+	set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")
+endif()
+
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_50_ENABLED=0")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_41_ENABLED=0")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_40_ENABLED=1")
@@ -122,16 +132,35 @@ add_custom_command(
   OUTPUT omp.h
     COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/../tools/expand-vars.pl
--strict -D Revision=\"\\$$Revision\" -D Date=\"\\$$Date\" -D KMP_TYPE=
\"Performance\" -D KMP_ARCH=\"\\\"${ARCH_STR}\\\"\" -D KMP_VERSION_MAJOR=$
{VERSION} -D KMP_VERSION_MINOR=0 -D KMP_VERSION_BUILD=00000000 -D
KMP_BUILD_DATE=\"${BUILD_TIME} UTC\" -D KMP_TARGET_COMPILER=12 -D
KMP_DIAG=0 -D KMP_DEBUG_INFO=0 -D OMP_VERSION=${OMP_VERSION} $
{CMAKE_CURRENT_SOURCE_DIR}/include/${OMP_VERSION_NUM}/omp.h.var omp.h
 )
-add_custom_command(
-  OUTPUT z_Linux_asm.o
-    COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS
-D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp $
{CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
-)
+
+if(NOT "${ARCH}" STREQUAL "ppc64")
+	add_custom_command(
+  		OUTPUT z_Linux_asm.o
+    		COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D
KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp
${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
+	)
+else()
+	add_custom_command(
+  		OUTPUT z_Linux_asm.o
+    		COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D
KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_PPC64 -x assembler-with-cpp
${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
+	)
+
+endif()
+

 add_custom_target(gen_kmp_i18n DEPENDS kmp_i18n_id.inc
kmp_i18n_default.inc omp.h z_Linux_asm.o)

 if(NOT APPLE)
-  set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=$
{CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt")
+	set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=$
{CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt -ldl")
 endif()

 add_library(iomp5 SHARED ${SOURCES} z_Linux_asm.o)
+
+# This is a workaround to a known ppc64 issue about libpthread. For more
+# information see
+#
http://ryanarn.blogspot.com/2011/07/curious-case-of-pthreadatfork-on.html
+if("${ARCH}" STREQUAL "ppc64")
+	find_library(PTHREAD NAMES pthread)
+	target_link_libraries(iomp5 ${PTHREAD})
+endif()
+
 add_dependencies(iomp5 gen_kmp_i18n)
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index a8c600b..f5dd10f 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -459,9 +459,9 @@ typedef int PACKED_REDUCTION_METHOD_T;
 /*
  * Only Linux* OS and Windows* OS support thread affinity.
  */
-#if KMP_OS_LINUX || KMP_OS_WINDOWS
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64
 # define KMP_AFFINITY_SUPPORTED 1
-#elif KMP_OS_DARWIN || KMP_OS_FREEBSD
+#elif KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_CNK || KMP_ARCH_PPC64
 // affinity not supported
 # define KMP_AFFINITY_SUPPORTED 0
 #else
@@ -476,7 +476,7 @@ extern size_t __kmp_affin_mask_size;

 # if KMP_OS_LINUX
 //
-// On Linux* OS, the mask isactually a vector of length
__kmp_affin_mask_size
+// On Linux* OS, the mask is actually a vector of length
__kmp_affin_mask_size
 // (in bytes).  It should be allocated on a word boundary.
 //
 // WARNING!!!  We have made the base type of the affinity mask unsigned
char,
@@ -946,6 +946,9 @@ extern unsigned int __kmp_place_core_offset;
 #if KMP_OS_WINDOWS
 #  define KMP_INIT_WAIT    64U          /* initial number of spin-tests
*/
 #  define KMP_NEXT_WAIT    32U          /* susequent number of spin-tests
*/
+#elif KMP_OS_CNK
+#  define KMP_INIT_WAIT    16U          /* initial number of spin-tests
*/
+#  define KMP_NEXT_WAIT     8U          /* susequent number of spin-tests
*/
 #elif KMP_OS_LINUX
 #  define KMP_INIT_WAIT  1024U          /* initial number of spin-tests
*/
 #  define KMP_NEXT_WAIT   512U          /* susequent number of spin-tests
*/
@@ -971,6 +974,11 @@ extern void __kmp_x86_cpuid( int mode, int mode2,
struct kmp_cpuid *p );
   extern void __kmp_x86_pause( void );
 # endif
 # define KMP_CPU_PAUSE()        __kmp_x86_pause()
+#elif KMP_ARCH_PPC64
+# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
+# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
+# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
+# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED();
KMP_PPC64_PRI_LOC_MB(); } while (0)
 #else
 # define KMP_CPU_PAUSE()        /* nothing to do */
 #endif
diff --git a/runtime/src/kmp_csupport.c b/runtime/src/kmp_csupport.c
index 18b6c35..0b7d3ed 100644
--- a/runtime/src/kmp_csupport.c
+++ b/runtime/src/kmp_csupport.c
@@ -837,6 +837,19 @@ __kmpc_flush(ident_t *loc, ...)
             #endif // KMP_MIC
         #elif KMP_ARCH_ARM
             // Nothing yet
+	#elif KMP_ARCH_PPC64
+            // Nothing needed here (we have a real MB above).
+            #if KMP_OS_CNK
+		// The flushing thread needs to yield here; this prevents a
+		// busy-waiting thread from saturating the pipeline. flush is
+		// often used in loops like this:
+                // while (!flag) {
+                //   #pragma omp flush(flag)
+                // }
+		// and adding the yield here is good for at least a 10x speedup
+		// when running >2 threads per core (on the NAS LU benchmark).
+                __kmp_yield(TRUE);
+            #endif
         #else
             #error Unknown or unsupported architecture
         #endif
diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h
index f241751..d78d846 100644
--- a/runtime/src/kmp_ftn_os.h
+++ b/runtime/src/kmp_ftn_os.h
@@ -478,7 +478,7 @@
 //#define KMP_API_NAME_GOMP_TARGET_UPDATE
GOMP_target_update
 #define KMP_API_NAME_GOMP_TEAMS                          GOMP_teams

-#if KMP_OS_LINUX
+#if KMP_OS_LINUX && !KMP_OS_CNK && !KMP_ARCH_PPC64
     #define xstr(x) str(x)
     #define str(x) #x

diff --git a/runtime/src/kmp_global.c b/runtime/src/kmp_global.c
index aa1f8e3..d3c3195 100644
--- a/runtime/src/kmp_global.c
+++ b/runtime/src/kmp_global.c
@@ -321,7 +321,11 @@ int        __kmp_env_consistency_check  = FALSE;  /*
KMP_CONSISTENCY_CHECK speci
 kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
 kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
 kmp_uint32 __kmp_yielding_on = 1;
+#if KMP_OS_CNK
+kmp_uint32 __kmp_yield_cycle = 0;
+#else
 kmp_uint32 __kmp_yield_cycle = 1;     /* Yield-cycle is on by default */
+#endif
 kmp_int32  __kmp_yield_on_count = 10; /* By default, yielding is on for 10
monitor periods. */
 kmp_int32  __kmp_yield_off_count = 1; /* By default, yielding is off for 1
monitor periods. */
 /* ----------------------------------------------------- */
diff --git a/runtime/src/kmp_gsupport.c b/runtime/src/kmp_gsupport.c
index 9d8e553..aa52024 100644
--- a/runtime/src/kmp_gsupport.c
+++ b/runtime/src/kmp_gsupport.c
@@ -15,7 +15,7 @@

//===----------------------------------------------------------------------===//


-#if defined(__x86_64)
+#if defined(__x86_64) || defined (__powerpc64__)
 # define KMP_I8
 #endif
 #include "kmp.h"
diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h
index 8009d18..c5ce838 100644
--- a/runtime/src/kmp_lock.h
+++ b/runtime/src/kmp_lock.h
@@ -518,7 +518,7 @@ __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t
*lck )
 // Internal RTL locks are also implemented as ticket locks, for now.
 //
 // FIXME - We should go through and figure out which lock kind works best
for
-// each internal lock, and use the type deeclaration and function calls
for
+// each internal lock, and use the type declaration and function calls for
 // that explicit lock kind (and get rid of this section).
 //

diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h
index bb5e72f..db1981e 100644
--- a/runtime/src/kmp_os.h
+++ b/runtime/src/kmp_os.h
@@ -66,10 +66,12 @@
 #define KMP_OS_FREEBSD  0
 #define KMP_OS_DARWIN   0
 #define KMP_OS_WINDOWS    0
+#define KMP_OS_CNK      0
 #define KMP_OS_UNIX     0  /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN
etc. */

 #define KMP_ARCH_X86        0
 #define KMP_ARCH_X86_64	    0
+#define KMP_ARCH_PPC64      0

 #ifdef _WIN32
 # undef KMP_OS_WINDOWS
@@ -91,6 +93,11 @@
 # define KMP_OS_FREEBSD 1
 #endif

+#if ( defined __bgq__ )
+# undef KMP_OS_CNK
+# define KMP_OS_CNK 1
+#endif
+
 #if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS)
 # error Unknown OS
 #endif
@@ -117,6 +124,9 @@
 # elif defined __i386
 #  undef KMP_ARCH_X86
 #  define KMP_ARCH_X86 1
+# elif defined __powerpc64__
+#  undef KMP_ARCH_PPC64
+#  define KMP_ARCH_PPC64 1
 # endif
 #endif

@@ -156,7 +166,7 @@
 # define KMP_ARCH_ARM 1
 #endif

-#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM)
+#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64)
 # error Unknown or unsupported architecture
 #endif

@@ -232,7 +242,7 @@

 #if KMP_ARCH_X86 || KMP_ARCH_ARM
 # define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
-#elif KMP_ARCH_X86_64
+#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64
 # define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
 #else
 # error "Can't determine size_t printf format specifier."
@@ -657,6 +667,10 @@ extern kmp_real64 __kmp_test_then_add_real64
( volatile kmp_real64 *p, kmp_real6
 # endif
 #endif /* KMP_OS_WINDOWS */

+#if KMP_ARCH_PPC64
+# define KMP_MB()       __sync_synchronize()
+#endif
+
 #ifndef KMP_MB
 # define KMP_MB()       /* nothing to do */
 #endif
@@ -763,7 +777,7 @@ typedef void    (*microtask_t)( int *gtid, int
*npr, ... );
 #endif /* KMP_I8 */

 /* Workaround for Intel(R) 64 code gen bug when taking address of static
array (Intel(R) 64 Tracker #138) */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
 # define STATIC_EFI2_WORKAROUND
 #else
 # define STATIC_EFI2_WORKAROUND static
diff --git a/runtime/src/kmp_runtime.c b/runtime/src/kmp_runtime.c
index fea41d0..d243700 100644
--- a/runtime/src/kmp_runtime.c
+++ b/runtime/src/kmp_runtime.c
@@ -8450,7 +8450,7 @@ __kmp_determine_reduction_method( ident_t *loc,
kmp_int32 global_tid,
         int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
         int tree_available   = FAST_REDUCTION_TREE_METHOD_GENERATED;

-        #if KMP_ARCH_X86_64
+        #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64

             #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS ||
KMP_OS_DARWIN
                 #if KMP_MIC
diff --git a/runtime/src/kmp_settings.c b/runtime/src/kmp_settings.c
index 54745cb..b85678e 100644
--- a/runtime/src/kmp_settings.c
+++ b/runtime/src/kmp_settings.c
@@ -536,6 +536,7 @@ __kmp_stg_parse_file(

 static char * par_range_to_print = NULL;

+#ifdef KMP_DEBUG
 static void
 __kmp_stg_parse_par_range(
     char const * name,
@@ -614,7 +615,7 @@ __kmp_stg_parse_par_range(
         break;
     }
 } // __kmp_stg_parse_par_range
-
+#endif

 int
 __kmp_initial_threads_capacity( int req_nproc )
diff --git a/runtime/src/kmp_version.c b/runtime/src/kmp_version.c
index f64d052..5dc82d1 100644
--- a/runtime/src/kmp_version.c
+++ b/runtime/src/kmp_version.c
@@ -20,7 +20,7 @@
 #include "kmp_version.h"

 // Replace with snapshot date YYYYMMDD for promotion build.
-#define KMP_VERSION_BUILD    00000000
+//#define KMP_VERSION_BUILD    00000000

 // Helper macros to convert value of macro to string literal.
 #define _stringer( x ) #x
diff --git a/runtime/src/makefile.mk b/runtime/src/makefile.mk
index 9be8799..84d6ac2 100644
--- a/runtime/src/makefile.mk
+++ b/runtime/src/makefile.mk
@@ -310,6 +310,9 @@ endif
 ifeq "$(CPLUSPLUS)" "on"
     ifeq "$(os)" "win"
         c-flags   += -TP
+    else ifeq "$(arch)" "ppc64"
+    # c++0x on ppc64 linux removes definition of preproc. macros, needed
in .hs
+    	c-flags   += -x c++ -std=gnu++0x
     else
         ifneq "$(filter gcc clang,$(c))" ""
             c-flags   += -x c++ -std=c++0x
@@ -370,7 +373,7 @@ ifeq "$(os)" "lin"
             ld-flags-extra += -lirc_pic
             endif
         endif
-        ifeq "$(filter 32 32e 64,$(arch))" ""
+        ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
             ld-flags-extra += $(shell pkg-config --libs libffi)
         endif
     else
@@ -581,9 +584,12 @@ ifneq "$(os)" "win"
     ifeq "$(arch)" "arm"
         z_Linux_asm$(obj) : \
 		    cpp-flags += -D KMP_ARCH_ARM
-    else
+    else ifeq "$(arch)" "ppc64"
+        z_Linux_asm$(obj) : \
+		    cpp-flags += -D KMP_ARCH_PPC64
+	else
         z_Linux_asm$(obj) : \
-            cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
+       	    cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)

     endif
 endif

@@ -729,7 +735,9 @@ endif
         else # 5
             lib_c_items += kmp_gsupport
         endif
+#        ifneq "$(arch)" "ppc64"
         lib_asm_items += z_Linux_asm
+#	    endif
     endif
 endif

@@ -1391,9 +1399,13 @@ ifneq "$(filter %-dyna win-%,$(os)-$(LINK_TYPE))" ""
             td_exp += libc.so.6
             td_exp += ld-linux-armhf.so.3
         endif
+        ifeq "$(arch)" "ppc64"
+            td_exp += libc.so.6
+            td_exp += ld64.so.1
+        endif
         td_exp += libdl.so.2
         td_exp += libgcc_s.so.1
-        ifeq "$(filter 32 32e 64,$(arch))" ""
+        ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
             td_exp += libffi.so.6
             td_exp += libffi.so.5
         endif
diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_config.h
b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index 40c8614..9e7b36b 100644
--- a/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -132,6 +132,11 @@
 #  define ITT_ARCH_ARM  4
 #endif /* ITT_ARCH_ARM */

+#ifndef ITT_ARCH_PPC64
+#  define ITT_ARCH_PPC64  5
+#endif /* ITT_ARCH_PPC64 */
+
+
 #ifndef ITT_ARCH
 #  if defined _M_IX86 || defined __i386__
 #    define ITT_ARCH ITT_ARCH_IA32
@@ -141,6 +146,8 @@
 #    define ITT_ARCH ITT_ARCH_IA64
 #  elif defined _M_ARM || __arm__
 #    define ITT_ARCH ITT_ARCH_ARM
+#  elif defined __powerpc64__
+#    define ITT_ARCH ITT_ARCH_PPC64
 #  endif
 #endif

@@ -274,7 +281,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void*
ptr, long addend)
                           : "memory");
     return result;
 }
-#elif ITT_ARCH==ITT_ARCH_ARM
+#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64
 #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
 #endif /* ITT_ARCH==ITT_ARCH_IA64 */
 #ifndef ITT_SIMPLE_INIT
diff --git a/runtime/src/z_Linux_asm.s b/runtime/src/z_Linux_asm.s
index 1f1ba1b..64c8052 100644
--- a/runtime/src/z_Linux_asm.s
+++ b/runtime/src/z_Linux_asm.s
@@ -138,7 +138,7 @@ __kmp_unnamed_critical_addr:
 #endif /* KMP_GOMP_COMPAT */


-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 && !KMP_ARCH_PPC64

 // -----------------------------------------------------------------------
 // microtasking routines specifically written for IA-32 architecture
@@ -1585,6 +1585,16 @@ __kmp_unnamed_critical_addr:
     .size __kmp_unnamed_critical_addr,4
 #endif /* KMP_ARCH_ARM */

+#if KMP_ARCH_PPC64
+    .data
+    .comm .gomp_critical_user_,32,8
+    .data
+    .align 8
+    .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+    .8byte .gomp_critical_user_
+    .size __kmp_unnamed_critical_addr,8
+#endif /* KMP_ARCH_PPC64 */

 #if defined(__linux__)
 .section .note.GNU-stack,"",@progbits
diff --git a/runtime/src/z_Linux_util.c b/runtime/src/z_Linux_util.c
index 7633f99..348f5d8 100644
--- a/runtime/src/z_Linux_util.c
+++ b/runtime/src/z_Linux_util.c
@@ -32,7 +32,7 @@
 #include <sys/resource.h>
 #include <sys/syscall.h>

-#if KMP_OS_LINUX
+#if KMP_OS_LINUX && !KMP_OS_CNK
 # include <sys/sysinfo.h>
 # if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 // We should really include <futex.h>, but that causes compatibility
problems on different
@@ -61,7 +61,7 @@
 #include <fcntl.h>

 // For non-x86 architecture
-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
KMP_ARCH_PPC64)
 # include <stdbool.h>
 # include <ffi.h>
 #endif
@@ -110,7 +110,7 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t
*cond )
 /*
------------------------------------------------------------------------ */
 /*
------------------------------------------------------------------------ */

-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)

 /*
  * Affinity support
@@ -147,6 +147,19 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t
*cond )
 #    error Wrong code for getaffinity system call.
 #   endif /* __NR_sched_getaffinity */

+#  elif KMP_ARCH_PPC64
+#   ifndef __NR_sched_setaffinity
+#    define __NR_sched_setaffinity  222
+#   elif __NR_sched_setaffinity != 222
+#    error Wrong code for setaffinity system call.
+#   endif /* __NR_sched_setaffinity */
+#   ifndef __NR_sched_getaffinity
+#    define __NR_sched_getaffinity  223
+#   elif __NR_sched_getaffinity != 223
+#    error Wrong code for getaffinity system call.
+#   endif /* __NR_sched_getaffinity */
+
+
 #  else
 #   error Unknown or unsupported architecture

@@ -445,7 +458,7 @@ __kmp_change_thread_affinity_mask( int gtid,
kmp_affin_mask_t *new_mask,
 /*
------------------------------------------------------------------------ */
 /*
------------------------------------------------------------------------ */

-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
&& !KMP_OS_CNK

 int
 __kmp_futex_determine_capable()
@@ -462,7 +475,7 @@ __kmp_futex_determine_capable()
     return retval;
 }

-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
KMP_ARCH_ARM)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
KMP_ARCH_ARM) && !KMP_OS_CNK

 /*
------------------------------------------------------------------------ */
 /*
------------------------------------------------------------------------ */
@@ -481,7 +494,7 @@ __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32
d )
     old_value = TCR_4( *p );
     new_value = old_value | d;

-    while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
+    while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
     {
         KMP_CPU_PAUSE();
         old_value = TCR_4( *p );
@@ -498,7 +511,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32
d )
     old_value = TCR_4( *p );
     new_value = old_value & d;

-    while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
+    while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
     {
         KMP_CPU_PAUSE();
         old_value = TCR_4( *p );
@@ -507,7 +520,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32
d )
     return old_value;
 }

-# if KMP_ARCH_X86
+# if KMP_ARCH_X86 || KMP_ARCH_PPC64
 kmp_int64
 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
 {
@@ -516,7 +529,7 @@ __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64
d )
     old_value = TCR_8( *p );
     new_value = old_value + d;

-    while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+    while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
     {
         KMP_CPU_PAUSE();
         old_value = TCR_8( *p );
@@ -533,7 +546,7 @@ __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64
d )

     old_value = TCR_8( *p );
     new_value = old_value | d;
-    while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+    while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
     {
         KMP_CPU_PAUSE();
         old_value = TCR_8( *p );
@@ -549,7 +562,7 @@ __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64
d )

     old_value = TCR_8( *p );
     new_value = old_value & d;
-    while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+    while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
     {
         KMP_CPU_PAUSE();
         old_value = TCR_8( *p );
@@ -2527,7 +2540,7 @@ __kmp_get_load_balance( int max )
 #endif // USE_LOAD_BALANCE


-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
KMP_ARCH_PPC64)

 int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
         void *p_argv[] )
@@ -2561,7 +2574,89 @@ int __kmp_invoke_microtask( microtask_t pkfn, int
gtid, int tid, int argc,
     return 1;
 }

-#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
KMP_ARCH_PPC64)
+
+#if KMP_ARCH_PPC64
+
+// we really only need the case with 1 argument, because CLANG always
build
+// a struct of pointers to shared variables referenced in the outlined
function
+int
+__kmp_invoke_microtask( microtask_t pkfn,
+                        int gtid, int tid,
+                        int argc, void *p_argv[] ) {
+  switch (argc) {
+  default:
+    fprintf(stderr, "Too many args to microtask: %d!\n", argc);
+    fflush(stderr);
+    exit(-1);
+  case 0:
+    (*pkfn)(&gtid, &tid);
+    break;
+  case 1:
+    (*pkfn)(&gtid, &tid, p_argv[0]);
+    break;
+  case 2:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
+    break;
+  case 3:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
+    break;
+  case 4:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
+    break;
+  case 5:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4]);
+    break;
+  case 6:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5]);
+    break;
+  case 7:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6]);
+    break;
+  case 8:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7]);
+    break;
+  case 9:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
+    break;
+  case 10:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
+    break;
+  case 11:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv
[10]);
+    break;
+  case 12:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv
[10],
+            p_argv[11]);
+    break;
+  case 13:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv
[10],
+            p_argv[11], p_argv[12]);
+    break;
+  case 14:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv
[10],
+            p_argv[11], p_argv[12], p_argv[13]);
+    break;
+  case 15:
+    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
p_argv[4],
+            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv
[10],
+            p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
+    break;
+  }
+
+  return 1;
+}
+
+#endif

 // end of file //

diff --git a/runtime/tools/lib/Platform.pm b/runtime/tools/lib/Platform.pm
index 5b399f7..763f3cb 100644
--- a/runtime/tools/lib/Platform.pm
+++ b/runtime/tools/lib/Platform.pm
@@ -50,6 +50,8 @@ sub canon_arch($) {
             $arch = "32e";
         } elsif ( $arch =~ m{\Aarm(?:v7\D*)?\z} ) {
             $arch = "arm";
+        } elsif ( $arch =~ m{\Appc64} ) {
+        	$arch = "ppc64";
         } else {
             $arch = undef;
         }; # if
@@ -62,6 +64,7 @@ sub canon_arch($) {
         "32"  => "IA-32 architecture",
         "32e" => "Intel(R) 64",
         "arm" => "ARM",
+        "ppc64" => "PPC64",
     );

     sub legal_arch($) {
@@ -159,6 +162,8 @@ sub target_options() {
         $_host_arch = "32e";
     } elsif ( $hardware_platform eq "arm" ) {
         $_host_arch = "arm";
+    } elsif ( $hardware_platform eq "ppc64" ) {
+        $_host_arch = "ppc64";
     } else {
         die "Unsupported host hardware platform: \"$hardware_platform\";
stopped";
     }; # if
diff --git a/runtime/tools/lib/Uname.pm b/runtime/tools/lib/Uname.pm
index e212501..52518b4 100644
--- a/runtime/tools/lib/Uname.pm
+++ b/runtime/tools/lib/Uname.pm
@@ -147,6 +147,8 @@ if ( 0 ) {
         $values{ hardware_platform } = "x86_64";
     } elsif ( $values{ machine } =~ m{\Aarmv7\D*\z} ) {
         $values{ hardware_platform } = "arm";
+    } elsif ( $values{ machine } =~ m{\Appc64\z} ) {
+        $values{ hardware_platform } = "ppc64";
     } else {
         die "Unsupported machine (\"$values{ machine }\") returned by
POSIX::uname(); stopped";
     }; # if
diff --git a/runtime/tools/src/common-defs.mk
b/runtime/tools/src/common-defs.mk
index ebd1922..7eb64b0 100644
--- a/runtime/tools/src/common-defs.mk
+++ b/runtime/tools/src/common-defs.mk
@@ -45,7 +45,7 @@ endif
 # Description:
 #     The function return printable name of specified architecture, IA-32
architecture or Intel(R) 64.
 #
-legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R)
64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(error Bad
architecture specified: $(1))))))
+legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R)
64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(if $(filter
ppc64,$(1)),PPC64,$(error Bad architecture specified: $(1)))))))

 # Synopsis:
 #     var_name = $(call check_variable,var,list)
@@ -128,9 +128,9 @@ endif
 #
--------------------------------------------------------------------------------------------------

 os       := $(call check_variable,os,lin lrb mac win)
-arch     := $(call check_variable,arch,32 32e 64 arm)
+arch     := $(call check_variable,arch,32 32e 64 arm ppc64)
 platform := $(os)_$(arch)
-platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm
lrb_32e mac_32 mac_32e win_32 win_32e win_64)
+platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm
lrb_32e mac_32 mac_32e win_32 win_32e win_64 lin_ppc64)
 # oa-opts means "os and arch options". They are passed to almost all perl
scripts.
 oa-opts  := --os=$(os) --arch=$(arch)

diff --git a/runtime/tools/src/common-tools.mk
b/runtime/tools/src/common-tools.mk
index a9c9fbc..8c86791 100644
--- a/runtime/tools/src/common-tools.mk
+++ b/runtime/tools/src/common-tools.mk
@@ -34,7 +34,7 @@
 # "No rule to build .\kmp_i18n.inc". Using "./" solves the problem.
 cpp-flags += -I ./
 # For non-x86 architecture
-ifeq "$(filter 32 32e 64,$(arch))" ""
+ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
     cpp-flags += $(shell pkg-config --cflags libffi)
 endif
 # Add all VPATH directories to path for searching include files.




> ------------------------------
>
> Message: 2
> Date: Thu, 17 Jul 2014 13:43:56 -0400
> From: Carlo Bertolli <cbertol at us.ibm.com>
> To: openmp-commits at dcs-maillist2.engr.illinois.edu
> Cc: Michael Wong <michaelw at ca.ibm.com>, "Peyton,   Jonathan L"
>    <jonathan.l.peyton at intel.com>,   Samuel F Antao <sfantao at us.ibm.com>
> Subject: [Openmp-commits] Add support for BGQ and PPC64
> Message-ID:
>    <OFB10D84DF.82E6C374-ON85257D18.0060BAA2-85257D18.006166FF at us.ibm.com>
> Content-Type: text/plain; charset="us-ascii"
>
>
>
> Hi,
>
> Attached is a patch built on top of Hal Finkel's BGQ patch for PPC64
> (thanks, Hal).
> In this patch, I extended the existing cmake system while waiting for
> patches from Intel; I will fix this up once they get in.
>
> -- Carlo
>
>
>
> diff --git a/runtime/Makefile.bgq b/runtime/Makefile.bgq
> new file mode 100644
> index 0000000..d9aa914
> --- /dev/null
> +++ b/runtime/Makefile.bgq
> @@ -0,0 +1,76 @@
> +date := $(shell date '+%Y%m%d')
> +
> +FEATURE_FLAGS = -DOMP_40_ENABLED=1 -DOMP_30_ENABLED=1
-DOMP_VERSION=201107
> -DKMP_VERSION_MAJOR=5 \
> +      -DCACHE_LINE=64 -DKMP_ADJUST_BLOCKTIME=1 -DBUILD_I8
> -DKMP_USE_ADAPTIVE_LOCKS=0 \
> +      -DKMP_DEBUG_ADAPTIVE_LOCKS=0 -DINTEL_NO_ITTNOTIFY_API
> -DKMP_VERSION_MINOR=0 -DKMP_VERSION_BUILD=0 \
> +      -DKMP_LIBRARY_FILE=\"libiomp5.so\" -D_KMP_BUILD_TIME="\"$(date)\""
> -DKMP_GOMP_COMPAT
> +
> +CPPFLAGS = ${FEATURE_FLAGS} -D__float128='long double'
> +
> +CC = powerpc64-bgq-linux-gcc
> +CXX = powerpc64-bgq-linux-g++
> +
> +all: build/libiomp5.a build/libiomp5.so
> +
> +build/.dir:
> +   mkdir -p build
> +   touch build/.dir
> +
> +build/omp.h:
> +   perl tools/expand-vars.pl --strict $$(echo $(FEATURE_FLAGS) | sed
> 's/-D/-D /g') \
> +     -D KMP_BUILD_DATE=$(date) -D Revision=Revision -D Date=Date \
> +     src/include/40/omp.h.var build/omp.h
> +
> +build/%.o: src/%.cpp build/kmp_i18n_id.inc build/kmp_i18n_default.inc
> build/omp.h
> +   ${CXX} -c ${CPPFLAGS} -g -O3 -Isrc -Ibuild -o $@ $<
> +
> +build/%.o: src/%.c build/kmp_i18n_id.inc build/kmp_i18n_default.inc
> build/omp.h
> +   ${CC} -x c++ -c ${CPPFLAGS} -g -O3 -Isrc -Ibuild -o $@ $<
> +
> +OBJS =   build/kmp_alloc.o \
> +   build/kmp_atomic.o \
> +   build/kmp_cancel.o \
> +   build/kmp_csupport.o \
> +   build/kmp_dispatch.o \
> +   build/kmp_debug.o \
> +   build/kmp_environment.o \
> +   build/kmp_error.o \
> +   build/kmp_ftn_cdecl.o \
> +   build/kmp_ftn_extra.o \
> +   build/kmp_ftn_stdcall.o \
> +   build/kmp_global.o \
> +   build/kmp_i18n.o \
> +   build/kmp_io.o \
> +   build/kmp_itt.o \
> +   build/kmp_runtime.o \
> +   build/kmp_settings.o \
> +   build/kmp_sched.o \
> +   build/kmp_str.o \
> +   build/kmp_tasking.o \
> +   build/kmp_taskq.o \
> +   build/kmp_taskdeps.o \
> +   build/kmp_threadprivate.o \
> +   build/kmp_utility.o \
> +   build/kmp_version.o \
> +   build/kmp_lock.o \
> +   build/z_Linux_util.o
> +
> +BGSYS_FLOOR=$(shell readlink /bgsys/drivers/ppcfloor)
> +build/libiomp5.so: $(OBJS)
> +   ${CXX} -Wl,--build-id -Wl,-rpath -Wl,$
> {BGSYS_FLOOR}/gnu-linux/powerpc64-bgq-linux/lib -shared -o $@.1.0 $^
> -Wl,-soname,$(shell basename $@.1) -lpthread
> +   (cd $(shell dirname $@) && ln -sf $(shell basename $@.1.0) $(shell
> basename $@.1))
> +   (cd $(shell dirname $@) && ln -sf $(shell basename $@.1) $(shell
> basename $@))
> +# Note: We should not need the --build-id when we switch to clang (it
> should add it for us).
> +
> +build/libiomp5.a: $(OBJS)
> +   ar cr $@ $^
> +
> +build/kmp_i18n_id.inc: src/i18n/en_US.txt tools/message-converter.pl
> build/.dir
> +   perl tools/message-converter.pl --prefix=kmp_i18n --enum=$@ $<
> +
> +build/kmp_i18n_default.inc: src/i18n/en_US.txt
tools/message-converter.pl
> build/.dir
> +   perl tools/message-converter.pl --prefix=kmp_i18n --default=$@ $<
> +
> +clean:
> +   rm -rf build
> +
> diff --git a/runtime/src/CMakeLists.txt b/runtime/src/CMakeLists.txt
> index dd29966..4814cb4 100644
> --- a/runtime/src/CMakeLists.txt
> +++ b/runtime/src/CMakeLists.txt
> @@ -12,6 +12,9 @@ include_directories(
>    ${CMAKE_CURRENT_BINARY_DIR}
>  )
>
> +# detect architecture
> +EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE
> ARCH )
> +
>  if(WIN32)
>    set(OS_GEN "win")
>  elseif(APPLE)
> @@ -22,12 +25,14 @@ else()
>    message(FATAL_ERROR "Unsupported OS")
>  endif()
>
> -if("${ARCH}" STREQUAL "")
> +if(NOT "${ARCH}" STREQUAL "ppc64")
>    set(ARCH "32e")
> +  set(ARCH_STR "Intel(R) 64")
> +else()
> +# arch already set above
> +  set(ARCH_STR "PPC64")
>  endif()
>
> -set(ARCH_STR "Intel(R) 64")
> -
>  set(FEATURE_FLAGS "-D USE_ITT_BUILD")
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D NDEBUG")
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_ARCH_STR=\"\\\"$
> {ARCH_STR}\\\"\"")
> @@ -50,8 +55,13 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D
> USE_LOAD_BALANCE")
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_CBLKDATA")
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D GUIDEDLL_EXPORTS")
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_GOMP_COMPAT")
> -set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")
> -set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")
> +
> +#adaptive locks use x86 assembly - disable for ppc64
> +if(NOT "${ARCH}" STREQUAL "ppc64")
> +   set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")
> +   set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")
> +endif()
> +
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_50_ENABLED=0")
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_41_ENABLED=0")
>  set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_40_ENABLED=1")
> @@ -122,16 +132,35 @@ add_custom_command(
>    OUTPUT omp.h
>      COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/../tools/expand-vars.pl
> --strict -D Revision=\"\\$$Revision\" -D Date=\"\\$$Date\" -D KMP_TYPE=
> \"Performance\" -D KMP_ARCH=\"\\\"${ARCH_STR}\\\"\" -D KMP_VERSION_MAJOR=
$
> {VERSION} -D KMP_VERSION_MINOR=0 -D KMP_VERSION_BUILD=00000000 -D
> KMP_BUILD_DATE=\"${BUILD_TIME} UTC\" -D KMP_TARGET_COMPILER=12 -D
> KMP_DIAG=0 -D KMP_DEBUG_INFO=0 -D OMP_VERSION=${OMP_VERSION} $
> {CMAKE_CURRENT_SOURCE_DIR}/include/${OMP_VERSION_NUM}/omp.h.var omp.h
>  )
> -add_custom_command(
> -  OUTPUT z_Linux_asm.o
> -    COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS
> -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp $
> {CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
> -)
> +
> +if(NOT "${ARCH}" STREQUAL "ppc64")
> +   add_custom_command(
> +        OUTPUT z_Linux_asm.o
> +          COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D
> KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x
assembler-with-cpp
> ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
> +   )
> +else()
> +   add_custom_command(
> +        OUTPUT z_Linux_asm.o
> +          COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D
> KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_PPC64 -x
assembler-with-cpp
> ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
> +   )
> +
> +endif()
> +
>
>  add_custom_target(gen_kmp_i18n DEPENDS kmp_i18n_id.inc
> kmp_i18n_default.inc omp.h z_Linux_asm.o)
>
>  if(NOT APPLE)
> -  set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=$
> {CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt")
> +   set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=$
> {CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt -ldl")
>  endif()
>
>  add_library(iomp5 SHARED ${SOURCES} z_Linux_asm.o)
> +
> +# This is a workaround to a known ppc64 issue about libpthread. For more
> +# information see
> +#
> http://ryanarn.blogspot.com/2011/07/curious-case-of-pthreadatfork-on.html
> +if("${ARCH}" STREQUAL "ppc64")
> +   find_library(PTHREAD NAMES pthread)
> +   target_link_libraries(iomp5 ${PTHREAD})
> +endif()
> +
>  add_dependencies(iomp5 gen_kmp_i18n)
> diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
> index a8c600b..f5dd10f 100644
> --- a/runtime/src/kmp.h
> +++ b/runtime/src/kmp.h
> @@ -459,9 +459,9 @@ typedef int PACKED_REDUCTION_METHOD_T;
>  /*
>   * Only Linux* OS and Windows* OS support thread affinity.
>   */
> -#if KMP_OS_LINUX || KMP_OS_WINDOWS
> +#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64
>  # define KMP_AFFINITY_SUPPORTED 1
> -#elif KMP_OS_DARWIN || KMP_OS_FREEBSD
> +#elif KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_CNK || KMP_ARCH_PPC64
>  // affinity not supported
>  # define KMP_AFFINITY_SUPPORTED 0
>  #else
> @@ -476,7 +476,7 @@ extern size_t __kmp_affin_mask_size;
>
>  # if KMP_OS_LINUX
>  //
> -// On Linux* OS, the mask isactually a vector of length
> __kmp_affin_mask_size
> +// On Linux* OS, the mask is actually a vector of length
> __kmp_affin_mask_size
>  // (in bytes).  It should be allocated on a word boundary.
>  //
>  // WARNING!!!  We have made the base type of the affinity mask unsigned
> char,
> @@ -946,6 +946,9 @@ extern unsigned int __kmp_place_core_offset;
>  #if KMP_OS_WINDOWS
>  #  define KMP_INIT_WAIT    64U          /* initial number of spin-tests
> */
>  #  define KMP_NEXT_WAIT    32U          /* susequent number of
spin-tests
> */
> +#elif KMP_OS_CNK
> +#  define KMP_INIT_WAIT    16U          /* initial number of spin-tests
> */
> +#  define KMP_NEXT_WAIT     8U          /* susequent number of
spin-tests
> */
>  #elif KMP_OS_LINUX
>  #  define KMP_INIT_WAIT  1024U          /* initial number of spin-tests
> */
>  #  define KMP_NEXT_WAIT   512U          /* susequent number of
spin-tests
> */
> @@ -971,6 +974,11 @@ extern void __kmp_x86_cpuid( int mode, int mode2,
> struct kmp_cpuid *p );
>    extern void __kmp_x86_pause( void );
>  # endif
>  # define KMP_CPU_PAUSE()        __kmp_x86_pause()
> +#elif KMP_ARCH_PPC64
> +# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
> +# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
> +# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
> +# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED();
> KMP_PPC64_PRI_LOC_MB(); } while (0)
>  #else
>  # define KMP_CPU_PAUSE()        /* nothing to do */
>  #endif
> diff --git a/runtime/src/kmp_csupport.c b/runtime/src/kmp_csupport.c
> index 18b6c35..0b7d3ed 100644
> --- a/runtime/src/kmp_csupport.c
> +++ b/runtime/src/kmp_csupport.c
> @@ -837,6 +837,19 @@ __kmpc_flush(ident_t *loc, ...)
>              #endif // KMP_MIC
>          #elif KMP_ARCH_ARM
>              // Nothing yet
> +   #elif KMP_ARCH_PPC64
> +            // Nothing needed here (we have a real MB above).
> +            #if KMP_OS_CNK
> +      // The flushing thread needs to yield here; this prevents a
> +      // busy-waiting thread from saturating the pipeline. flush is
> +      // often used in loops like this:
> +                // while (!flag) {
> +                //   #pragma omp flush(flag)
> +                // }
> +      // and adding the yield here is good for at least a 10x speedup
> +      // when running >2 threads per core (on the NAS LU benchmark).
> +                __kmp_yield(TRUE);
> +            #endif
>          #else
>              #error Unknown or unsupported architecture
>          #endif
> diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h
> index f241751..d78d846 100644
> --- a/runtime/src/kmp_ftn_os.h
> +++ b/runtime/src/kmp_ftn_os.h
> @@ -478,7 +478,7 @@
>  //#define KMP_API_NAME_GOMP_TARGET_UPDATE
> GOMP_target_update
>  #define KMP_API_NAME_GOMP_TEAMS                          GOMP_teams
>
> -#if KMP_OS_LINUX
> +#if KMP_OS_LINUX && !KMP_OS_CNK && !KMP_ARCH_PPC64
>      #define xstr(x) str(x)
>      #define str(x) #x
>
> diff --git a/runtime/src/kmp_global.c b/runtime/src/kmp_global.c
> index aa1f8e3..d3c3195 100644
> --- a/runtime/src/kmp_global.c
> +++ b/runtime/src/kmp_global.c
> @@ -321,7 +321,11 @@ int        __kmp_env_consistency_check  = FALSE;  /*
> KMP_CONSISTENCY_CHECK speci
>  kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
>  kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
>  kmp_uint32 __kmp_yielding_on = 1;
> +#if KMP_OS_CNK
> +kmp_uint32 __kmp_yield_cycle = 0;
> +#else
>  kmp_uint32 __kmp_yield_cycle = 1;     /* Yield-cycle is on by default */
> +#endif
>  kmp_int32  __kmp_yield_on_count = 10; /* By default, yielding is on for
10
> monitor periods. */
>  kmp_int32  __kmp_yield_off_count = 1; /* By default, yielding is off for
1
> monitor periods. */
>  /* ----------------------------------------------------- */
> diff --git a/runtime/src/kmp_gsupport.c b/runtime/src/kmp_gsupport.c
> index 9d8e553..aa52024 100644
> --- a/runtime/src/kmp_gsupport.c
> +++ b/runtime/src/kmp_gsupport.c
> @@ -15,7 +15,7 @@
>
> //
>
===----------------------------------------------------------------------===//

>
>
> -#if defined(__x86_64)
> +#if defined(__x86_64) || defined (__powerpc64__)
>  # define KMP_I8
>  #endif
>  #include "kmp.h"
> diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h
> index 8009d18..c5ce838 100644
> --- a/runtime/src/kmp_lock.h
> +++ b/runtime/src/kmp_lock.h
> @@ -518,7 +518,7 @@ __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t
> *lck )
>  // Internal RTL locks are also implemented as ticket locks, for now.
>  //
>  // FIXME - We should go through and figure out which lock kind works
best
> for
> -// each internal lock, and use the type deeclaration and function calls
> for
> +// each internal lock, and use the type declaration and function calls
for
>  // that explicit lock kind (and get rid of this section).
>  //
>
> diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h
> index bb5e72f..db1981e 100644
> --- a/runtime/src/kmp_os.h
> +++ b/runtime/src/kmp_os.h
> @@ -66,10 +66,12 @@
>  #define KMP_OS_FREEBSD  0
>  #define KMP_OS_DARWIN   0
>  #define KMP_OS_WINDOWS    0
> +#define KMP_OS_CNK      0
>  #define KMP_OS_UNIX     0  /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN
> etc. */
>
>  #define KMP_ARCH_X86        0
>  #define KMP_ARCH_X86_64       0
> +#define KMP_ARCH_PPC64      0
>
>  #ifdef _WIN32
>  # undef KMP_OS_WINDOWS
> @@ -91,6 +93,11 @@
>  # define KMP_OS_FREEBSD 1
>  #endif
>
> +#if ( defined __bgq__ )
> +# undef KMP_OS_CNK
> +# define KMP_OS_CNK 1
> +#endif
> +
>  #if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_DARWIN +
KMP_OS_WINDOWS)
>  # error Unknown OS
>  #endif
> @@ -117,6 +124,9 @@
>  # elif defined __i386
>  #  undef KMP_ARCH_X86
>  #  define KMP_ARCH_X86 1
> +# elif defined __powerpc64__
> +#  undef KMP_ARCH_PPC64
> +#  define KMP_ARCH_PPC64 1
>  # endif
>  #endif
>
> @@ -156,7 +166,7 @@
>  # define KMP_ARCH_ARM 1
>  #endif
>
> -#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM)
> +#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM +
KMP_ARCH_PPC64)
>  # error Unknown or unsupported architecture
>  #endif
>
> @@ -232,7 +242,7 @@
>
>  #if KMP_ARCH_X86 || KMP_ARCH_ARM
>  # define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
> -#elif KMP_ARCH_X86_64
> +#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64
>  # define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
>  #else
>  # error "Can't determine size_t printf format specifier."
> @@ -657,6 +667,10 @@ extern kmp_real64 __kmp_test_then_add_real64
> ( volatile kmp_real64 *p, kmp_real6
>  # endif
>  #endif /* KMP_OS_WINDOWS */
>
> +#if KMP_ARCH_PPC64
> +# define KMP_MB()       __sync_synchronize()
> +#endif
> +
>  #ifndef KMP_MB
>  # define KMP_MB()       /* nothing to do */
>  #endif
> @@ -763,7 +777,7 @@ typedef void    (*microtask_t)( int *gtid, int
> *npr, ... );
>  #endif /* KMP_I8 */
>
>  /* Workaround for Intel(R) 64 code gen bug when taking address of static
> array (Intel(R) 64 Tracker #138) */
> -#if KMP_ARCH_X86_64 && KMP_OS_LINUX
> +#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
>  # define STATIC_EFI2_WORKAROUND
>  #else
>  # define STATIC_EFI2_WORKAROUND static
> diff --git a/runtime/src/kmp_runtime.c b/runtime/src/kmp_runtime.c
> index fea41d0..d243700 100644
> --- a/runtime/src/kmp_runtime.c
> +++ b/runtime/src/kmp_runtime.c
> @@ -8450,7 +8450,7 @@ __kmp_determine_reduction_method( ident_t *loc,
> kmp_int32 global_tid,
>          int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
>          int tree_available   = FAST_REDUCTION_TREE_METHOD_GENERATED;
>
> -        #if KMP_ARCH_X86_64
> +        #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64
>
>              #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS ||
> KMP_OS_DARWIN
>                  #if KMP_MIC
> diff --git a/runtime/src/kmp_settings.c b/runtime/src/kmp_settings.c
> index 54745cb..b85678e 100644
> --- a/runtime/src/kmp_settings.c
> +++ b/runtime/src/kmp_settings.c
> @@ -536,6 +536,7 @@ __kmp_stg_parse_file(
>
>  static char * par_range_to_print = NULL;
>
> +#ifdef KMP_DEBUG
>  static void
>  __kmp_stg_parse_par_range(
>      char const * name,
> @@ -614,7 +615,7 @@ __kmp_stg_parse_par_range(
>          break;
>      }
>  } // __kmp_stg_parse_par_range
> -
> +#endif
>
>  int
>  __kmp_initial_threads_capacity( int req_nproc )
> diff --git a/runtime/src/kmp_version.c b/runtime/src/kmp_version.c
> index f64d052..5dc82d1 100644
> --- a/runtime/src/kmp_version.c
> +++ b/runtime/src/kmp_version.c
> @@ -20,7 +20,7 @@
>  #include "kmp_version.h"
>
>  // Replace with snapshot date YYYYMMDD for promotion build.
> -#define KMP_VERSION_BUILD    00000000
> +//#define KMP_VERSION_BUILD    00000000
>
>  // Helper macros to convert value of macro to string literal.
>  #define _stringer( x ) #x
> diff --git a/runtime/src/makefile.mk b/runtime/src/makefile.mk
> index 9be8799..84d6ac2 100644
> --- a/runtime/src/makefile.mk
> +++ b/runtime/src/makefile.mk
> @@ -310,6 +310,9 @@ endif
>  ifeq "$(CPLUSPLUS)" "on"
>      ifeq "$(os)" "win"
>          c-flags   += -TP
> +    else ifeq "$(arch)" "ppc64"
> +    # c++0x on ppc64 linux removes definition of preproc. macros, needed
> in .hs
> +       c-flags   += -x c++ -std=gnu++0x
>      else
>          ifneq "$(filter gcc clang,$(c))" ""
>              c-flags   += -x c++ -std=c++0x
> @@ -370,7 +373,7 @@ ifeq "$(os)" "lin"
>              ld-flags-extra += -lirc_pic
>              endif
>          endif
> -        ifeq "$(filter 32 32e 64,$(arch))" ""
> +        ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
>              ld-flags-extra += $(shell pkg-config --libs libffi)
>          endif
>      else
> @@ -581,9 +584,12 @@ ifneq "$(os)" "win"
>      ifeq "$(arch)" "arm"
>          z_Linux_asm$(obj) : \
>            cpp-flags += -D KMP_ARCH_ARM
> -    else
> +    else ifeq "$(arch)" "ppc64"
> +        z_Linux_asm$(obj) : \
> +          cpp-flags += -D KMP_ARCH_PPC64
> +   else
>          z_Linux_asm$(obj) : \
> -            cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
> +              cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$
(arch)),_64)
>
>      endif
>  endif
>
> @@ -729,7 +735,9 @@ endif
>          else # 5
>              lib_c_items += kmp_gsupport
>          endif
> +#        ifneq "$(arch)" "ppc64"
>          lib_asm_items += z_Linux_asm
> +#       endif
>      endif
>  endif
>
> @@ -1391,9 +1399,13 @@ ifneq "$(filter %-dyna win-%,$(os)-$(LINK_TYPE))"
""
>              td_exp += libc.so.6
>              td_exp += ld-linux-armhf.so.3
>          endif
> +        ifeq "$(arch)" "ppc64"
> +            td_exp += libc.so.6
> +            td_exp += ld64.so.1
> +        endif
>          td_exp += libdl.so.2
>          td_exp += libgcc_s.so.1
> -        ifeq "$(filter 32 32e 64,$(arch))" ""
> +        ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
>              td_exp += libffi.so.6
>              td_exp += libffi.so.5
>          endif
> diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_config.h
> b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
> index 40c8614..9e7b36b 100644
> --- a/runtime/src/thirdparty/ittnotify/ittnotify_config.h
> +++ b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
> @@ -132,6 +132,11 @@
>  #  define ITT_ARCH_ARM  4
>  #endif /* ITT_ARCH_ARM */
>
> +#ifndef ITT_ARCH_PPC64
> +#  define ITT_ARCH_PPC64  5
> +#endif /* ITT_ARCH_PPC64 */
> +
> +
>  #ifndef ITT_ARCH
>  #  if defined _M_IX86 || defined __i386__
>  #    define ITT_ARCH ITT_ARCH_IA32
> @@ -141,6 +146,8 @@
>  #    define ITT_ARCH ITT_ARCH_IA64
>  #  elif defined _M_ARM || __arm__
>  #    define ITT_ARCH ITT_ARCH_ARM
> +#  elif defined __powerpc64__
> +#    define ITT_ARCH ITT_ARCH_PPC64
>  #  endif
>  #endif
>
> @@ -274,7 +281,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile
void*
> ptr, long addend)
>                            : "memory");
>      return result;
>  }
> -#elif ITT_ARCH==ITT_ARCH_ARM
> +#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64
>  #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr,
val)
>  #endif /* ITT_ARCH==ITT_ARCH_IA64 */
>  #ifndef ITT_SIMPLE_INIT
> diff --git a/runtime/src/z_Linux_asm.s b/runtime/src/z_Linux_asm.s
> index 1f1ba1b..64c8052 100644
> --- a/runtime/src/z_Linux_asm.s
> +++ b/runtime/src/z_Linux_asm.s
> @@ -138,7 +138,7 @@ __kmp_unnamed_critical_addr:
>  #endif /* KMP_GOMP_COMPAT */
>
>
> -#if KMP_ARCH_X86
> +#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
>
>  //
-----------------------------------------------------------------------
>  // microtasking routines specifically written for IA-32 architecture
> @@ -1585,6 +1585,16 @@ __kmp_unnamed_critical_addr:
>      .size __kmp_unnamed_critical_addr,4
>  #endif /* KMP_ARCH_ARM */
>
> +#if KMP_ARCH_PPC64
> +    .data
> +    .comm .gomp_critical_user_,32,8
> +    .data
> +    .align 8
> +    .global __kmp_unnamed_critical_addr
> +__kmp_unnamed_critical_addr:
> +    .8byte .gomp_critical_user_
> +    .size __kmp_unnamed_critical_addr,8
> +#endif /* KMP_ARCH_PPC64 */
>
>  #if defined(__linux__)
>  .section .note.GNU-stack,"", at progbits
> diff --git a/runtime/src/z_Linux_util.c b/runtime/src/z_Linux_util.c
> index 7633f99..348f5d8 100644
> --- a/runtime/src/z_Linux_util.c
> +++ b/runtime/src/z_Linux_util.c
> @@ -32,7 +32,7 @@
>  #include <sys/resource.h>
>  #include <sys/syscall.h>
>
> -#if KMP_OS_LINUX
> +#if KMP_OS_LINUX && !KMP_OS_CNK
>  # include <sys/sysinfo.h>
>  # if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
>  // We should really include <futex.h>, but that causes compatibility
> problems on different
> @@ -61,7 +61,7 @@
>  #include <fcntl.h>
>
>  // For non-x86 architecture
> -#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
> +#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
> KMP_ARCH_PPC64)
>  # include <stdbool.h>
>  # include <ffi.h>
>  #endif
> @@ -110,7 +110,7 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t
> *cond )
>  /*
> ------------------------------------------------------------------------
*/
>  /*
> ------------------------------------------------------------------------
*/
>
> -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
> +#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
>
>  /*
>   * Affinity support
> @@ -147,6 +147,19 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t
> *cond )
>  #    error Wrong code for getaffinity system call.
>  #   endif /* __NR_sched_getaffinity */
>
> +#  elif KMP_ARCH_PPC64
> +#   ifndef __NR_sched_setaffinity
> +#    define __NR_sched_setaffinity  222
> +#   elif __NR_sched_setaffinity != 222
> +#    error Wrong code for setaffinity system call.
> +#   endif /* __NR_sched_setaffinity */
> +#   ifndef __NR_sched_getaffinity
> +#    define __NR_sched_getaffinity  223
> +#   elif __NR_sched_getaffinity != 223
> +#    error Wrong code for getaffinity system call.
> +#   endif /* __NR_sched_getaffinity */
> +
> +
>  #  else
>  #   error Unknown or unsupported architecture
>
> @@ -445,7 +458,7 @@ __kmp_change_thread_affinity_mask( int gtid,
> kmp_affin_mask_t *new_mask,
>  /*
> ------------------------------------------------------------------------
*/
>  /*
> ------------------------------------------------------------------------
*/
>
> -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
> +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
> && !KMP_OS_CNK
>
>  int
>  __kmp_futex_determine_capable()
> @@ -462,7 +475,7 @@ __kmp_futex_determine_capable()
>      return retval;
>  }
>
> -#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
> KMP_ARCH_ARM)
> +#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
> KMP_ARCH_ARM) && !KMP_OS_CNK
>
>  /*
> ------------------------------------------------------------------------
*/
>  /*
> ------------------------------------------------------------------------
*/
> @@ -481,7 +494,7 @@ __kmp_test_then_or32( volatile kmp_int32 *p,
kmp_int32
> d )
>      old_value = TCR_4( *p );
>      new_value = old_value | d;
>
> -    while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
> +    while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
>      {
>          KMP_CPU_PAUSE();
>          old_value = TCR_4( *p );
> @@ -498,7 +511,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p,
kmp_int32
> d )
>      old_value = TCR_4( *p );
>      new_value = old_value & d;
>
> -    while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
> +    while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
>      {
>          KMP_CPU_PAUSE();
>          old_value = TCR_4( *p );
> @@ -507,7 +520,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p,
kmp_int32
> d )
>      return old_value;
>  }
>
> -# if KMP_ARCH_X86
> +# if KMP_ARCH_X86 || KMP_ARCH_PPC64
>  kmp_int64
>  __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
>  {
> @@ -516,7 +529,7 @@ __kmp_test_then_add64( volatile kmp_int64 *p,
kmp_int64
> d )
>      old_value = TCR_8( *p );
>      new_value = old_value + d;
>
> -    while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
> +    while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
>      {
>          KMP_CPU_PAUSE();
>          old_value = TCR_8( *p );
> @@ -533,7 +546,7 @@ __kmp_test_then_or64( volatile kmp_int64 *p,
kmp_int64
> d )
>
>      old_value = TCR_8( *p );
>      new_value = old_value | d;
> -    while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
> +    while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
>      {
>          KMP_CPU_PAUSE();
>          old_value = TCR_8( *p );
> @@ -549,7 +562,7 @@ __kmp_test_then_and64( volatile kmp_int64 *p,
kmp_int64
> d )
>
>      old_value = TCR_8( *p );
>      new_value = old_value & d;
> -    while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
> +    while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
>      {
>          KMP_CPU_PAUSE();
>          old_value = TCR_8( *p );
> @@ -2527,7 +2540,7 @@ __kmp_get_load_balance( int max )
>  #endif // USE_LOAD_BALANCE
>
>
> -#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
> +#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
> KMP_ARCH_PPC64)
>
>  int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int
argc,
>          void *p_argv[] )
> @@ -2561,7 +2574,89 @@ int __kmp_invoke_microtask( microtask_t pkfn, int
> gtid, int tid, int argc,
>      return 1;
>  }
>
> -#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
> +#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 ||
> KMP_ARCH_PPC64)
> +
> +#if KMP_ARCH_PPC64
> +
> +// we really only need the case with 1 argument, because CLANG always
> build
> +// a struct of pointers to shared variables referenced in the outlined
> function
> +int
> +__kmp_invoke_microtask( microtask_t pkfn,
> +                        int gtid, int tid,
> +                        int argc, void *p_argv[] ) {
> +  switch (argc) {
> +  default:
> +    fprintf(stderr, "Too many args to microtask: %d!\n", argc);
> +    fflush(stderr);
> +    exit(-1);
> +  case 0:
> +    (*pkfn)(&gtid, &tid);
> +    break;
> +  case 1:
> +    (*pkfn)(&gtid, &tid, p_argv[0]);
> +    break;
> +  case 2:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
> +    break;
> +  case 3:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
> +    break;
> +  case 4:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
> +    break;
> +  case 5:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4]);
> +    break;
> +  case 6:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5]);
> +    break;
> +  case 7:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6]);
> +    break;
> +  case 8:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7]);
> +    break;
> +  case 9:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
> +    break;
> +  case 10:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
> +    break;
> +  case 11:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9],
p_argv
> [10]);
> +    break;
> +  case 12:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9],
p_argv
> [10],
> +            p_argv[11]);
> +    break;
> +  case 13:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9],
p_argv
> [10],
> +            p_argv[11], p_argv[12]);
> +    break;
> +  case 14:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9],
p_argv
> [10],
> +            p_argv[11], p_argv[12], p_argv[13]);
> +    break;
> +  case 15:
> +    (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3],
> p_argv[4],
> +            p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9],
p_argv
> [10],
> +            p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
> +    break;
> +  }
> +
> +  return 1;
> +}
> +
> +#endif
>
>  // end of file //
>
> diff --git a/runtime/tools/lib/Platform.pm
b/runtime/tools/lib/Platform.pm
> index 5b399f7..763f3cb 100644
> --- a/runtime/tools/lib/Platform.pm
> +++ b/runtime/tools/lib/Platform.pm
> @@ -50,6 +50,8 @@ sub canon_arch($) {
>              $arch = "32e";
>          } elsif ( $arch =~ m{\Aarm(?:v7\D*)?\z} ) {
>              $arch = "arm";
> +        } elsif ( $arch =~ m{\Appc64} ) {
> +           $arch = "ppc64";
>          } else {
>              $arch = undef;
>          }; # if
> @@ -62,6 +64,7 @@ sub canon_arch($) {
>          "32"  => "IA-32 architecture",
>          "32e" => "Intel(R) 64",
>          "arm" => "ARM",
> +        "ppc64" => "PPC64",
>      );
>
>      sub legal_arch($) {
> @@ -159,6 +162,8 @@ sub target_options() {
>          $_host_arch = "32e";
>      } elsif ( $hardware_platform eq "arm" ) {
>          $_host_arch = "arm";
> +    } elsif ( $hardware_platform eq "ppc64" ) {
> +        $_host_arch = "ppc64";
>      } else {
>          die "Unsupported host hardware platform: \"$hardware_platform\";
> stopped";
>      }; # if
> diff --git a/runtime/tools/lib/Uname.pm b/runtime/tools/lib/Uname.pm
> index e212501..52518b4 100644
> --- a/runtime/tools/lib/Uname.pm
> +++ b/runtime/tools/lib/Uname.pm
> @@ -147,6 +147,8 @@ if ( 0 ) {
>          $values{ hardware_platform } = "x86_64";
>      } elsif ( $values{ machine } =~ m{\Aarmv7\D*\z} ) {
>          $values{ hardware_platform } = "arm";
> +    } elsif ( $values{ machine } =~ m{\Appc64\z} ) {
> +        $values{ hardware_platform } = "ppc64";
>      } else {
>          die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped";
>      }; # if
> diff --git a/runtime/tools/src/common-defs.mk b/runtime/tools/src/common-defs.mk
> index ebd1922..7eb64b0 100644
> --- a/runtime/tools/src/common-defs.mk
> +++ b/runtime/tools/src/common-defs.mk
> @@ -45,7 +45,7 @@ endif
>  # Description:
>  #     The function return printable name of specified architecture, IA-32 architecture or Intel(R) 64.
>  #
> -legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(error Bad architecture specified: $(1))))))
> +legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(if $(filter ppc64,$(1)),PPC64,$(error Bad architecture specified: $(1)))))))
>
>  # Synopsis:
>  #     var_name = $(call check_variable,var,list)
> @@ -128,9 +128,9 @@ endif
>  # --------------------------------------------------------------------------------------------------
>
>  os       := $(call check_variable,os,lin lrb mac win)
> -arch     := $(call check_variable,arch,32 32e 64 arm)
> +arch     := $(call check_variable,arch,32 32e 64 arm ppc64)
>  platform := $(os)_$(arch)
> -platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64)
> +platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64 lin_ppc64)
>  # oa-opts means "os and arch options". They are passed to almost all perl scripts.
>  oa-opts  := --os=$(os) --arch=$(arch)
>
> diff --git a/runtime/tools/src/common-tools.mk b/runtime/tools/src/common-tools.mk
> index a9c9fbc..8c86791 100644
> --- a/runtime/tools/src/common-tools.mk
> +++ b/runtime/tools/src/common-tools.mk
> @@ -34,7 +34,7 @@
>  # "No rule to build .\kmp_i18n.inc". Using "./" solves the problem.
>  cpp-flags += -I ./
>  # For non-x86 architecture
> -ifeq "$(filter 32 32e 64,$(arch))" ""
> +ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
>      cpp-flags += $(shell pkg-config --cflags libffi)
>  endif
>  # Add all VPATH directories to path for searching include files.
> -------------- next part --------------
> An HTML attachment was scrubbed...
> URL: <http://lists.cs.uiuc.edu/pipermail/openmp-commits/attachments/20140717/41b2fdff/attachment.html>
>
> ------------------------------
>
> _______________________________________________
> Openmp-commits mailing list
> Openmp-commits at dcs-maillist2.engr.illinois.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/openmp-commits
>
>
> End of Openmp-commits Digest, Vol 6, Issue 1
> ********************************************
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20140717/5d933375/attachment.html>


More information about the Openmp-commits mailing list