[Openmp-dev] AArch64 support

C Bergström cbergstrom at pathscale.com
Sun Oct 26 12:05:28 PDT 2014


On Sun, Oct 26, 2014 at 8:34 PM, C Bergström <cbergstrom at pathscale.com>
wrote:

> Hi
>
> Does anyone have a patch for AArch64 support? Either experimental,
> complete or notes/suggestions.
>

So here's a 1st draft for review - I don't expect it to be clean on the 1st
pass, but getting some review would be really appreciated.

btw - Is anyone testing this on ARM or PPC64 regularly?

Thanks
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/openmp-dev/attachments/20141027/7e432752/attachment.html>
-------------- next part --------------
diff --git a/runtime/Build_With_CMake.txt b/runtime/Build_With_CMake.txt
index ca69b76..5c5b9d0 100644
--- a/runtime/Build_With_CMake.txt
+++ b/runtime/Build_With_CMake.txt
@@ -112,9 +112,9 @@ Build options
 ======================
 ==== Architecture ====
 ======================
--Darch=32|32e|arm|ppc64
+-Darch=32|32e|arm|ppc64|aarch64
 * Architecture can be 32 (IA-32 architecture), 32e (Intel(R) 64 architecture),
-  arm (ARM architecture), or ppc64 (PPC64 architecture).
+  arm (ARM architecture), aarch64 (64-bit ARMv8 architecture), or ppc64 (PPC64 architecture).
   This option, by default is chosen based on the
   CMake variable CMAKE_SIZEOF_VOID_P.  If it is 8, then Intel(R) 64 architecture
   is assumed.  If it is 4, then IA-32 architecture is assumed.  If you want to
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index b620f5a..4729610 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -49,7 +49,7 @@ include(FindPerl) # Standard cmake module to check for Perl
 
 # Build Configuration
 set(os_possible_values          lin mac win mic)
-set(arch_possible_values        32e 32 arm ppc64)
+set(arch_possible_values        32e 32 arm ppc64 aarch64)
 set(build_type_possible_values  release debug relwithdebinfo)
 set(omp_version_possible_values 40 30)
 set(lib_type_possible_values    normal profile stubs)
@@ -72,14 +72,36 @@ else()
 endif()
 
 # set to default architecture if the user did not specify an architecture explicitly
-if(NOT arch)	
-	if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4")
-    	set(arch           32          CACHE STRING "The architecture to build for (32e/32/arm/ppc64).  32e is Intel(R) 64 architecture, 32 is IA-32 architecture")
-	else()
-    	set(arch           32e         CACHE STRING "The architecture to build for (32e/32/arm/ppc64).  32e is Intel(R) 64 architecture, 32 is IA-32 architecture")
-	endif()
+#if(NOT arch)	
+#	if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4")
+#    	set(arch           32          CACHE STRING "The architecture to build for (32e/32/arm/ppc64/aarch64).  32e is Intel(R) 64 architecture, 32 is IA-32 architecture")
+#	else()
+#    	set(arch           32e         CACHE STRING "The architecture to build for (32e/32/arm/ppc64/aarch64).  32e is Intel(R) 64 architecture, 32 is IA-32 architecture")
+#	endif()
+#endif()
+
+# Architecture                                                                   
+set(IA32 FALSE)
+set(INTEL64 FALSE)
+set(ARM FALSE)
+set(PPC64 FALSE)
+set(AARCH64 FALSE)
+                                                           
+if("${arch}" STREQUAL "32")      # IA-32 architecture
+    set(IA32 TRUE)
+elseif("${arch}" STREQUAL "32e") # Intel(R) 64 architecture
+    set(INTEL64 TRUE)
+elseif("${arch}" STREQUAL "arm") # ARM architecture
+    set(ARM TRUE)
+elseif("${arch}" STREQUAL "ppc64") # PPC64 architecture
+    set(PPC64 TRUE)
+elseif("${arch}" STREQUAL "aarch64") # AARCH64 architecture
+    set(AARCH64 TRUE)
+else() # arch is unset or is not one of the supported values
+    message(FATAL_ERROR "Error: \"arch\" is unset or invalid (\"${arch}\"). Possible values are 32e 32 arm ppc64 aarch64. Example: -Darch=32e for x86-64/Intel64")
 endif()
 
+
 set(lib_type       normal        CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)")
 set(version        5             CACHE STRING "Produce libguide (version 4) or libiomp5 (version 5)")
 set(omp_version    40            CACHE STRING "The OpenMP version (40/30)")
@@ -169,21 +191,6 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
     set(FREEBSD TRUE)
 endif()
 
-# Architecture
-set(IA32 FALSE)
-set(INTEL64 FALSE)
-set(ARM FALSE)
-set(PPC64 FALSE)
-if("${arch}" STREQUAL "32")      # IA-32 architecture
-    set(IA32 TRUE)
-elseif("${arch}" STREQUAL "32e") # Intel(R) 64 architecture
-    set(INTEL64 TRUE)
-elseif("${arch}" STREQUAL "arm") # ARM architecture
-    set(ARM TRUE)
-elseif("${arch}" STREQUAL "ppc64") # PPC64 architecture
-    set(PPC64 TRUE)
-endif()
-
 # Set some flags based on build_type
 # cmake_build_type_lowercase is based off of CMAKE_BUILD_TYPE, just put in lowercase.
 set(RELEASE_BUILD        FALSE)
diff --git a/runtime/cmake/HelperFunctions.cmake b/runtime/cmake/HelperFunctions.cmake
index 88e309b..46f8b26 100644
--- a/runtime/cmake/HelperFunctions.cmake
+++ b/runtime/cmake/HelperFunctions.cmake
@@ -67,8 +67,10 @@ function(set_legal_arch return_arch_string)
         set(${return_arch_string} "L1OM" PARENT_SCOPE)
     elseif(${ARM})
         set(${return_arch_string} "ARM" PARENT_SCOPE)
-	elseif(${PPC64})
-	    set(${return_arch_string} "PPC64" PARENT_SCOPE)
+    elseif(${PPC64})
+        set(${return_arch_string} "PPC64" PARENT_SCOPE)
+    elseif(${AARCH64})                                                                                   
+        set(${return_arch_string} "AARCH64" PARENT_SCOPE)
     else()
         warning_say("set_legal_arch(): Warning: Unknown architecture...")
     endif()
diff --git a/runtime/src/CMakeLists.txt b/runtime/src/CMakeLists.txt
index bfc8540..bf0d69d 100644
--- a/runtime/src/CMakeLists.txt
+++ b/runtime/src/CMakeLists.txt
@@ -24,14 +24,16 @@ endif()
 
 if(arch)	
 	set(ARCH ${arch}) #acquire from command line
-else() #assume default
-  set(ARCH "32e")
+else() # arch must be passed explicitly (lowercase -Darch=...); there is no default
+  message(FATAL_ERROR "arch not set. Possible values are 32e 32 arm ppc64 aarch64. Example: -Darch=32e for x86-64/Intel64")
 endif()
 
 if("${arch}" STREQUAL "32e")
   set(ARCH_STR "Intel(R) 64")
 elseif("${arch}" STREQUAL "ppc64")
   set(ARCH_STR "PPC64")
+elseif("${arch}" STREQUAL "aarch64")                       
+  set(ARCH_STR "AARCH64")  
 endif()
 
 set(FEATURE_FLAGS "-D USE_ITT_BUILD")
@@ -67,7 +69,7 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D GUIDEDLL_EXPORTS")
 set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_GOMP_COMPAT")
 
 #adaptive locks use x86 assembly - disable for ppc64
-if("${arch}" STREQUAL "32e" OR "${arch}" STREQUAL "32" OR "${arch}" STREQUAL "arm")
+if("${arch}" STREQUAL "32e" OR "${arch}" STREQUAL "32" OR "${arch}" STREQUAL "arm" OR "${arch}" STREQUAL "aarch64")
 	set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")
 	set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")
 endif()
@@ -144,17 +146,20 @@ add_custom_command(
 )
 
 if("${ARCH}" STREQUAL "ppc64")
-	add_custom_command(
-		OUTPUT z_Linux_asm.o
-    		       COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_PPC64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
-		       )
-else()
-	add_custom_command(
-		OUTPUT z_Linux_asm.o
-    		       COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
-		       )
+  set(KMP_ARCH_TEMP "KMP_ARCH_PPC64")
+elseif("${ARCH}" STREQUAL "arm")
+  set(KMP_ARCH_TEMP "KMP_ARCH_ARM")
+elseif("${ARCH}" STREQUAL "aarch64")
+  set(KMP_ARCH_TEMP "KMP_ARCH_AARCH64")
+else() # any other arch value keeps the x86-64 define used before this change
+  set(KMP_ARCH_TEMP "KMP_ARCH_X86_64")
 endif()
 
+add_custom_command(
+  OUTPUT z_Linux_asm.o
+    COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D ${KMP_ARCH_TEMP} -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}
+)
+
 
 add_custom_target(gen_kmp_i18n DEPENDS kmp_i18n_id.inc kmp_i18n_default.inc omp.h z_Linux_asm.o)
 
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index 6daf973..448ee87 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -766,6 +766,8 @@ typedef struct kmp_nested_proc_bind_t {
 
 extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
 
+#endif /* OMP_40_ENABLED */
+
 # if KMP_AFFINITY_SUPPORTED
 #  define KMP_PLACE_ALL       (-1)
 #  define KMP_PLACE_UNDEFINED (-2)
@@ -773,7 +775,6 @@ extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
 
 extern int __kmp_affinity_num_places;
 
-#endif /* OMP_40_ENABLED */
 
 #if OMP_40_ENABLED
 typedef enum kmp_cancel_kind_t {
@@ -2940,11 +2941,11 @@ extern void __kmp_balanced_affinity( int tid, int team_size );
 
 extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
 
 extern int __kmp_futex_determine_capable( void );
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
 
 extern void __kmp_gtid_set_specific( int gtid );
 extern int  __kmp_gtid_get_specific( void );
@@ -3039,7 +3040,7 @@ enum fork_context_e
 extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
   kmp_int32 argc, microtask_t microtask, launch_t invoker,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_ARM || KMP_ARCH_X86_64) && KMP_OS_LINUX
+#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                              va_list *ap
 #else
                              va_list ap
diff --git a/runtime/src/kmp_csupport.c b/runtime/src/kmp_csupport.c
index af5c614..f4390c6 100644
--- a/runtime/src/kmp_csupport.c
+++ b/runtime/src/kmp_csupport.c
@@ -293,7 +293,7 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
             VOLATILE_CAST(microtask_t) microtask,
             VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
             &ap
 #else
             ap
@@ -362,7 +362,7 @@ __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
             argc,
             VOLATILE_CAST(microtask_t) __kmp_teams_master,
             VOLATILE_CAST(launch_t)    __kmp_invoke_teams_master,
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
             &ap
 #else
             ap
@@ -590,8 +590,8 @@ __kmpc_flush(ident_t *loc, ...)
                 #endif // KMP_COMPILER_ICC
             }; // if
         #endif // KMP_MIC
-    #elif KMP_ARCH_ARM
-        // Nothing yet
+    #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
+        // Nothing to see here move along
     #elif KMP_ARCH_PPC64
         // Nothing needed here (we have a real MB above).
         #if KMP_OS_CNK
@@ -848,7 +848,7 @@ __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
       && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
@@ -901,7 +901,7 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
       && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
@@ -1342,7 +1342,7 @@ __kmpc_init_lock( ident_t * loc, kmp_int32 gtid,  void ** user_lock ) {
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1378,7 +1378,7 @@ __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1406,7 +1406,7 @@ __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1425,7 +1425,7 @@ __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         ;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         ;
@@ -1446,7 +1446,7 @@ __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1467,7 +1467,7 @@ __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         ;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1488,7 +1488,7 @@ __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1518,7 +1518,7 @@ __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1550,7 +1550,7 @@ __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
 
     if ( ( __kmp_user_lock_kind == lk_tas )
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
         // "fast" path implemented to fix customer performance issue
 #if USE_ITT_BUILD
         __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
@@ -1562,7 +1562,7 @@ __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
         lck = (kmp_user_lock_p)user_lock;
 #endif
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1589,7 +1589,7 @@ __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
 
     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
         // "fast" path implemented to fix customer performance issue
         kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
 #if USE_ITT_BUILD
@@ -1604,7 +1604,7 @@ __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
         lck = (kmp_user_lock_p)user_lock;
 #endif
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1635,7 +1635,7 @@ __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1673,7 +1673,7 @@ __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
diff --git a/runtime/src/kmp_gsupport.c b/runtime/src/kmp_gsupport.c
index 3cce67c..e03c9d8 100644
--- a/runtime/src/kmp_gsupport.c
+++ b/runtime/src/kmp_gsupport.c
@@ -15,7 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 
-#if defined(__x86_64) || defined (__powerpc64__)
+#if defined(__x86_64) || defined (__powerpc64__) || defined(__aarch64__)
 # define KMP_I8
 #endif
 #include "kmp.h"
@@ -40,7 +40,6 @@ xexpand(KMP_API_NAME_GOMP_BARRIER)(void)
 }
 
 
-/**/
 //
 // Mutual exclusion
 //
@@ -216,7 +215,6 @@ xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void)
 }
 
 
-/**/
 //
 // Dispatch macro defs
 //
@@ -224,7 +222,7 @@ xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void)
 // (IA-32 architecture) or 64-bit signed (Intel(R) 64).
 //
 
-#if KMP_ARCH_X86 || KMP_ARCH_ARM
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
 # define KMP_DISPATCH_INIT              __kmp_aux_dispatch_init_4
 # define KMP_DISPATCH_FINI_CHUNK        __kmp_aux_dispatch_fini_chunk_4
 # define KMP_DISPATCH_NEXT              __kmpc_dispatch_next_4
@@ -239,7 +237,6 @@ xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void)
 # define KMP_DISPATCH_NEXT_ULL          __kmpc_dispatch_next_8u
 
 
-/**/
 //
 // The parallel contruct
 //
@@ -288,7 +285,7 @@ __kmp_GOMP_fork_call(ident_t *loc, int gtid, microtask_t wrapper, int argc,...)
     va_start(ap, argc);
 
     rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, __kmp_invoke_task_func,
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
       &ap
 #else
       ap
@@ -344,7 +341,6 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
 }
 
 
-/**/
 //
 // Loop worksharing constructs
 //
@@ -500,7 +496,6 @@ xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void)
 }
 
 
-/**/
 //
 // Unsigned long long loop worksharing constructs
 //
@@ -622,7 +617,6 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
 
 
-/**/
 //
 // Combined parallel / loop worksharing constructs
 //
@@ -666,7 +660,6 @@ PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), kmp_s
 PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), kmp_sch_runtime)
 
 
-/**/
 //
 // Tasking constructs
 //
@@ -739,7 +732,6 @@ xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void)
 }
 
 
-/**/
 //
 // Sections worksharing constructs
 //
diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h
index 31a93f5..e37fe7d 100644
--- a/runtime/src/kmp_lock.h
+++ b/runtime/src/kmp_lock.h
@@ -174,7 +174,7 @@ extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck );
 extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );
 
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
 
 // ----------------------------------------------------------------------------
 // futex locks.  futex locks are only available on Linux* OS.
@@ -224,7 +224,7 @@ extern void __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gt
 extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
 extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
 
 
 // ----------------------------------------------------------------------------
@@ -590,7 +590,7 @@ __kmp_destroy_lock( kmp_lock_t *lck )
 enum kmp_lock_kind {
     lk_default = 0,
     lk_tas,
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     lk_futex,
 #endif
     lk_ticket,
@@ -607,7 +607,7 @@ extern kmp_lock_kind_t __kmp_user_lock_kind;
 
 union kmp_user_lock {
     kmp_tas_lock_t     tas;
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
     kmp_futex_lock_t   futex;
 #endif
     kmp_ticket_lock_t  ticket;
@@ -635,7 +635,7 @@ __kmp_get_user_lock_owner( kmp_user_lock_p lck )
 
 extern void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
 
 #define __kmp_acquire_user_lock_with_checks(lck,gtid)                                           \
     if (__kmp_user_lock_kind == lk_tas) {                                                       \
@@ -685,7 +685,7 @@ __kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 
 extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
 
 #include "kmp_i18n.h"                       /* AC: KMP_FATAL definition */
 extern int __kmp_env_consistency_check;     /* AC: copy from kmp.h here */
diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h
index c65caf8..4e6f0db 100644
--- a/runtime/src/kmp_os.h
+++ b/runtime/src/kmp_os.h
@@ -76,6 +76,7 @@
 #define KMP_ARCH_X86        0
 #define KMP_ARCH_X86_64	    0
 #define KMP_ARCH_PPC64      0
+#define KMP_ARCH_AARCH64    0
 
 #ifdef _WIN32
 # undef KMP_OS_WINDOWS
@@ -142,7 +143,10 @@
 # elif defined __powerpc64__
 #  undef KMP_ARCH_PPC64
 #  define KMP_ARCH_PPC64 1
-# endif
+# elif defined __aarch64__           
+#  undef KMP_ARCH_AARCH64          
+#  define KMP_ARCH_AARCH64 1  
+# endif        
 #endif
 
 #if defined(__ARM_ARCH_7__)   || defined(__ARM_ARCH_7R__)  || \
@@ -181,7 +185,8 @@
 # define KMP_ARCH_ARM 1
 #endif
 
-#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64)
+// TODO: Fixme - This is clever, but really fugly 
+#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + KMP_ARCH_AARCH64)
 # error Unknown or unsupported architecture
 #endif
 
@@ -259,7 +264,7 @@
 
 #if KMP_ARCH_X86 || KMP_ARCH_ARM
 # define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
-#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64
+#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
 # define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
 #else
 # error "Can't determine size_t printf format specifier."
@@ -721,7 +726,7 @@ extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v );
 #define TCX_SYNC_8(a,b,c)   KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), (kmp_int64)(b), (kmp_int64)(c))
 
 #if KMP_ARCH_X86
-
+// What about ARM?
     #define TCR_PTR(a)          ((void *)TCR_4(a))
     #define TCW_PTR(a,b)        TCW_4((a),(b))
     #define TCR_SYNC_PTR(a)     ((void *)TCR_SYNC_4(a))
diff --git a/runtime/src/kmp_runtime.c b/runtime/src/kmp_runtime.c
index e675518..8fbc35a 100644
--- a/runtime/src/kmp_runtime.c
+++ b/runtime/src/kmp_runtime.c
@@ -1422,7 +1422,7 @@ __kmp_fork_call(
     microtask_t microtask,
     launch_t    invoker,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
     va_list   * ap
 #else
     va_list     ap
@@ -1504,7 +1504,7 @@ __kmp_fork_call(
         argv = (void**)parent_team->t.t_argv;
         for( i=argc-1; i >= 0; --i )
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
             *argv++ = va_arg( *ap, void * );
 #else
             *argv++ = va_arg( ap, void * );
@@ -1598,11 +1598,11 @@ __kmp_fork_call(
     /* create a serialized parallel region? */
     if ( nthreads == 1 ) {
         /* josh todo: hypothetical question: what do we do for OS X*? */
-#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM )
+#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 )
         void *   args[ argc ];
 #else
         void * * args = (void**) alloca( argc * sizeof( void * ) );
-#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM ) */
+#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 ) */
 
         __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
         KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
@@ -1632,7 +1632,7 @@ __kmp_fork_call(
                 if ( ap ) {
                     for( i=argc-1; i >= 0; --i )
 // TODO: revert workaround for Intel(R) 64 tracker #96
-# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                         *argv++ = va_arg( *ap, void * );
 # else
                         *argv++ = va_arg( ap, void * );
@@ -1655,7 +1655,7 @@ __kmp_fork_call(
                 argv = args;
                 for( i=argc-1; i >= 0; --i )
 // TODO: revert workaround for Intel(R) 64 tracker #96
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                     *argv++ = va_arg( *ap, void * );
 #else
                     *argv++ = va_arg( ap, void * );
@@ -1823,7 +1823,7 @@ __kmp_fork_call(
 #endif /* OMP_40_ENABLED */
         for ( i=argc-1; i >= 0; --i )
 // TODO: revert workaround for Intel(R) 64 tracker #96
-#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
             *argv++ = va_arg( *ap, void * );
 #else
             *argv++ = va_arg( ap, void * );
@@ -6943,7 +6943,7 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
         int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
         int tree_available   = FAST_REDUCTION_TREE_METHOD_GENERATED;
 
-        #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64
+        #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
 
             #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
                 #if KMP_MIC
@@ -6966,7 +6966,7 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
                 #error "Unknown or unsupported OS"
             #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
 
-        #elif KMP_ARCH_X86 || KMP_ARCH_ARM
+        #elif KMP_ARCH_X86 || KMP_ARCH_ARM
 
             #if KMP_OS_LINUX || KMP_OS_WINDOWS
 
diff --git a/runtime/src/makefile.mk b/runtime/src/makefile.mk
index 553c2f8..64654be 100644
--- a/runtime/src/makefile.mk
+++ b/runtime/src/makefile.mk
@@ -326,11 +326,11 @@ ifeq "$(CPLUSPLUS)" "on"
     ifeq "$(os)" "win"
         c-flags   += -TP
     else ifeq "$(arch)" "ppc64"
-    # c++0x on ppc64 linux removes definition of preproc. macros, needed in .hs
-      c-flags   += -x c++ -std=gnu++0x
+    # c++11 on ppc64 linux removes definition of preproc. macros, needed in .hs
+      c-flags   += -x c++ -std=gnu++11
     else
         ifneq "$(filter gcc clang,$(c))" ""
-            c-flags   += -x c++ -std=c++0x
+            c-flags   += -x c++ -std=c++11
         else
             c-flags   += -Kc++
         endif
@@ -497,6 +497,14 @@ else
 	cpp-flags += -D CACHE_LINE=64
 endif
 
+# Use a 128-byte CACHE_LINE on aarch64. The conditional directly above has
+# already added a CACHE_LINE definition, so there must be no "else" branch
+# here: it would define CACHE_LINE a second time for every other architecture.
+# TODO: fold aarch64 into the conditional above so CACHE_LINE is defined once.
+ifeq "$(arch)" "aarch64"
+	cpp-flags += -D CACHE_LINE=128
+endif
+
 cpp-flags += -D KMP_ADJUST_BLOCKTIME=1
 cpp-flags += -D BUILD_PARALLEL_ORDERED
 cpp-flags += -D KMP_ASM_INTRINS
@@ -630,6 +638,9 @@ ifneq "$(os)" "win"
     else ifeq "$(arch)" "ppc64" 
         z_Linux_asm$(obj) : \
 			cpp-flags += -D KMP_ARCH_PPC64		    
+    else ifeq "$(arch)" "aarch64"
+        z_Linux_asm$(obj) : \
+                        cpp-flags += -D KMP_ARCH_AARCH64
     else
         z_Linux_asm$(obj) : \
 		    cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
@@ -1467,6 +1478,10 @@ ifneq "$(filter %-dyna win-%,$(os)-$(LINK_TYPE))" ""
             td_exp += libc.so.6
             td_exp += ld64.so.1
         endif
+        ifeq "$(arch)" "aarch64"
+            td_exp += libc.so.6
+            td_exp += ld-linux-aarch64.so.1
+        endif
         ifeq "$(std_cpp_lib)" "1"
             td_exp += libstdc++.so.6
         endif
diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index 863ab95..1323254 100644
--- a/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -128,6 +128,11 @@
 #  define ITT_ARCH_IA32E 2
 #endif /* ITT_ARCH_IA32E */
 
+/* NOTE(review): 3 was presumably reserved for ITT_ARCH_IA64 (still named in an #endif comment later in this file); confirm that reusing it is safe. */
+#ifndef ITT_ARCH_AARCH64
+#  define ITT_ARCH_AARCH64  3
+#endif /* ITT_ARCH_AARCH64 */
+
 #ifndef ITT_ARCH_ARM
 #  define ITT_ARCH_ARM  4
 #endif /* ITT_ARCH_ARM */
@@ -148,6 +153,8 @@
 #    define ITT_ARCH ITT_ARCH_ARM
 #  elif defined __powerpc64__
 #    define ITT_ARCH ITT_ARCH_PPC64
+#  elif defined __aarch64__
+#    define ITT_ARCH ITT_ARCH_AARCH64
 #  endif
 #endif
 
@@ -281,7 +288,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
                           : "memory");
     return result;
 }
-#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64
+#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64
 #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
 #endif /* ITT_ARCH==ITT_ARCH_IA64 */
 #ifndef ITT_SIMPLE_INIT
diff --git a/runtime/src/z_Linux_asm.s b/runtime/src/z_Linux_asm.s
index 2b98223..e785fab 100644
--- a/runtime/src/z_Linux_asm.s
+++ b/runtime/src/z_Linux_asm.s
@@ -1377,7 +1377,7 @@ __kmp_unnamed_critical_addr:
     .size __kmp_unnamed_critical_addr,4
 #endif /* KMP_ARCH_ARM */
 
-#if KMP_ARCH_PPC64
+#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
     .data
     .comm .gomp_critical_user_,32,8
     .data
@@ -1386,7 +1386,7 @@ __kmp_unnamed_critical_addr:
 __kmp_unnamed_critical_addr:
     .8byte .gomp_critical_user_
     .size __kmp_unnamed_critical_addr,8
-#endif /* KMP_ARCH_PPC64 */
+#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
 
 #if defined(__linux__)
 # if KMP_ARCH_ARM
diff --git a/runtime/src/z_Linux_util.c b/runtime/src/z_Linux_util.c
index d66d2a4..c3f3a28 100644
--- a/runtime/src/z_Linux_util.c
+++ b/runtime/src/z_Linux_util.c
@@ -36,7 +36,7 @@
 
 #if KMP_OS_LINUX && !KMP_OS_CNK
 # include <sys/sysinfo.h>
-# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
 // We should really include <futex.h>, but that causes compatibility problems on different
 // Linux* OS distributions that either require that you include (or break when you try to include)
 // <pci/types.h>.
@@ -63,7 +63,7 @@
 #include <fcntl.h>
 
 // For non-x86 architecture
-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64)
 # include <stdbool.h>
 # include <ffi.h>
 #endif
@@ -137,6 +137,18 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond )
 #    error Wrong code for getaffinity system call.
 #   endif /* __NR_sched_getaffinity */
 
+#  elif KMP_ARCH_AARCH64
+#   ifndef __NR_sched_setaffinity
+#    define __NR_sched_setaffinity  122
+#   elif __NR_sched_setaffinity != 122
+#    error Wrong code for setaffinity system call.
+#   endif /* __NR_sched_setaffinity */
+#   ifndef __NR_sched_getaffinity
+#    define __NR_sched_getaffinity  123
+#   elif __NR_sched_getaffinity != 123
+#    error Wrong code for getaffinity system call.
+#   endif /* __NR_sched_getaffinity */
+
 #  elif KMP_ARCH_X86_64
 #   ifndef __NR_sched_setaffinity
 #    define __NR_sched_setaffinity  203
@@ -460,7 +472,7 @@ __kmp_change_thread_affinity_mask( int gtid, kmp_affin_mask_t *new_mask,
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && !KMP_OS_CNK
 
 int
 __kmp_futex_determine_capable()
@@ -522,7 +534,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
     return old_value;
 }
 
-# if KMP_ARCH_X86 || KMP_ARCH_PPC64
+# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
 kmp_int64
 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
 {
@@ -2600,7 +2612,7 @@ __kmp_get_load_balance( int max )
 #endif // USE_LOAD_BALANCE
 
 
-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64)
 
 int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
         void *p_argv[] )
@@ -2634,7 +2646,7 @@ int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
     return 1;
 }
 
-#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)
+#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64)
 
 #if KMP_ARCH_PPC64
 
diff --git a/runtime/tools/lib/Platform.pm b/runtime/tools/lib/Platform.pm
index 077e649..839e54f 100644
--- a/runtime/tools/lib/Platform.pm
+++ b/runtime/tools/lib/Platform.pm
@@ -52,6 +52,8 @@ sub canon_arch($) {
             $arch = "arm";
         } elsif ( $arch =~ m{\Appc64} ) {
         	$arch = "ppc64";            
+        } elsif ( $arch =~ m{\Aaarch64} ) {               
+                $arch = "aarch64";
         } else {
             $arch = undef;
         }; # if
@@ -64,6 +66,7 @@ sub canon_arch($) {
         "32"  => "IA-32 architecture",
         "32e" => "Intel(R) 64",
         "arm" => "ARM",
+        "aarch64" => "AArch64",
     );
 
     sub legal_arch($) {
@@ -82,6 +85,7 @@ sub canon_arch($) {
         "32e" => "intel64",
         "64"  => "ia64",
         "arm" => "arm",
+        "aarch64" => "aarch64",
     );
 
     sub arch_opt($) {
@@ -163,6 +167,8 @@ sub target_options() {
         $_host_arch = "arm";
     } elsif ( $hardware_platform eq "ppc64" ) {
         $_host_arch = "ppc64";
+    } elsif ( $hardware_platform eq "aarch64" ) {         
+        $_host_arch = "aarch64";  
     } else {
         die "Unsupported host hardware platform: \"$hardware_platform\"; stopped";
     }; # if
diff --git a/runtime/tools/lib/Uname.pm b/runtime/tools/lib/Uname.pm
index 52518b4..d5bbde5 100644
--- a/runtime/tools/lib/Uname.pm
+++ b/runtime/tools/lib/Uname.pm
@@ -149,6 +149,8 @@ if ( 0 ) {
         $values{ hardware_platform } = "arm";
     } elsif ( $values{ machine } =~ m{\Appc64\z} ) {
         $values{ hardware_platform } = "ppc64";
+    } elsif ( $values{ machine } =~ m{\Aaarch64\z} ) {
+        $values{ hardware_platform } = "aarch64";
     } else {
         die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped";
     }; # if
diff --git a/runtime/tools/src/common-defs.mk b/runtime/tools/src/common-defs.mk
index 7eb64b0..f8dd75e 100644
--- a/runtime/tools/src/common-defs.mk
+++ b/runtime/tools/src/common-defs.mk
@@ -45,7 +45,7 @@ endif
 # Description:
 #     The function return printable name of specified architecture, IA-32 architecture or Intel(R) 64.
 #
-legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(if $(filter ppc64,$(1)),PPC64,$(error Bad architecture specified: $(1)))))))
+legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(if $(filter ppc64,$(1)),PPC64,$(if $(filter aarch64,$(1)),AArch64,$(error Bad architecture specified: $(1))))))))
 
 # Synopsis:
 #     var_name = $(call check_variable,var,list)
@@ -128,9 +128,9 @@ endif
 # --------------------------------------------------------------------------------------------------
 
 os       := $(call check_variable,os,lin lrb mac win)
-arch     := $(call check_variable,arch,32 32e 64 arm ppc64)
+arch     := $(call check_variable,arch,32 32e 64 arm ppc64 aarch64)
 platform := $(os)_$(arch)
-platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64 lin_ppc64)
+platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64 lin_ppc64 lin_aarch64)
 # oa-opts means "os and arch options". They are passed to almost all perl scripts.
 oa-opts  := --os=$(os) --arch=$(arch)
 


More information about the Openmp-dev mailing list