[Openmp-commits] [openmp] r254320 - Adding Hwloc library option for affinity mechanism

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Mon Nov 30 12:02:59 PST 2015


Author: jlpeyton
Date: Mon Nov 30 14:02:59 2015
New Revision: 254320

URL: http://llvm.org/viewvc/llvm-project?rev=254320&view=rev
Log:
Adding Hwloc library option for affinity mechanism

These changes allow libhwloc to be used as the topology discovery/affinity
mechanism for libomp.  It is supported on Unices. The code additions:
* Canonicalize KMP_CPU_* interface macros so bitmask operations are
  implementation independent and work with both hwloc bitmaps and libomp
  bitmaps.  So there are new KMP_CPU_ALLOC_* and KMP_CPU_ITERATE() macros and
  the like. These are all in kmp.h and appropriately placed.
* Hwloc topology discovery code in kmp_affinity.cpp. This uses the hwloc
  interface to create a libomp address2os object which the rest of libomp knows
  how to handle already.
* To build, use -DLIBOMP_USE_HWLOC=on and
  -DLIBOMP_HWLOC_INSTALL_DIR=/path/to/install/dir [default /usr/local]. If CMake
  can't find the library or hwloc.h, then it will tell you and exit.

Differential Revision: http://reviews.llvm.org/D13991

Modified:
    openmp/trunk/runtime/Build_With_CMake.txt
    openmp/trunk/runtime/CMakeLists.txt
    openmp/trunk/runtime/cmake/LibompHandleFlags.cmake
    openmp/trunk/runtime/cmake/LibompMicroTests.cmake
    openmp/trunk/runtime/cmake/config-ix.cmake
    openmp/trunk/runtime/src/CMakeLists.txt
    openmp/trunk/runtime/src/i18n/en_US.txt
    openmp/trunk/runtime/src/kmp.h
    openmp/trunk/runtime/src/kmp_affinity.cpp
    openmp/trunk/runtime/src/kmp_affinity.h
    openmp/trunk/runtime/src/kmp_config.h.cmake
    openmp/trunk/runtime/src/kmp_ftn_entry.h
    openmp/trunk/runtime/src/kmp_global.c
    openmp/trunk/runtime/src/kmp_settings.c
    openmp/trunk/runtime/src/z_Linux_util.c
    openmp/trunk/runtime/test/CMakeLists.txt
    openmp/trunk/runtime/test/lit.cfg
    openmp/trunk/runtime/test/lit.site.cfg.in

Modified: openmp/trunk/runtime/Build_With_CMake.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/Build_With_CMake.txt?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/Build_With_CMake.txt (original)
+++ openmp/trunk/runtime/Build_With_CMake.txt Mon Nov 30 14:02:59 2015
@@ -159,6 +159,18 @@ Should include stats-gathering code be i
 -DLIBOMP_USE_DEBUGGER=off|on
 Should the friendly debugger interface be included in the build?
 
+-DLIBOMP_USE_HWLOC=off|on
+Should the Hwloc library be used for affinity?
+This option is not supported on Windows.
+http://www.open-mpi.org/projects/hwloc
+
+-DLIBOMP_HWLOC_INSTALL_DIR=/path/to/hwloc/install/dir
+Default: /usr/local
+This option is only used if LIBOMP_USE_HWLOC is on.
+Specifies install location of Hwloc. The configuration system will look for
+hwloc.h in ${LIBOMP_HWLOC_INSTALL_DIR}/include and the library in 
+${LIBOMP_HWLOC_INSTALL_DIR}/lib.
+
 ================================
 How to append flags to the build
 ================================

Modified: openmp/trunk/runtime/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/CMakeLists.txt?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/CMakeLists.txt (original)
+++ openmp/trunk/runtime/CMakeLists.txt Mon Nov 30 14:02:59 2015
@@ -135,6 +135,12 @@ set(LIBOMP_FFLAGS "" CACHE STRING
 set(LIBOMP_COPY_EXPORTS TRUE CACHE STRING
   "Should exports be copied into source exports/ directory?")
 
+# HWLOC-support
+set(LIBOMP_USE_HWLOC FALSE CACHE BOOL
+  "Use Hwloc (http://www.open-mpi.org/projects/hwloc/) library for affinity?")
+set(LIBOMP_HWLOC_INSTALL_DIR /usr/local CACHE PATH
+  "Install path for hwloc library")
+
 # Get the build number from kmp_version.c
 libomp_get_build_number("${CMAKE_CURRENT_SOURCE_DIR}" LIBOMP_VERSION_BUILD)
 math(EXPR LIBOMP_VERSION_BUILD_YEAR "${LIBOMP_VERSION_BUILD}/10000")
@@ -285,6 +291,11 @@ if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_H
   libomp_error_say("OpenMP Tools Interface requested but not available")
 endif()
 
+# Error check hwloc support after config-ix has run
+if(LIBOMP_USE_HWLOC AND (NOT LIBOMP_HAVE_HWLOC))
+  libomp_error_say("Hwloc requested but not available")
+endif()
+
 # Setting final library name
 set(LIBOMP_DEFAULT_LIB_NAME libomp)
 if(${PROFILE_LIBRARY})
@@ -323,6 +334,7 @@ if(${LIBOMP_STANDALONE_BUILD})
   endif()
   libomp_say("Use Adaptive locks   -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
   libomp_say("Use quad precision   -- ${LIBOMP_USE_QUAD_PRECISION}")
+  libomp_say("Use Hwloc library    -- ${LIBOMP_USE_HWLOC}")
 endif()
 
 add_subdirectory(src)

Modified: openmp/trunk/runtime/cmake/LibompHandleFlags.cmake
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/cmake/LibompHandleFlags.cmake?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/cmake/LibompHandleFlags.cmake (original)
+++ openmp/trunk/runtime/cmake/LibompHandleFlags.cmake Mon Nov 30 14:02:59 2015
@@ -151,6 +151,7 @@ endfunction()
 function(libomp_get_libflags libflags)
   set(libflags_local)
   libomp_append(libflags_local "${CMAKE_THREAD_LIBS_INIT}")
+  libomp_append(libflags_local "${LIBOMP_HWLOC_LIBRARY}" LIBOMP_USE_HWLOC)
   if(${IA32})
     libomp_append(libflags_local -lirc_pic LIBOMP_HAVE_IRC_PIC_LIBRARY)
   endif()

Modified: openmp/trunk/runtime/cmake/LibompMicroTests.cmake
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/cmake/LibompMicroTests.cmake?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/cmake/LibompMicroTests.cmake (original)
+++ openmp/trunk/runtime/cmake/LibompMicroTests.cmake Mon Nov 30 14:02:59 2015
@@ -82,10 +82,13 @@ else() # (Unix based systems, Intel(R) M
     libomp_append(libomp_test_touch_cflags -m32 LIBOMP_HAVE_M32_FLAG)
   endif()
   libomp_append(libomp_test_touch_libs ${LIBOMP_OUTPUT_DIRECTORY}/${LIBOMP_LIB_FILE})
+  libomp_append(libomp_test_touch_libs "${LIBOMP_HWLOC_LIBRARY}" LIBOMP_USE_HWLOC)
   if(APPLE)
     set(libomp_test_touch_env "DYLD_LIBRARY_PATH=.:${LIBOMP_OUTPUT_DIRECTORY}:$ENV{DYLD_LIBRARY_PATH}")
+    libomp_append(libomp_test_touch_ldflags "-Wl,-rpath,${LIBOMP_HWLOC_LIBRARY_DIR}" LIBOMP_USE_HWLOC)
   else()
     set(libomp_test_touch_env "LD_LIBRARY_PATH=.:${LIBOMP_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH}")
+    libomp_append(libomp_test_touch_ldflags "-Wl,-rpath=${LIBOMP_HWLOC_LIBRARY_DIR}" LIBOMP_USE_HWLOC)
   endif()
 endif()
 macro(libomp_test_touch_recipe test_touch_dir)
@@ -169,8 +172,10 @@ add_custom_target(libomp-test-deps DEPEN
 set(libomp_expected_library_deps)
 if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
   set(libomp_expected_library_deps libc.so.7 libthr.so.3)
+  libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
 elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
   set(libomp_expected_library_deps libc.so.12 libpthread.so.1 libm.so.0)
+  libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
 elseif(APPLE)
   set(libomp_expected_library_deps /usr/lib/libSystem.B.dylib)
 elseif(WIN32)
@@ -203,6 +208,7 @@ else()
       libomp_append(libomp_expected_library_deps ld64.so.1)
     endif()
     libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY)
+    libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
   endif()
   libomp_append(libomp_expected_library_deps libstdc++.so.6 LIBOMP_USE_STDCPPLIB)
 endif()

Modified: openmp/trunk/runtime/cmake/config-ix.cmake
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/cmake/config-ix.cmake?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/cmake/config-ix.cmake (original)
+++ openmp/trunk/runtime/cmake/config-ix.cmake Mon Nov 30 14:02:59 2015
@@ -12,6 +12,7 @@
 include(CheckCCompilerFlag)
 include(CheckCSourceCompiles)
 include(CheckCXXCompilerFlag)
+include(CheckIncludeFile)
 include(CheckLibraryExists)
 include(CheckIncludeFiles)
 include(LibompCheckLinkerFlag)
@@ -211,3 +212,25 @@ else()
   endif()
 endif()
 
+# Check if HWLOC support is available (hwloc.h and libhwloc under LIBOMP_HWLOC_INSTALL_DIR)
+if(${LIBOMP_USE_HWLOC})
+  if(WIN32)
+    set(LIBOMP_HAVE_HWLOC FALSE)
+    libomp_say("Using hwloc not supported on Windows yet")
+  else()
+    set(CMAKE_REQUIRED_INCLUDES "${LIBOMP_HWLOC_INSTALL_DIR}/include")
+    check_include_file(hwloc.h LIBOMP_HAVE_HWLOC_H)
+    set(CMAKE_REQUIRED_INCLUDES)
+    check_library_exists(hwloc hwloc_topology_init
+      "${LIBOMP_HWLOC_INSTALL_DIR}/lib" LIBOMP_HAVE_LIBHWLOC)
+    find_library(LIBOMP_HWLOC_LIBRARY hwloc HINTS "${LIBOMP_HWLOC_INSTALL_DIR}/lib") # HINTS: searched before system dirs
+    get_filename_component(LIBOMP_HWLOC_LIBRARY_DIR "${LIBOMP_HWLOC_LIBRARY}" PATH)
+    if(LIBOMP_HAVE_HWLOC_H AND LIBOMP_HAVE_LIBHWLOC AND LIBOMP_HWLOC_LIBRARY)
+      set(LIBOMP_HAVE_HWLOC TRUE)
+    else()
+      set(LIBOMP_HAVE_HWLOC FALSE)
+      libomp_say("Could not find hwloc")
+    endif()
+  endif()
+endif()
+

Modified: openmp/trunk/runtime/src/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/CMakeLists.txt?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/CMakeLists.txt (original)
+++ openmp/trunk/runtime/src/CMakeLists.txt Mon Nov 30 14:02:59 2015
@@ -42,6 +42,9 @@ include_directories(
   ${LIBOMP_INC_DIR}
   ${LIBOMP_SRC_DIR}/thirdparty/ittnotify
 )
+if(${LIBOMP_USE_HWLOC})
+  include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
+endif()
 
 # Getting correct source files to build library
 set(LIBOMP_CFILES)

Modified: openmp/trunk/runtime/src/i18n/en_US.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/i18n/en_US.txt?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/i18n/en_US.txt (original)
+++ openmp/trunk/runtime/src/i18n/en_US.txt Mon Nov 30 14:02:59 2015
@@ -405,6 +405,9 @@ AffGranTopGroup              "%1$s: gran
 AffGranGroupType             "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"."
 AffThrPlaceManySockets       "KMP_PLACE_THREADS ignored: too many sockets requested."
 AffThrPlaceDeprecated        "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value."
+AffUsingHwloc                "%1$s: Affinity capable, using hwloc."
+AffIgnoringHwloc             "%1$s: Ignoring hwloc mechanism."
+AffHwlocErrorOccurred        "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms."
 
 
 # --------------------------------------------------------------------------------------------------

Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Mon Nov 30 14:02:59 2015
@@ -77,10 +77,18 @@
 
 #include "kmp_os.h"
 
+#include "kmp_safe_c_api.h"
+
 #if KMP_STATS_ENABLED
 class kmp_stats_list;
 #endif
 
+#if KMP_USE_HWLOC
+#include "hwloc.h"
+extern hwloc_topology_t __kmp_hwloc_topology;
+extern int __kmp_hwloc_error;
+#endif
+
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 #include <xmmintrin.h>
 #endif
@@ -488,6 +496,78 @@ extern size_t __kmp_affin_mask_size;
 # define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
 # define KMP_CPU_SETSIZE        (__kmp_affin_mask_size * CHAR_BIT)
 
+#if KMP_USE_HWLOC
+
+typedef hwloc_cpuset_t kmp_affin_mask_t;
+# define KMP_CPU_SET(i,mask)       hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
+# define KMP_CPU_ISSET(i,mask)     hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
+# define KMP_CPU_CLR(i,mask)       hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i)
+# define KMP_CPU_ZERO(mask)        hwloc_bitmap_zero((hwloc_cpuset_t)mask)
+# define KMP_CPU_COPY(dest, src)   hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
+# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
+    { \
+        unsigned i; \
+        for(i=0;i<(unsigned)max_bit_number+1;i++) { \
+            if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \
+                hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \
+            } else { \
+                hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \
+            } \
+        } \
+    } \
+
+# define KMP_CPU_UNION(dest, src)  hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
+# define KMP_CPU_SET_ITERATE(i,mask) \
+    for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i))
+
+# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc()
+# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr);
+# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
+# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
+# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
+# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
+
+//
+// The following macro should be used to index an array of masks.
+// The array should be declared as "kmp_affinity_t *" and allocated with
+// size "__kmp_affinity_mask_size * len".  The macro takes care of the fact
+// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
+// on Linux* OS, sizeof(kmp_affin_t) is 1.
+//
+# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i]))
+# define KMP_CPU_ALLOC_ARRAY(arr, n) {                                   \
+    arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \
+    unsigned i;                                                           \
+    for(i=0;i<(unsigned)n;i++) {                                          \
+        arr[i] = hwloc_bitmap_alloc();                                    \
+    }                                                                     \
+   }
+# define KMP_CPU_FREE_ARRAY(arr, n) { \
+    unsigned i;                        \
+    for(i=0;i<(unsigned)n;i++) {       \
+        hwloc_bitmap_free(arr[i]);     \
+    }                                  \
+    __kmp_free(arr);                   \
+   }
+# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) {                               \
+    arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \
+    unsigned i;                                                                \
+    for(i=0;i<(unsigned)n;i++) {                                               \
+        arr[i] = hwloc_bitmap_alloc();                                         \
+    }                                                                          \
+   }
+# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \
+    unsigned i;                                 \
+    for(i=0;i<(unsigned)n;i++) {                \
+        hwloc_bitmap_free(arr[i]);              \
+    }                                           \
+    KMP_INTERNAL_FREE(arr);                     \
+   }
+
+#else /* KMP_USE_HWLOC */
+#  define KMP_CPU_SET_ITERATE(i,mask) \
+    for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
+
 # if KMP_OS_LINUX
 //
 // On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
@@ -526,7 +606,7 @@ typedef unsigned char kmp_affin_mask_t;
             }                                                                \
         }
 
-#  define KMP_CPU_COMPLEMENT(mask) \
+#  define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
         {                                                                    \
             size_t __i;                                                      \
             for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
@@ -605,7 +685,7 @@ extern int __kmp_num_proc_groups;
             }                                                                \
         }
 
-#   define KMP_CPU_COMPLEMENT(mask) \
+#   define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
         {                                                                    \
             int __i;                                                         \
             for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
@@ -637,7 +717,7 @@ extern kmp_SetThreadGroupAffinity_t __km
 
 extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
 
-#  else
+#  else /* KMP_GROUP_AFFINITY */
 
 typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
 
@@ -646,7 +726,7 @@ typedef DWORD kmp_affin_mask_t; /* for c
 #   define KMP_CPU_CLR(i,mask)      (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
 #   define KMP_CPU_ZERO(mask)       (*(mask) = 0)
 #   define KMP_CPU_COPY(dest, src)  (*(dest) = *(src))
-#   define KMP_CPU_COMPLEMENT(mask) (*(mask) = ~*(mask))
+#   define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask))
 #   define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))
 
 #  endif /* KMP_GROUP_AFFINITY */
@@ -660,6 +740,10 @@ typedef DWORD kmp_affin_mask_t; /* for c
 # define KMP_CPU_ALLOC(ptr) \
         (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
 # define KMP_CPU_FREE(ptr) __kmp_free(ptr)
+# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size)))
+# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
+# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size)))
+# define KMP_CPU_INTERNAL_FREE(ptr)  KMP_INTERNAL_FREE(ptr)
 
 //
 // The following macro should be used to index an array of masks.
@@ -670,6 +754,12 @@ typedef DWORD kmp_affin_mask_t; /* for c
 //
 # define KMP_CPU_INDEX(array,i) \
         ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
+# define KMP_CPU_ALLOC_ARRAY(arr, n)  arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size)
+# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
+# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n)  arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size)
+# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);
+
+#endif /* KMP_USE_HWLOC */
 
 //
 // Declare local char buffers with this size for printing debug and info
@@ -716,6 +806,9 @@ enum affinity_top_method {
     affinity_top_method_group,
 #endif /* KMP_GROUP_AFFINITY */
     affinity_top_method_flat,
+#if KMP_USE_HWLOC
+    affinity_top_method_hwloc,
+#endif
     affinity_top_method_default
 };
 

Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Mon Nov 30 14:02:59 2015
@@ -50,6 +50,50 @@ void __kmp_get_hierarchy(kmp_uint32 npro
 //
 // Print the affinity mask to the character array in a pretty format.
 //
+#if KMP_USE_HWLOC
+char *
+__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
+{
+    // hwloc variant: writes "{<hwloc list string>}" into buf (buf_len bytes at most) and returns buf.
+    int num_chars_to_write, num_chars_written;
+    char* scan;
+    KMP_ASSERT(buf_len >= 40);
+
+    // bufsize of 0 just retrieves the needed buffer size (not counting the '\0').
+    num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);
+
+    // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
+    if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
+        KMP_SNPRINTF(buf, buf_len, "{<empty>}");
+    } else if(num_chars_to_write < buf_len - 3) {
+        // no problem fitting the mask into buf_len number of characters
+        buf[0] = '{';
+        // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
+        num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
+        buf[num_chars_written+1] = '}';
+        buf[num_chars_written+2] = '\0';
+    } else {
+        // Need to truncate the affinity mask string and add ellipsis.
+        // First write '{' + as much of str(mask) as fits in buf_len-7 bytes, which
+        // leaves room for "...}" and the final '\0'.
+        buf[0] = '{';
+        hwloc_bitmap_list_snprintf(buf+1, buf_len-7, (hwloc_bitmap_t)mask);
+        // Locate the last character actually written; starting on the terminator
+        // itself would leave it in place and hide the ellipsis behind it.
+        for(scan = buf + 1; *scan != '\0'; scan++) {}
+        scan--;
+        // Back up over a cut-off number: we want {45,67,...} rather than {45,67,8...}.
+        while(scan > buf && *scan >= '0' && *scan <= '9')
+            scan--;
+        *(scan+1) = '.';
+        *(scan+2) = '.';
+        *(scan+3) = '.';
+        *(scan+4) = '}';
+        *(scan+5) = '\0';
+    }
+    return buf;
+}
+#else
 char *
 __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
 {
@@ -102,6 +146,7 @@ __kmp_affinity_print_mask(char *buf, int
     KMP_ASSERT(scan <= end);
     return buf;
 }
+#endif // KMP_USE_HWLOC
 
 
 void
@@ -263,6 +308,291 @@ __kmp_affinity_print_topology(AddrUnsPai
     }
 }
 
+#if KMP_USE_HWLOC
+static int
+__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
+  kmp_i18n_id_t *const msg_id)
+{
+    // Builds the address2os table (package/core/thread labels -> OS proc id) from
+    // __kmp_hwloc_topology.  Returns the number of label levels in the table, or 0
+    // when no table is produced (not affinity capable, or affinity type is none).
+    // NOTE(review): label indexing below relies on the four-level assumption stated further down.
+    *address2os = NULL;
+    *msg_id = kmp_i18n_null;
+
+    // Save the affinity mask for the current thread.
+    kmp_affin_mask_t *oldMask;
+    KMP_CPU_ALLOC(oldMask);
+    __kmp_get_system_affinity(oldMask, TRUE);
+
+    unsigned depth = hwloc_topology_get_depth(__kmp_hwloc_topology);
+    int threadLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_PU);
+    int coreLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
+    int pkgLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
+    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 0;
+
+    //
+    // This makes an assumption about the topology being four levels:
+    // machines -> packages -> cores -> hardware threads
+    //
+    hwloc_obj_t current_level_iterator = hwloc_get_root_obj(__kmp_hwloc_topology);
+    hwloc_obj_t child_iterator;
+    for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+        child_iterator != NULL;
+        child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+    {
+        nPackages++;
+    }
+    current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, pkgLevel, 0);
+    for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+        child_iterator != NULL;
+        child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+    {
+        nCoresPerPkg++;
+    }
+    current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, coreLevel, 0);
+    for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
+        child_iterator != NULL;
+        child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
+    {
+        __kmp_nThreadsPerCore++;
+    }
+
+    if (! KMP_AFFINITY_CAPABLE())
+    {
+        //
+        // Hack to try and infer the machine topology using only the data
+        // available from cpuid on the current thread, and __kmp_xproc.
+        //
+        KMP_ASSERT(__kmp_affinity_type == affinity_none);
+
+        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
+        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+        if (__kmp_affinity_verbose) {
+            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
+            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+            if (__kmp_affinity_uniform_topology()) {
+                KMP_INFORM(Uniform, "KMP_AFFINITY");
+            } else {
+                KMP_INFORM(NonUniform, "KMP_AFFINITY");
+            }
+            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+              __kmp_nThreadsPerCore, __kmp_ncores);
+        }
+        KMP_CPU_FREE(oldMask); // the saved mask is not needed on this path; release it
+        return 0;
+    }
+
+    // Allocate the data structure to be returned.
+    AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+
+    unsigned num_hardware_threads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
+    unsigned i;
+    hwloc_obj_t hardware_thread_iterator;
+    int nActiveThreads = 0;
+    for(i=0;i<num_hardware_threads;i++) {
+        hardware_thread_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, threadLevel, i);
+        Address addr(3);
+        // fullMask is indexed by OS proc id, not by the hwloc logical index i.
+        if(! KMP_CPU_ISSET(hardware_thread_iterator->os_index, fullMask)) continue;
+        addr.labels[0] = hardware_thread_iterator->parent->parent->logical_index;
+        addr.labels[1] = hardware_thread_iterator->parent->logical_index % nCoresPerPkg;
+        addr.labels[2] = hardware_thread_iterator->logical_index % __kmp_nThreadsPerCore;
+        retval[nActiveThreads] = AddrUnsPair(addr, hardware_thread_iterator->os_index);
+        nActiveThreads++;
+    }
+
+    // If there's only one thread context to bind to, return now.
+    KMP_ASSERT(nActiveThreads > 0);
+    if (nActiveThreads == 1) {
+        __kmp_ncores = nPackages = 1;
+        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+        if (__kmp_affinity_verbose) {
+            char buf[KMP_AFFIN_MASK_PRINT_LEN];
+            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+            if (__kmp_affinity_respect_mask) {
+                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+            } else {
+                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+            }
+            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+            KMP_INFORM(Uniform, "KMP_AFFINITY");
+            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+              __kmp_nThreadsPerCore, __kmp_ncores);
+        }
+
+        if (__kmp_affinity_type == affinity_none) {
+            __kmp_free(retval);
+            KMP_CPU_FREE(oldMask);
+            return 0;
+        }
+
+        //
+        // Form an Address object which only includes the package level (stored in labels[0] above).
+        //
+        Address addr(1);
+        addr.labels[0] = retval[0].first.labels[0];
+        retval[0].first = addr;
+
+        if (__kmp_affinity_gran_levels < 0) {
+            __kmp_affinity_gran_levels = 0;
+        }
+
+        if (__kmp_affinity_verbose) {
+            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
+        }
+
+        *address2os = retval;
+        KMP_CPU_FREE(oldMask);
+        return 1;
+    }
+
+    //
+    // Sort the table by physical Id.
+    //
+    qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
+
+    //
+    // When affinity is off, this routine will still be called to set
+    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
+    // correctly, and return if affinity is not enabled.
+    //
+    __kmp_ncores = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, coreLevel);
+
+    //
+    // Check to see if the machine topology is uniform
+    //
+    unsigned npackages = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, pkgLevel);
+    unsigned ncores = __kmp_ncores;
+    unsigned nthreads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
+    unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
+
+    //
+    // Print the machine topology summary.
+    //
+    if (__kmp_affinity_verbose) {
+        char mask[KMP_AFFIN_MASK_PRINT_LEN];
+        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+        if (__kmp_affinity_respect_mask) {
+            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
+        } else {
+            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
+        }
+        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+        if (uniform) {
+            KMP_INFORM(Uniform, "KMP_AFFINITY");
+        } else {
+            KMP_INFORM(NonUniform, "KMP_AFFINITY");
+        }
+
+        kmp_str_buf_t buf;
+        __kmp_str_buf_init(&buf);
+
+        __kmp_str_buf_print(&buf, "%d", npackages);
+        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
+          __kmp_nThreadsPerCore, __kmp_ncores);
+
+        __kmp_str_buf_free(&buf);
+    }
+
+    if (__kmp_affinity_type == affinity_none) {
+        __kmp_free(retval); // no table is handed back on this path, so release it here
+        KMP_CPU_FREE(oldMask);
+        return 0;
+    }
+
+    //
+    // Find any levels with radix 1, and remove them from the map
+    // (except for the package level).
+    //
+    int new_depth = 0;
+    int level;
+    unsigned proc;
+    for (level = 1; level < (int)depth; level++) {
+        if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
+           continue;
+        }
+        new_depth++;
+    }
+
+    //
+    // If we are removing any levels, allocate a new vector to return,
+    // and copy the relevant information to it.
+    //
+    if (new_depth != depth-1) {
+        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
+          sizeof(AddrUnsPair) * nActiveThreads);
+        for (proc = 0; (int)proc < nActiveThreads; proc++) {
+            Address addr(new_depth);
+            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
+        }
+        int new_level = 0;
+        for (level = 1; level < (int)depth; level++) {
+            if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
+               if (level == threadLevel) {
+                   threadLevel = -1;
+               }
+               else if ((threadLevel >= 0) && (level < threadLevel)) {
+                   threadLevel--;
+               }
+               if (level == coreLevel) {
+                   coreLevel = -1;
+               }
+               else if ((coreLevel >= 0) && (level < coreLevel)) {
+                   coreLevel--;
+               }
+               if (level < pkgLevel) {
+                   pkgLevel--;
+               }
+               continue;
+            }
+            // hwloc depth `level` is stored in labels[level-1] (depth 0 is the machine, which has no label).
+            for (proc = 0; (int)proc < nActiveThreads; proc++) {
+                new_retval[proc].first.labels[new_level]
+                  = retval[proc].first.labels[level-1];
+            }
+            new_level++;
+        }
+
+        __kmp_free(retval);
+        retval = new_retval;
+        // depth keeps counting the root level, so that depth-1 stays the number of labels.
+        depth = new_depth + 1;
+    }
+
+    if (__kmp_affinity_gran_levels < 0) {
+        //
+        // Set the granularity level based on what levels are modeled
+        // in the machine topology map.
+        //
+        __kmp_affinity_gran_levels = 0;
+        if ((threadLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
+            __kmp_affinity_gran_levels++;
+        }
+        if ((coreLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+            __kmp_affinity_gran_levels++;
+        }
+        if (__kmp_affinity_gran > affinity_gran_package) {
+            __kmp_affinity_gran_levels++;
+        }
+    }
+
+    if (__kmp_affinity_verbose) {
+        __kmp_affinity_print_topology(retval, nActiveThreads, depth-1, pkgLevel-1,
+          coreLevel-1, threadLevel-1);
+    }
+
+    KMP_CPU_FREE(oldMask);
+    *address2os = retval;
+    if(depth == 0) return 0;
+    else return depth-1;
+}
+#endif // KMP_USE_HWLOC
 
 //
 // If we don't know how to retrieve the machine's processor topology, or
@@ -329,7 +659,7 @@ __kmp_affinity_create_flat_map(AddrUnsPa
       __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
     int avail_ct = 0;
     unsigned int i;
-    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
+    KMP_CPU_SET_ITERATE(i, fullMask) {
         //
         // Skip this proc if it is not included in the machine model.
         //
@@ -394,7 +724,7 @@ __kmp_affinity_create_proc_group_map(Add
       __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
     int avail_ct = 0;
     int i;
-    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
+    KMP_CPU_SET_ITERATE(i, fullMask) {
         //
         // Skip this proc if it is not included in the machine model.
         //
@@ -656,7 +986,7 @@ __kmp_affinity_create_apicid_map(AddrUns
     apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
       __kmp_avail_proc * sizeof(apicThreadInfo));
     unsigned nApics = 0;
-    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
+    KMP_CPU_SET_ITERATE(i, fullMask) {
         //
         // Skip this proc if it is not included in the machine model.
         //
@@ -1167,7 +1497,7 @@ __kmp_affinity_create_x2apicid_map(AddrU
     //
     unsigned int proc;
     int nApics = 0;
-    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
+    KMP_CPU_SET_ITERATE(proc, fullMask) {
         //
         // Skip this proc if it is not included in the machine model.
         //
@@ -2282,8 +2612,8 @@ __kmp_create_masks(unsigned *maxIndex, u
             maxOsId = osId;
         }
     }
-    kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
-      (maxOsId + 1) * __kmp_affin_mask_size);
+    kmp_affin_mask_t *osId2Mask;
+    KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
 
     //
     // Sort the address2os table according to physical order.  Doing so
@@ -2314,8 +2644,8 @@ __kmp_create_masks(unsigned *maxIndex, u
     unsigned j = 0;                             // index of 1st thread on core
     unsigned leader = 0;
     Address *leaderAddr = &(address2os[0].first);
-    kmp_affin_mask_t *sum
-      = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
+    kmp_affin_mask_t *sum;
+    KMP_CPU_ALLOC_ON_STACK(sum);
     KMP_CPU_ZERO(sum);
     KMP_CPU_SET(address2os[0].second, sum);
     for (i = 1; i < numAddrs; i++) {
@@ -2365,6 +2695,7 @@ __kmp_create_masks(unsigned *maxIndex, u
         address2os[j].first.leader = (j == leader);
     }
     unique++;
+    KMP_CPU_FREE_FROM_STACK(sum);
 
     *maxIndex = maxOsId;
     *numUnique = unique;
@@ -2384,9 +2715,17 @@ static int nextNewMask;
 #define ADD_MASK(_mask) \
     {                                                                   \
         if (nextNewMask >= numNewMasks) {                               \
+            int i;                                                      \
             numNewMasks *= 2;                                           \
-            newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
-              numNewMasks * __kmp_affin_mask_size);                     \
+            kmp_affin_mask_t* temp;                                     \
+            KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);            \
+            for(i=0;i<numNewMasks/2;i++) {                              \
+                kmp_affin_mask_t* src  = KMP_CPU_INDEX(newMasks, i);    \
+                kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i);        \
+                KMP_CPU_COPY(dest, src);                                \
+            }                                                           \
+            KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2);       \
+            newMasks = temp;                                            \
         }                                                               \
         KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));    \
         nextNewMask++;                                                  \
@@ -2416,6 +2755,7 @@ __kmp_affinity_process_proclist(kmp_affi
   unsigned int *out_numMasks, const char *proclist,
   kmp_affin_mask_t *osId2Mask, int maxOsId)
 {
+    int i;
     const char *scan = proclist;
     const char *next = proclist;
 
@@ -2424,11 +2764,10 @@ __kmp_affinity_process_proclist(kmp_affi
     // so that we can use realloc() to extend it.
     //
     numNewMasks = 2;
-    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
-      * __kmp_affin_mask_size);
+    KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
     nextNewMask = 0;
-    kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
-      __kmp_affin_mask_size);
+    kmp_affin_mask_t *sumMask;
+    KMP_CPU_ALLOC(sumMask);
     int setSize = 0;
 
     for (;;) {
@@ -2632,14 +2971,17 @@ __kmp_affinity_process_proclist(kmp_affi
     *out_numMasks = nextNewMask;
     if (nextNewMask == 0) {
         *out_masks = NULL;
-        KMP_INTERNAL_FREE(newMasks);
+        KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
         return;
     }
-    *out_masks
-      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
-    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
-    __kmp_free(sumMask);
-    KMP_INTERNAL_FREE(newMasks);
+    KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
+    for(i = 0; i < nextNewMask; i++) {
+        kmp_affin_mask_t* src  = KMP_CPU_INDEX(newMasks, i);
+        kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
+        KMP_CPU_COPY(dest, src);
+    }
+    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
+    KMP_CPU_FREE(sumMask);
 }
 
 
@@ -2834,7 +3176,7 @@ __kmp_process_place(const char **scan, k
     else if (**scan == '!') {
         (*scan)++;      // skip '!'
         __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
-        KMP_CPU_COMPLEMENT(tempMask);
+        KMP_CPU_COMPLEMENT(maxOsId, tempMask);
     }
     else if ((**scan >= '0') && (**scan <= '9')) {
         next = *scan;
@@ -2866,17 +3208,23 @@ __kmp_affinity_process_placelist(kmp_aff
   unsigned int *out_numMasks, const char *placelist,
   kmp_affin_mask_t *osId2Mask, int maxOsId)
 {
+    int i,j,count,stride,sign;
     const char *scan = placelist;
     const char *next = placelist;
 
     numNewMasks = 2;
-    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
-      * __kmp_affin_mask_size);
+    KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
     nextNewMask = 0;
 
-    kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
-      __kmp_affin_mask_size);
+    // tempMask is modified based on the previous or initial
+    //   place to form the current place
+    // previousMask contains the previous place
+    kmp_affin_mask_t *tempMask;
+    kmp_affin_mask_t *previousMask;
+    KMP_CPU_ALLOC(tempMask);
     KMP_CPU_ZERO(tempMask);
+    KMP_CPU_ALLOC(previousMask);
+    KMP_CPU_ZERO(previousMask);
     int setSize = 0;
 
     for (;;) {
@@ -2910,7 +3258,7 @@ __kmp_affinity_process_placelist(kmp_aff
           "bad explicit places list");
         next = scan;
         SKIP_DIGITS(next);
-        int count = __kmp_str_to_int(scan, *next);
+        count = __kmp_str_to_int(scan, *next);
         KMP_ASSERT(count >= 0);
         scan = next;
 
@@ -2918,7 +3266,6 @@ __kmp_affinity_process_placelist(kmp_aff
         // valid follow sets are ',' ':' and EOL
         //
         SKIP_WS(scan);
-        int stride;
         if (*scan == '\0' || *scan == ',') {
             stride = +1;
         }
@@ -2929,7 +3276,7 @@ __kmp_affinity_process_placelist(kmp_aff
             //
             // Read stride parameter
             //
-            int sign = +1;
+            sign = +1;
             for (;;) {
                 SKIP_WS(scan);
                 if (*scan == '+') {
@@ -2954,66 +3301,30 @@ __kmp_affinity_process_placelist(kmp_aff
             stride *= sign;
         }
 
-        if (stride > 0) {
-            int i;
-            for (i = 0; i < count; i++) {
-                int j;
-                if (setSize == 0) {
-                    break;
-                }
-                ADD_MASK(tempMask);
-                setSize = 0;
-                for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
-                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
-                        KMP_CPU_CLR(j, tempMask);
-                    }
-                    else if ((j > maxOsId) ||
-                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
-                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
-                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
-                            KMP_WARNING(AffIgnoreInvalidProcID, j);
-                        }
-                        KMP_CPU_CLR(j, tempMask);
-                    }
-                    else {
-                        KMP_CPU_SET(j, tempMask);
-                        setSize++;
-                    }
-                }
-                for (; j >= 0; j--) {
-                    KMP_CPU_CLR(j, tempMask);
-                }
+        // Add places determined by initial_place : count : stride
+        for (i = 0; i < count; i++) {
+            if (setSize == 0) {
+                break;
             }
-        }
-        else {
-            int i;
-            for (i = 0; i < count; i++) {
-                int j;
-                if (setSize == 0) {
-                    break;
+            // Add the current place, then build the next place (tempMask) from that
+            KMP_CPU_COPY(previousMask, tempMask);
+            ADD_MASK(previousMask);
+            KMP_CPU_ZERO(tempMask);
+            setSize = 0;
+            KMP_CPU_SET_ITERATE(j, previousMask) {
+                if (! KMP_CPU_ISSET(j, previousMask)) {
+                    continue;
                 }
-                ADD_MASK(tempMask);
-                setSize = 0;
-                for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
-                  j++) {
-                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
-                        KMP_CPU_CLR(j, tempMask);
-                    }
-                    else if ((j > maxOsId) ||
-                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
-                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
-                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
-                            KMP_WARNING(AffIgnoreInvalidProcID, j);
-                        }
-                        KMP_CPU_CLR(j, tempMask);
-                    }
-                    else {
-                        KMP_CPU_SET(j, tempMask);
-                        setSize++;
+                else if ((j+stride > maxOsId) || (j+stride < 0) ||
+                  (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
+                    if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
+                      && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
+                        KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
                     }
                 }
-                for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
-                    KMP_CPU_CLR(j, tempMask);
+                else {
+                    KMP_CPU_SET(j+stride, tempMask);
+                    setSize++;
                 }
             }
         }
@@ -3038,14 +3349,18 @@ __kmp_affinity_process_placelist(kmp_aff
     *out_numMasks = nextNewMask;
     if (nextNewMask == 0) {
         *out_masks = NULL;
-        KMP_INTERNAL_FREE(newMasks);
+        KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
         return;
     }
-    *out_masks
-      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
-    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
-    __kmp_free(tempMask);
-    KMP_INTERNAL_FREE(newMasks);
+    KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
+    KMP_CPU_FREE(tempMask);
+    KMP_CPU_FREE(previousMask);
+    for(i = 0; i < nextNewMask; i++) {
+        kmp_affin_mask_t* src  = KMP_CPU_INDEX(newMasks, i);
+        kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
+        KMP_CPU_COPY(dest, src);
+    }
+    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
 }
 
 # endif /* OMP_40_ENABLED */
@@ -3140,7 +3455,7 @@ __kmp_aux_affinity_initialize(void)
     // processors that we know about on the machine.
     //
     if (fullMask == NULL) {
-        fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
+        KMP_CPU_ALLOC(fullMask);
     }
     if (KMP_AFFINITY_CAPABLE()) {
         if (__kmp_affinity_respect_mask) {
@@ -3151,7 +3466,7 @@ __kmp_aux_affinity_initialize(void)
             //
             unsigned i;
             __kmp_avail_proc = 0;
-            for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
+            KMP_CPU_SET_ITERATE(i, fullMask) {
                 if (! KMP_CPU_ISSET(i, fullMask)) {
                     continue;
                 }
@@ -3193,39 +3508,60 @@ __kmp_aux_affinity_initialize(void)
         //
         const char *file_name = NULL;
         int line = 0;
-
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
-        if (__kmp_affinity_verbose) {
-            KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
+# if KMP_USE_HWLOC
+        if (depth < 0) {
+            if (__kmp_affinity_verbose) {
+                KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+            }
+            if(!__kmp_hwloc_error) {
+                depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
+                if (depth == 0) {
+                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
+                    KMP_ASSERT(address2os == NULL);
+                    return;
+                } else if(depth < 0 && __kmp_affinity_verbose) {
+                    KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
+                }
+            } else if(__kmp_affinity_verbose) {
+                KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
+            }
         }
+# endif
 
-        file_name = NULL;
-        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
-        if (depth == 0) {
-            KMP_ASSERT(__kmp_affinity_type == affinity_none);
-            KMP_ASSERT(address2os == NULL);
-            return;
-        }
+# if KMP_ARCH_X86 || KMP_ARCH_X86_64
 
         if (depth < 0) {
             if (__kmp_affinity_verbose) {
-                if (msg_id != kmp_i18n_null) {
-                    KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
-                      KMP_I18N_STR(DecodingLegacyAPIC));
-                }
-                else {
-                    KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
-                }
+                KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
             }
 
             file_name = NULL;
-            depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
+            depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
             if (depth == 0) {
                 KMP_ASSERT(__kmp_affinity_type == affinity_none);
                 KMP_ASSERT(address2os == NULL);
                 return;
             }
+
+            if (depth < 0) {
+                if (__kmp_affinity_verbose) {
+                    if (msg_id != kmp_i18n_null) {
+                        KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
+                          KMP_I18N_STR(DecodingLegacyAPIC));
+                    }
+                    else {
+                        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
+                    }
+                }
+
+                file_name = NULL;
+                depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
+                if (depth == 0) {
+                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
+                    KMP_ASSERT(address2os == NULL);
+                    return;
+                }
+            }
         }
 
 # endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
@@ -3430,6 +3766,50 @@ __kmp_aux_affinity_initialize(void)
         KMP_ASSERT(address2os != NULL);
     }
 
+# if KMP_USE_HWLOC
+    else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+        if (__kmp_affinity_verbose) {
+            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+        }
+        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
+        if (depth == 0) {
+            KMP_ASSERT(__kmp_affinity_type == affinity_none);
+            KMP_ASSERT(address2os == NULL);
+            return;
+        }
+#  if KMP_DEBUG
+        AddrUnsPair *otheraddress2os = NULL;
+        int otherdepth = -1;
+#   if KMP_MIC
+        otherdepth = __kmp_affinity_create_apicid_map(&otheraddress2os, &msg_id);
+#   else
+        otherdepth = __kmp_affinity_create_x2apicid_map(&otheraddress2os, &msg_id);
+#   endif
+        if(otheraddress2os != NULL && address2os != NULL) {
+            int i;
+            unsigned arent_equal_flag = 0;
+            for(i=0;i<__kmp_avail_proc;i++) {
+                if(otheraddress2os[i] != address2os[i]) arent_equal_flag = 1;
+            }
+            if(arent_equal_flag) {
+                KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are different from APICID\n"));
+                KA_TRACE(10, ("__kmp_aux_affinity_initialize: APICID Table:\n"));
+                for(i=0;i<__kmp_avail_proc;i++) {
+                    otheraddress2os[i].print(); __kmp_printf("\n");
+                }
+                KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc Table:\n"));
+                for(i=0;i<__kmp_avail_proc;i++) {
+                    address2os[i].print(); __kmp_printf("\n");
+                }
+            }
+            else {
+                KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are same as APICID\n"));
+            }
+        }
+#  endif // KMP_DEBUG
+    }
+# endif // KMP_USE_HWLOC
+
     if (address2os == NULL) {
         if (KMP_AFFINITY_CAPABLE()
           && (__kmp_affinity_verbose || (__kmp_affinity_warnings
@@ -3608,8 +3988,7 @@ __kmp_aux_affinity_initialize(void)
         }
 # endif
 
-        __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
-          __kmp_affinity_num_masks * __kmp_affin_mask_size);
+        KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
 
         //
         // Sort the address2os table according to the current setting of
@@ -3679,7 +4058,7 @@ void
 __kmp_affinity_uninitialize(void)
 {
     if (__kmp_affinity_masks != NULL) {
-        __kmp_free(__kmp_affinity_masks);
+        KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
         __kmp_affinity_masks = NULL;
     }
     if (fullMask != NULL) {
@@ -3909,7 +4288,7 @@ __kmp_aux_set_affinity(void **mask)
             unsigned proc;
             int num_procs = 0;
 
-            for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
+            KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
                 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                     continue;
                 }
@@ -4027,7 +4406,11 @@ __kmp_aux_set_affinity_mask_proc(int pro
         }
     }
 
-    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
+    if ((proc < 0)
+# if !KMP_USE_HWLOC
+         || ((unsigned)proc >= KMP_CPU_SETSIZE)
+# endif
+       ) {
         return -1;
     }
     if (! KMP_CPU_ISSET(proc, fullMask)) {
@@ -4063,7 +4446,11 @@ __kmp_aux_unset_affinity_mask_proc(int p
         }
     }
 
-    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
+    if ((proc < 0)
+# if !KMP_USE_HWLOC
+         || ((unsigned)proc >= KMP_CPU_SETSIZE)
+# endif
+       ) {
         return -1;
     }
     if (! KMP_CPU_ISSET(proc, fullMask)) {
@@ -4099,8 +4486,12 @@ __kmp_aux_get_affinity_mask_proc(int pro
         }
     }
 
-    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
-        return 0;
+    if ((proc < 0)
+# if !KMP_USE_HWLOC
+         || ((unsigned)proc >= KMP_CPU_SETSIZE)
+# endif
+       ) {
+        return -1;
     }
     if (! KMP_CPU_ISSET(proc, fullMask)) {
         return 0;
@@ -4137,7 +4528,8 @@ void __kmp_balanced_affinity( int tid, i
         KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
           "Illegal set affinity operation when not capable");
 
-        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
+        kmp_affin_mask_t *mask;
+        KMP_CPU_ALLOC_ON_STACK(mask);
         KMP_CPU_ZERO(mask);
 
         // Granularity == thread
@@ -4158,9 +4550,11 @@ void __kmp_balanced_affinity( int tid, i
               tid, buf);
         }
         __kmp_set_system_affinity( mask, TRUE );
+        KMP_CPU_FREE_FROM_STACK(mask);
     } else { // Non-uniform topology
 
-        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
+        kmp_affin_mask_t *mask;
+        KMP_CPU_ALLOC_ON_STACK(mask);
         KMP_CPU_ZERO(mask);
 
         // Number of hyper threads per core in HT machine
@@ -4334,6 +4728,7 @@ void __kmp_balanced_affinity( int tid, i
               tid, buf);
         }
         __kmp_set_system_affinity( mask, TRUE );
+        KMP_CPU_FREE_FROM_STACK(mask);
     }
 }
 

Modified: openmp/trunk/runtime/src/kmp_affinity.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.h?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.h (original)
+++ openmp/trunk/runtime/src/kmp_affinity.h Mon Nov 30 14:02:59 2015
@@ -57,6 +57,13 @@ public:
     bool operator!=(const Address &b) const {
         return !operator==(b);
     }
+    // Debug helper: write the depth, then each label below that depth, via printf.
+    void print() const {
+        printf("Depth: %u --- ", depth);
+        for (unsigned level = 0; level < depth; ++level) {
+            printf("%u ", labels[level]);
+        }
+    }
 };
 
 class AddrUnsPair {
@@ -72,6 +79,18 @@ public:
         second = b.second;
         return *this;
     }
+    // Debug helper: dump the Address part, then the paired unsigned value.
+    void print() const {
+        printf("first = ");
+        first.print();
+        printf(" --- second = %u", second);
+    }
+    // Equal only when both the Address and the paired value match.
+    bool operator==(const AddrUnsPair &b) const {
+        return (first == b.first) && (second == b.second);
+    }
+    // Negation of operator==.
+    bool operator!=(const AddrUnsPair &b) const { return !(*this == b); }
 };
 
 

Modified: openmp/trunk/runtime/src/kmp_config.h.cmake
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_config.h.cmake?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_config.h.cmake (original)
+++ openmp/trunk/runtime/src/kmp_config.h.cmake Mon Nov 30 14:02:59 2015
@@ -51,6 +51,8 @@
 #cmakedefine01 LIBOMP_ENABLE_ASSERTIONS
 #define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS
 #cmakedefine01 STUBS_LIBRARY
+#cmakedefine01 LIBOMP_USE_HWLOC
+#define KMP_USE_HWLOC LIBOMP_USE_HWLOC
 #define KMP_ARCH_STR "@LIBOMP_LEGAL_ARCH@"
 #define KMP_LIBRARY_FILE "@LIBOMP_LIB_FILE@"
 #define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@

Modified: openmp/trunk/runtime/src/kmp_ftn_entry.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_ftn_entry.h?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_ftn_entry.h (original)
+++ openmp/trunk/runtime/src/kmp_ftn_entry.h Mon Nov 30 14:02:59 2015
@@ -257,7 +257,7 @@ FTN_GET_AFFINITY_MAX_PROC( void )
             return 0;
         }
 
-    #if KMP_GROUP_AFFINITY
+    #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC
         if ( __kmp_num_proc_groups > 1 ) {
             return (int)KMP_CPU_SETSIZE;
         }
@@ -278,7 +278,11 @@ FTN_CREATE_AFFINITY_MASK( void **mask )
         if ( ! TCR_4(__kmp_init_middle) ) {
             __kmp_middle_initialize();
         }
+    # if KMP_USE_HWLOC
+        *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc();
+    # else
         *mask = kmpc_malloc( __kmp_affin_mask_size );
+    # endif
         KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) );
     #endif
 }
@@ -300,7 +304,11 @@ FTN_DESTROY_AFFINITY_MASK( void **mask )
 	        KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" );
 	    }
         }
+    # if KMP_USE_HWLOC
+        hwloc_bitmap_free((hwloc_cpuset_t)(*mask));
+    # else
         kmpc_free( *mask );
+    # endif
         *mask = NULL;
     #endif
 }

Modified: openmp/trunk/runtime/src/kmp_global.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.c?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.c (original)
+++ openmp/trunk/runtime/src/kmp_global.c Mon Nov 30 14:02:59 2015
@@ -33,6 +33,10 @@ __thread kmp_stats_list* __kmp_stats_thr
 // gives reference tick for all events (considered the 0 tick)
 tsc_tick_count __kmp_stats_start_time;
 #endif
+#if KMP_USE_HWLOC
+int __kmp_hwloc_error = FALSE;                /* set to TRUE when an hwloc_* call fails or hwloc cannot be used */
+hwloc_topology_t __kmp_hwloc_topology = NULL; /* topology handle; passed to hwloc_topology_init() in __kmp_env_initialize() */
+#endif
 
 /* ----------------------------------------------------- */
 /* INITIALIZATION VARIABLES */

Modified: openmp/trunk/runtime/src/kmp_settings.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_settings.c?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_settings.c (original)
+++ openmp/trunk/runtime/src/kmp_settings.c Mon Nov 30 14:02:59 2015
@@ -3009,6 +3009,11 @@ __kmp_stg_parse_topology_method( char co
     else if ( __kmp_str_match( "flat", 1, value ) ) {
         __kmp_affinity_top_method = affinity_top_method_flat;
     }
+# if KMP_USE_HWLOC
+    else if ( __kmp_str_match( "hwloc", 1, value) ) {
+        __kmp_affinity_top_method = affinity_top_method_hwloc;
+    }
+# endif
     else {
         KMP_WARNING( StgInvalidValue, name, value );
     }
@@ -5119,11 +5124,43 @@ __kmp_env_initialize( char const * strin
         // affinity.
         //
         const char *var = "KMP_AFFINITY";
+# if KMP_USE_HWLOC
+        if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
+            __kmp_hwloc_error = TRUE;
+            if(__kmp_affinity_verbose)
+                KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
+        }
+        hwloc_topology_ignore_type(__kmp_hwloc_topology, HWLOC_OBJ_CACHE);
+# endif
         if ( __kmp_affinity_type == affinity_disabled ) {
             KMP_AFFINITY_DISABLE();
         }
         else if ( ! KMP_AFFINITY_CAPABLE() ) {
+# if KMP_USE_HWLOC
+            const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
+            if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
+                __kmp_hwloc_error = TRUE;
+                if(__kmp_affinity_verbose)
+                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
+            }
+            // Is the system capable of setting/getting this thread's affinity?
+            // also, is topology discovery possible? (pu indicates ability to discover processing units)
+            // and finally, were there no errors when calling any hwloc_* API functions?
+            if(topology_support->cpubind->set_thisthread_cpubind &&
+               topology_support->cpubind->get_thisthread_cpubind &&
+               topology_support->discovery->pu &&
+               !__kmp_hwloc_error)
+            {
+                // enables affinity according to KMP_AFFINITY_CAPABLE() macro
+                KMP_AFFINITY_ENABLE(TRUE);
+            } else {
+                // indicate that hwloc didn't work and disable affinity
+                __kmp_hwloc_error = TRUE;
+                KMP_AFFINITY_DISABLE();
+            }
+# else
             __kmp_affinity_determine_capable( var );
+# endif // KMP_USE_HWLOC
             if ( ! KMP_AFFINITY_CAPABLE() ) {
                 if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings
                   && ( __kmp_affinity_type != affinity_default )

Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Mon Nov 30 14:02:59 2015
@@ -175,8 +175,11 @@ __kmp_set_system_affinity( kmp_affin_mas
 {
     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
       "Illegal set affinity operation when not capable");
-
+#if KMP_USE_HWLOC
+    int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
+#else
     int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
+#endif
     if (retval >= 0) {
         return 0;
     }
@@ -198,7 +201,11 @@ __kmp_get_system_affinity( kmp_affin_mas
     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
       "Illegal get affinity operation when not capable");
 
+#if KMP_USE_HWLOC
+    int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
+#else
     int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
+#endif
     if (retval >= 0) {
         return 0;
     }
@@ -220,10 +227,12 @@ __kmp_affinity_bind_thread( int which )
     KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
       "Illegal set affinity operation when not capable");
 
-    kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
+    kmp_affin_mask_t *mask;
+    KMP_CPU_ALLOC_ON_STACK(mask);
     KMP_CPU_ZERO(mask);
     KMP_CPU_SET(which, mask);
     __kmp_set_system_affinity(mask, TRUE);
+    KMP_CPU_FREE_FROM_STACK(mask);
 }
 
 /*

Modified: openmp/trunk/runtime/test/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/CMakeLists.txt?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/test/CMakeLists.txt (original)
+++ openmp/trunk/runtime/test/CMakeLists.txt Mon Nov 30 14:02:59 2015
@@ -1,12 +1,23 @@
 # CMakeLists.txt file for unit testing OpenMP Library
 include(FindPythonInterp)
 include(CheckTypeSize)
+
 if(NOT PYTHONINTERP_FOUND)
   libomp_warning_say("Could not find Python.")
   libomp_warning_say("The check-libomp target will not be available!")
   return()
 endif()
 
+macro(pythonize_bool var)  # overwrite the named variable with the Python literal True or False
+  if(NOT ${var})
+    set(${var} False)
+  else()
+    set(${var} True)
+  endif()
+endmacro()
+
+pythonize_bool(LIBOMP_USE_HWLOC)
+
 set(LIBOMP_TEST_CFLAGS "" CACHE STRING
   "Extra compiler flags to send to the test compiler")
 

Modified: openmp/trunk/runtime/test/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/lit.cfg?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/test/lit.cfg (original)
+++ openmp/trunk/runtime/test/lit.cfg Mon Nov 30 14:02:59 2015
@@ -9,11 +9,20 @@ if 'PYLINT_IMPORT' in os.environ:
     config = object()
     lit_config = object()
 
-def append_dynamic_library_path(name, value, sep):
+def append_dynamic_library_path(path):
+    if config.operating_system == 'Windows':
+        name = 'PATH'
+        sep = ';'
+    elif config.operating_system == 'Darwin':
+        name = 'DYLD_LIBRARY_PATH'
+        sep = ':'
+    else:
+        name = 'LD_LIBRARY_PATH'
+        sep = ':'
     if name in config.environment:
-        config.environment[name] = value + sep + config.environment[name]
+        config.environment[name] = path + sep + config.environment[name]
     else:
-        config.environment[name] = value
+        config.environment[name] = path
 
 # name: The name of this test suite.
 config.name = 'libomp'
@@ -38,13 +47,15 @@ config.test_cflags = config.test_openmp_
     " " + config.test_extra_cflags
 
 # Setup environment to find dynamic library at runtime
-if config.operating_system == 'Windows':
-    append_dynamic_library_path('PATH', config.library_dir, ";")
-elif config.operating_system == 'Darwin':
-    append_dynamic_library_path('DYLD_LIBRARY_PATH', config.library_dir, ":")
+append_dynamic_library_path(config.library_dir)
+if config.using_hwloc:
+    append_dynamic_library_path(config.hwloc_library_dir)
+
+# Rpath modifications for Darwin
+if config.operating_system == 'Darwin':
     config.test_cflags += " -Wl,-rpath," + config.library_dir
-else: # Unices
-    append_dynamic_library_path('LD_LIBRARY_PATH', config.library_dir, ":")
+    if config.using_hwloc:
+        config.test_cflags += " -Wl,-rpath," + config.hwloc_library_dir
 
 # substitutions
 config.substitutions.append(("%libomp-compile-and-run", \

Modified: openmp/trunk/runtime/test/lit.site.cfg.in
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/lit.site.cfg.in?rev=254320&r1=254319&r2=254320&view=diff
==============================================================================
--- openmp/trunk/runtime/test/lit.site.cfg.in (original)
+++ openmp/trunk/runtime/test/lit.site.cfg.in Mon Nov 30 14:02:59 2015
@@ -7,6 +7,8 @@ config.libomp_obj_root = "@CMAKE_CURRENT
 config.library_dir = "@LIBOMP_LIBRARY_DIR@"
 config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src"
 config.operating_system = "@CMAKE_SYSTEM_NAME@"
+config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@"
+config.using_hwloc = @LIBOMP_USE_HWLOC@
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@LIBOMP_BASE_DIR@/test/lit.cfg")




More information about the Openmp-commits mailing list