[Openmp-commits] [openmp] r286890 - Introduce dynamic affinity dispatch capabilities

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Mon Nov 14 13:08:36 PST 2016


Author: jlpeyton
Date: Mon Nov 14 15:08:35 2016
New Revision: 286890

URL: http://llvm.org/viewvc/llvm-project?rev=286890&view=rev
Log:
Introduce dynamic affinity dispatch capabilities

This set of changes enables the affinity interface (either the preexisting
native operating system interface or HWLOC) to be selected dynamically at
runtime initialization. The motivation for this change is that we were seeing
performance degradations when using HWLOC. This allows the user to fall back
to the old affinity mechanisms, which on large machines (>64 cores) makes a
large difference in initialization time.

These changes mostly move affinity code under a small class hierarchy:

KMPAffinity
  class Mask {}
KMPNativeAffinity : public KMPAffinity
  class Mask : public KMPAffinity::Mask
KMPHwlocAffinity : public KMPAffinity
  class Mask : public KMPAffinity::Mask

Since all interface functions (for both affinity and the mask implementation)
are virtual, the implementation can be chosen at runtime initialization.

Differential Revision: https://reviews.llvm.org/D26356

Modified:
    openmp/trunk/runtime/cmake/LibompHandleFlags.cmake
    openmp/trunk/runtime/cmake/config-ix.cmake
    openmp/trunk/runtime/src/kmp.h
    openmp/trunk/runtime/src/kmp_affinity.cpp
    openmp/trunk/runtime/src/kmp_affinity.h
    openmp/trunk/runtime/src/kmp_ftn_cdecl.c
    openmp/trunk/runtime/src/kmp_ftn_entry.h
    openmp/trunk/runtime/src/kmp_ftn_extra.c
    openmp/trunk/runtime/src/kmp_global.c
    openmp/trunk/runtime/src/kmp_runtime.c
    openmp/trunk/runtime/src/kmp_settings.c
    openmp/trunk/runtime/src/z_Linux_util.c
    openmp/trunk/runtime/src/z_Windows_NT_util.c

Modified: openmp/trunk/runtime/cmake/LibompHandleFlags.cmake
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/cmake/LibompHandleFlags.cmake?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/cmake/LibompHandleFlags.cmake (original)
+++ openmp/trunk/runtime/cmake/LibompHandleFlags.cmake Mon Nov 14 15:08:35 2016
@@ -28,6 +28,7 @@ function(libomp_get_c_and_cxxflags_commo
   set(flags_local)
   libomp_append(flags_local -std=c++11 LIBOMP_HAVE_STD_CPP11_FLAG)
   libomp_append(flags_local -fno-exceptions LIBOMP_HAVE_FNO_EXCEPTIONS_FLAG)
+  libomp_append(flags_local -fno-rtti LIBOMP_HAVE_FNO_RTTI_FLAG)
   if(${LIBOMP_ENABLE_WERROR})
     libomp_append(flags_local -Werror LIBOMP_HAVE_WERROR_FLAG)
   endif()

Modified: openmp/trunk/runtime/cmake/config-ix.cmake
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/cmake/config-ix.cmake?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/cmake/config-ix.cmake (original)
+++ openmp/trunk/runtime/cmake/config-ix.cmake Mon Nov 14 15:08:35 2016
@@ -49,6 +49,7 @@ endfunction()
 # Checking C, CXX, Linker Flags
 check_cxx_compiler_flag(-std=c++11 LIBOMP_HAVE_STD_CPP11_FLAG)
 check_cxx_compiler_flag(-fno-exceptions LIBOMP_HAVE_FNO_EXCEPTIONS_FLAG)
+check_cxx_compiler_flag(-fno-rtti LIBOMP_HAVE_FNO_RTTI_FLAG)
 check_c_compiler_flag("-x c++" LIBOMP_HAVE_X_CPP_FLAG)
 check_c_compiler_flag(-Werror LIBOMP_HAVE_WERROR_FLAG)
 check_c_compiler_flag(-Wunused-function LIBOMP_HAVE_WNO_UNUSED_FUNCTION_FLAG)

Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Mon Nov 14 15:08:35 2016
@@ -528,8 +528,8 @@ typedef int PACKED_REDUCTION_METHOD_T;
  */
 #if KMP_AFFINITY_SUPPORTED
 
-# if KMP_GROUP_AFFINITY
 // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
+# if KMP_OS_WINDOWS
 #  if _MSC_VER < 1600
 typedef struct GROUP_AFFINITY {
     KAFFINITY Mask;
@@ -537,7 +537,11 @@ typedef struct GROUP_AFFINITY {
     WORD Reserved[3];
 } GROUP_AFFINITY;
 #  endif /* _MSC_VER < 1600 */
+#  if KMP_GROUP_AFFINITY
 extern int __kmp_num_proc_groups;
+#  else
+static const int __kmp_num_proc_groups = 1;
+#  endif /* KMP_GROUP_AFFINITY */
 typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
 extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
 
@@ -549,285 +553,107 @@ extern kmp_GetThreadGroupAffinity_t __km
 
 typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
 extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
-# endif /* KMP_GROUP_AFFINITY */
+# endif /* KMP_OS_WINDOWS */
+
+# if KMP_USE_HWLOC
+extern hwloc_topology_t __kmp_hwloc_topology;
+extern int __kmp_hwloc_error;
+# endif
 
 extern size_t __kmp_affin_mask_size;
 # define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
 # define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
 # define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
-# if !KMP_USE_HWLOC
-#  define KMP_CPU_SETSIZE        (__kmp_affin_mask_size * CHAR_BIT)
-#  define KMP_CPU_SET_ITERATE(i,mask) \
-    for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
-# endif
-
-#if KMP_USE_HWLOC
-
-extern hwloc_topology_t __kmp_hwloc_topology;
-extern int __kmp_hwloc_error;
-typedef hwloc_cpuset_t kmp_affin_mask_t;
-# define KMP_CPU_SET(i,mask)       hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_ISSET(i,mask)     hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_CLR(i,mask)       hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i)
-# define KMP_CPU_ZERO(mask)        hwloc_bitmap_zero((hwloc_cpuset_t)mask)
-# define KMP_CPU_COPY(dest, src)   hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
-# define KMP_CPU_AND(dest, src)    hwloc_bitmap_and((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
-    { \
-        unsigned i; \
-        for(i=0;i<(unsigned)max_bit_number+1;i++) { \
-            if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \
-                hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \
-            } else { \
-                hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \
-            } \
-        } \
-        hwloc_bitmap_and((hwloc_cpuset_t)mask, (hwloc_cpuset_t)mask, \
-            (hwloc_cpuset_t)__kmp_affin_fullMask); \
-    } \
-
-# define KMP_CPU_UNION(dest, src)  hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
 # define KMP_CPU_SET_ITERATE(i,mask) \
-    for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i))
-
-# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc()
-# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr);
+    for (i = (mask)->begin(); i != (mask)->end() ; i = (mask)->next(i))
+# define KMP_CPU_SET(i,mask) (mask)->set(i)
+# define KMP_CPU_ISSET(i,mask) (mask)->is_set(i)
+# define KMP_CPU_CLR(i,mask) (mask)->clear(i)
+# define KMP_CPU_ZERO(mask) (mask)->zero()
+# define KMP_CPU_COPY(dest, src) (dest)->copy(src)
+# define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
+# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
+# define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
+# define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
+# define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
 # define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
 # define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
 # define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
 # define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
-
-//
-// The following macro should be used to index an array of masks.
-// The array should be declared as "kmp_affinity_t *" and allocated with
-// size "__kmp_affinity_mask_size * len".  The macro takes care of the fact
-// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
-// on Linux* OS, sizeof(kmp_affin_t) is 1.
-//
-# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i]))
-# define KMP_CPU_ALLOC_ARRAY(arr, n) {                                   \
-    arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \
-    unsigned i;                                                           \
-    for(i=0;i<(unsigned)n;i++) {                                          \
-        arr[i] = hwloc_bitmap_alloc();                                    \
-    }                                                                     \
-   }
-# define KMP_CPU_FREE_ARRAY(arr, n) { \
-    unsigned i;                        \
-    for(i=0;i<(unsigned)n;i++) {       \
-        hwloc_bitmap_free(arr[i]);     \
-    }                                  \
-    __kmp_free(arr);                   \
-   }
-# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) {                               \
-    arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \
-    unsigned i;                                                                \
-    for(i=0;i<(unsigned)n;i++) {                                               \
-        arr[i] = hwloc_bitmap_alloc();                                         \
-    }                                                                          \
-   }
-# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \
-    unsigned i;                                 \
-    for(i=0;i<(unsigned)n;i++) {                \
-        hwloc_bitmap_free(arr[i]);              \
-    }                                           \
-    KMP_INTERNAL_FREE(arr);                     \
-   }
-
-#else /* KMP_USE_HWLOC */
-# if KMP_OS_LINUX
-//
-// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
-// (in bytes).  It should be allocated on a word boundary.
-//
-// WARNING!!!  We have made the base type of the affinity mask unsigned char,
-// in order to eliminate a lot of checks that the true system mask size is
-// really a multiple of 4 bytes (on Linux* OS).
-//
-// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!!
-//
-
-typedef unsigned char kmp_affin_mask_t;
-
-#  define _KMP_CPU_SET(i,mask)   (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
-#  define KMP_CPU_SET(i,mask)    _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
-#  define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT))))
-#  define KMP_CPU_ISSET(i,mask)  _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
-#  define _KMP_CPU_CLR(i,mask)   (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
-#  define KMP_CPU_CLR(i,mask)    _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
-
-#  define KMP_CPU_ZERO(mask) \
-        {                                                                    \
-            size_t __i;                                                      \
-            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
-                ((kmp_affin_mask_t *)(mask))[__i] = 0;                       \
-            }                                                                \
-        }
-
-#  define KMP_CPU_COPY(dest, src) \
-        {                                                                    \
-            size_t __i;                                                      \
-            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
-                ((kmp_affin_mask_t *)(dest))[__i]                            \
-                  = ((kmp_affin_mask_t *)(src))[__i];                        \
-            }                                                                \
-        }
-
-#  define KMP_CPU_AND(dest, src) \
-        {                                                                    \
-            size_t __i;                                                      \
-            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
-                ((kmp_affin_mask_t *)(dest))[__i]                            \
-                  &= ((kmp_affin_mask_t *)(src))[__i];                       \
-            }                                                                \
-        }
-
-#  define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
-        {                                                                    \
-            size_t __i;                                                      \
-            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
-                ((kmp_affin_mask_t *)(mask))[__i]                            \
-                  = ~((kmp_affin_mask_t *)(mask))[__i];                      \
-            }                                                                \
-            KMP_CPU_AND(mask, __kmp_affin_fullMask);                                     \
-        }
-
-#  define KMP_CPU_UNION(dest, src) \
-        {                                                                    \
-            size_t __i;                                                      \
-            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
-                ((kmp_affin_mask_t *)(dest))[__i]                            \
-                  |= ((kmp_affin_mask_t *)(src))[__i];                       \
-            }                                                                \
-        }
-
-# endif /* KMP_OS_LINUX */
-
-# if KMP_OS_WINDOWS
-//
-// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on
-// Intel(R) 64 it is 8 bytes times the number of processor groups.
-//
-
-#  if KMP_GROUP_AFFINITY
-typedef DWORD_PTR kmp_affin_mask_t;
-
-#   define _KMP_CPU_SET(i,mask) \
-        (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |=                    \
-        (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
-
-#   define KMP_CPU_SET(i,mask) \
-        _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
-
-#   define _KMP_CPU_ISSET(i,mask) \
-        (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &                  \
-        (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))))
-
-#   define KMP_CPU_ISSET(i,mask) \
-        _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
-
-#   define _KMP_CPU_CLR(i,mask) \
-        (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &=                    \
-        ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
-
-#   define KMP_CPU_CLR(i,mask) \
-        _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
-
-#   define KMP_CPU_ZERO(mask) \
-        {                                                                    \
-            int __i;                                                         \
-            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
-                ((kmp_affin_mask_t *)(mask))[__i] = 0;                       \
-            }                                                                \
-        }
-
-#   define KMP_CPU_COPY(dest, src) \
-        {                                                                    \
-            int __i;                                                         \
-            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
-                ((kmp_affin_mask_t *)(dest))[__i]                            \
-                  = ((kmp_affin_mask_t *)(src))[__i];                        \
-            }                                                                \
-        }
-
-#   define KMP_CPU_AND(dest, src) \
-        {                                                                    \
-            int __i;                                                         \
-            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
-                ((kmp_affin_mask_t *)(dest))[__i]                            \
-                  &= ((kmp_affin_mask_t *)(src))[__i];                       \
-            }                                                                \
-        }
-
-#   define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
-        {                                                                    \
-            int __i;                                                         \
-            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
-                ((kmp_affin_mask_t *)(mask))[__i]                            \
-                  = ~((kmp_affin_mask_t *)(mask))[__i];                      \
-            }                                                                \
-            KMP_CPU_AND(mask, __kmp_affin_fullMask);                                     \
-        }
-
-#   define KMP_CPU_UNION(dest, src) \
-        {                                                                    \
-            int __i;                                                         \
-            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
-                ((kmp_affin_mask_t *)(dest))[__i]                            \
-                  |= ((kmp_affin_mask_t *)(src))[__i];                       \
-            }                                                                \
-        }
-
-
-#  else /* KMP_GROUP_AFFINITY */
-
-typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
-
-#   define KMP_CPU_SET(i,mask)      (*(mask) |= (((kmp_affin_mask_t)1) << (i)))
-#   define KMP_CPU_ISSET(i,mask)    (!!(*(mask) & (((kmp_affin_mask_t)1) << (i))))
-#   define KMP_CPU_CLR(i,mask)      (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
-#   define KMP_CPU_ZERO(mask)       (*(mask) = 0)
-#   define KMP_CPU_COPY(dest, src)  (*(dest) = *(src))
-#   define KMP_CPU_AND(dest, src)   (*(dest) &= *(src))
-#   define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask)); KMP_CPU_AND(mask, __kmp_affin_fullMask)
-#   define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))
-
-#  endif /* KMP_GROUP_AFFINITY */
-
-# endif /* KMP_OS_WINDOWS */
-
-//
-// __kmp_allocate() will return memory allocated on a 4-bytes boundary.
-// after zeroing it - it takes care of those assumptions stated above.
-//
-# define KMP_CPU_ALLOC(ptr) \
-        (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
-# define KMP_CPU_FREE(ptr) __kmp_free(ptr)
-# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size)))
-# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
-# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size)))
-# define KMP_CPU_INTERNAL_FREE(ptr)  KMP_INTERNAL_FREE(ptr)
-
-//
-// The following macro should be used to index an array of masks.
-// The array should be declared as "kmp_affinity_t *" and allocated with
-// size "__kmp_affinity_mask_size * len".  The macro takes care of the fact
-// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
-// on Linux* OS, sizeof(kmp_affin_t) is 1.
-//
-# define KMP_CPU_INDEX(array,i) \
-        ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
-# define KMP_CPU_ALLOC_ARRAY(arr, n)  arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size)
-# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
-# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n)  arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size)
-# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);
-
-#endif /* KMP_USE_HWLOC */
-
-// prototype after typedef of kmp_affin_mask_t
-#if KMP_GROUP_AFFINITY
-extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
+# define KMP_CPU_INDEX(arr,i) __kmp_affinity_dispatch->index_mask_array(arr, i)
+# define KMP_CPU_ALLOC_ARRAY(arr, n) (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
+# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_affinity_dispatch->deallocate_mask_array(arr)
+# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
+# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
+# define __kmp_get_system_affinity(mask, abort_bool) (mask)->get_system_affinity(abort_bool)
+# define __kmp_set_system_affinity(mask, abort_bool) (mask)->set_system_affinity(abort_bool)
+# define __kmp_get_proc_group(mask) (mask)->get_proc_group()
+
+class KMPAffinity {
+public:
+    class Mask {
+    public:
+        void* operator new(size_t n);
+        void operator delete(void* p);
+        void* operator new[](size_t n);
+        void operator delete[](void* p);
+        virtual ~Mask() {}
+        // Set bit i to 1
+        virtual void set(int i) {}
+        // Return bit i
+        virtual bool is_set(int i) const { return false; }
+        // Set bit i to 0
+        virtual void clear(int i) {}
+        // Zero out entire mask
+        virtual void zero() {}
+        // Copy src into this mask
+        virtual void copy(const Mask* src) {}
+        // this &= rhs
+        virtual void bitwise_and(const Mask* rhs) {}
+        // this |= rhs
+        virtual void bitwise_or(const Mask* rhs) {}
+        // this = ~this
+        virtual void bitwise_not() {}
+        // API for iterating over an affinity mask
+        // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
+        virtual int begin() const { return 0; }
+        virtual int end() const { return 0; }
+        virtual int next(int previous) const { return 0; }
+        // Set the system's affinity to this affinity mask's value
+        virtual int set_system_affinity(bool abort_on_error) const { return -1; }
+        // Set this affinity mask to the current system affinity
+        virtual int get_system_affinity(bool abort_on_error) { return -1; }
+        // Only 1 DWORD in the mask should have any procs set.
+        // Return the appropriate index, or -1 for an invalid mask.
+        virtual int get_proc_group() const { return -1; }
+    };
+    void* operator new(size_t n);
+    void operator delete(void* p);
+    // Determine if affinity is capable
+    virtual void determine_capable(const char* env_var) {}
+    // Bind the current thread to os proc
+    virtual void bind_thread(int proc) {}
+    // Factory functions to allocate/deallocate a mask
+    virtual Mask* allocate_mask() { return nullptr; }
+    virtual void deallocate_mask(Mask* m) { }
+    virtual Mask* allocate_mask_array(int num) { return nullptr; }
+    virtual void deallocate_mask_array(Mask* m) { }
+    virtual Mask* index_mask_array(Mask* m, int index) { return nullptr; }
+    static void pick_api();
+    static void destroy_api();
+    enum api_type {
+        NATIVE_OS
+#if KMP_USE_HWLOC
+        , HWLOC
 #endif
+    };
+    virtual api_type get_api_type() const { KMP_ASSERT(0); return NATIVE_OS; };
+private:
+    static bool picked_api;
+};
+
+typedef KMPAffinity::Mask kmp_affin_mask_t;
+extern KMPAffinity* __kmp_affinity_dispatch;
 
 //
 // Declare local char buffers with this size for printing debug and info
@@ -895,8 +721,6 @@ extern int __kmp_affinity_respect_mask;
 extern char * __kmp_affinity_proclist; /* proc ID list */
 extern kmp_affin_mask_t *__kmp_affinity_masks;
 extern unsigned __kmp_affinity_num_masks;
-extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error);
-extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error);
 extern void __kmp_affinity_bind_thread(int which);
 
 extern kmp_affin_mask_t *__kmp_affin_fullMask;
@@ -2606,7 +2430,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_
     int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call
 
     // Read/write by workers as well -----------------------------------------------------------------------
-#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_USE_HWLOC
+#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
     // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
     // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
     // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding.

Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Mon Nov 14 15:08:35 2016
@@ -47,53 +47,42 @@ void __kmp_get_hierarchy(kmp_uint32 npro
 
 #if KMP_AFFINITY_SUPPORTED
 
-//
-// Print the affinity mask to the character array in a pretty format.
-//
-#if KMP_USE_HWLOC
-char *
-__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
-{
-    int num_chars_to_write, num_chars_written;
-    char* scan;
-    KMP_ASSERT(buf_len >= 40);
+bool KMPAffinity::picked_api = false;
 
-    // bufsize of 0 just retrieves the needed buffer size.
-    num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);
+void* KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
+void* KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
+void KMPAffinity::Mask::operator delete(void* p) { __kmp_free(p); }
+void KMPAffinity::Mask::operator delete[](void* p) { __kmp_free(p); }
+void* KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
+void KMPAffinity::operator delete(void* p) { __kmp_free(p); }
+
+void KMPAffinity::pick_api() {
+    KMPAffinity* affinity_dispatch;
+    if (picked_api)
+        return;
+#if KMP_USE_HWLOC
+    if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+        affinity_dispatch = new KMPHwlocAffinity();
+    } else
+#endif
+    {
+        affinity_dispatch = new KMPNativeAffinity();
+    }
+    __kmp_affinity_dispatch = affinity_dispatch;
+    picked_api = true;
+}
 
-    // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
-    // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
-    //   take into account the '\0' character.
-    if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
-        KMP_SNPRINTF(buf, buf_len, "{<empty>}");
-    } else if(num_chars_to_write < buf_len - 3) {
-        // no problem fitting the mask into buf_len number of characters
-        buf[0] = '{';
-        // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
-        num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
-        buf[num_chars_written+1] = '}';
-        buf[num_chars_written+2] = '\0';
-    } else {
-        // Need to truncate the affinity mask string and add ellipsis.
-        // To do this, we first write out the '{' + str(mask)
-        buf[0] = '{';
-        hwloc_bitmap_list_snprintf(buf+1, buf_len-1, (hwloc_bitmap_t)mask);
-        // then, what we do here is go to the 7th to last character, then go backwards until we are NOT
-        // on a digit then write "...}\0".  This way it is a clean ellipsis addition and we don't
-        // overwrite part of an affinity number. i.e., we avoid something like { 45, 67, 8...} and get
-        // { 45, 67,...} instead.
-        scan = buf + buf_len - 7;
-        while(*scan >= '0' && *scan <= '9' && scan >= buf)
-            scan--;
-        *(scan+1) = '.';
-        *(scan+2) = '.';
-        *(scan+3) = '.';
-        *(scan+4) = '}';
-        *(scan+5) = '\0';
+void KMPAffinity::destroy_api() {
+    if (__kmp_affinity_dispatch != NULL) {
+        delete __kmp_affinity_dispatch;
+        __kmp_affinity_dispatch = NULL;
+        picked_api = false;
     }
-    return buf;
 }
-#else
+
+//
+// Print the affinity mask to the character array in a pretty format.
+//
 char *
 __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
 {
@@ -105,12 +94,8 @@ __kmp_affinity_print_mask(char *buf, int
     // Find first element / check for empty set.
     //
     size_t i;
-    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
-        if (KMP_CPU_ISSET(i, mask)) {
-            break;
-        }
-    }
-    if (i == KMP_CPU_SETSIZE) {
+    i = mask->begin();
+    if (i == mask->end()) {
         KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
         while (*scan != '\0') scan++;
         KMP_ASSERT(scan <= end);
@@ -120,7 +105,7 @@ __kmp_affinity_print_mask(char *buf, int
     KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
     while (*scan != '\0') scan++;
     i++;
-    for (; i < KMP_CPU_SETSIZE; i++) {
+    for (; i != mask->end(); i = mask->next(i)) {
         if (! KMP_CPU_ISSET(i, mask)) {
             continue;
         }
@@ -137,7 +122,7 @@ __kmp_affinity_print_mask(char *buf, int
         KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
         while (*scan != '\0') scan++;
     }
-    if (i < KMP_CPU_SETSIZE) {
+    if (i != mask->end()) {
         KMP_SNPRINTF(scan, end-scan+1,  ",...");
         while (*scan != '\0') scan++;
     }
@@ -146,7 +131,6 @@ __kmp_affinity_print_mask(char *buf, int
     KMP_ASSERT(scan <= end);
     return buf;
 }
-#endif // KMP_USE_HWLOC
 
 
 void
@@ -677,7 +661,7 @@ __kmp_affinity_create_flat_map(AddrUnsPa
     __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
     if (__kmp_affinity_type == affinity_none) {
         int avail_ct = 0;
-        unsigned int i;
+        int i;
         KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
             if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
                 continue;
@@ -1031,7 +1015,7 @@ __kmp_affinity_create_apicid_map(AddrUns
         }
         KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
 
-        __kmp_affinity_bind_thread(i);
+        __kmp_affinity_dispatch->bind_thread(i);
         threadInfo[nApics].osId = i;
 
         //
@@ -1547,7 +1531,7 @@ __kmp_affinity_create_x2apicid_map(AddrU
         }
         KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
 
-        __kmp_affinity_bind_thread(proc);
+        __kmp_affinity_dispatch->bind_thread(proc);
 
         //
         // Extrach the labels for each level in the machine topology map
@@ -3705,7 +3689,7 @@ __kmp_aux_affinity_initialize(void)
         const char *file_name = NULL;
         int line = 0;
 # if KMP_USE_HWLOC
-        if (depth < 0) {
+        if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
             if (__kmp_affinity_verbose) {
                 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
             }
@@ -3947,6 +3931,7 @@ __kmp_aux_affinity_initialize(void)
 
 # if KMP_USE_HWLOC
     else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+        KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
         if (__kmp_affinity_verbose) {
             KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
         }
@@ -4233,6 +4218,7 @@ __kmp_affinity_uninitialize(void)
         __kmp_hwloc_topology = NULL;
     }
 # endif
+    KMPAffinity::destroy_api();
 }
 
 

Modified: openmp/trunk/runtime/src/kmp_affinity.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.h?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.h (original)
+++ openmp/trunk/runtime/src/kmp_affinity.h Mon Nov 14 15:08:35 2016
@@ -15,6 +15,470 @@
 #ifndef KMP_AFFINITY_H
 #define KMP_AFFINITY_H
 
+#include "kmp_os.h"
+#include "kmp.h"
+
+#if KMP_AFFINITY_SUPPORTED
+#if KMP_USE_HWLOC
+/*
+ * Affinity back end built on the hwloc library.  Masks are hwloc bitmaps and
+ * thread binding goes through hwloc_{get,set}_cpubind() on the global
+ * __kmp_hwloc_topology handle.
+ */
+class KMPHwlocAffinity: public KMPAffinity {
+public:
+    // Affinity mask backed by an hwloc bitmap that this object allocates in
+    // its constructor and frees in its destructor.
+    class Mask : public KMPAffinity::Mask {
+        hwloc_cpuset_t mask;
+    public:
+        Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
+        ~Mask() { hwloc_bitmap_free(mask); }
+        void set(int i) override { hwloc_bitmap_set(mask, i); }
+        bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
+        void clear(int i) override { hwloc_bitmap_clr(mask, i); }
+        void zero() override { hwloc_bitmap_zero(mask); }
+        // The copy/and/or operations downcast their argument without a check,
+        // so the other mask must also be a KMPHwlocAffinity::Mask.
+        void copy(const KMPAffinity::Mask* src) override {
+            const Mask* convert = static_cast<const Mask*>(src);
+            hwloc_bitmap_copy(mask, convert->mask);
+        }
+        void bitwise_and(const KMPAffinity::Mask* rhs) override {
+            const Mask* convert = static_cast<const Mask*>(rhs);
+            hwloc_bitmap_and(mask, mask, convert->mask);
+        }
+        void bitwise_or(const KMPAffinity::Mask * rhs) override {
+            const Mask* convert = static_cast<const Mask*>(rhs);
+            hwloc_bitmap_or(mask, mask, convert->mask);
+        }
+        void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
+        // Iteration: begin()/next() forward to hwloc_bitmap_first()/
+        // hwloc_bitmap_next(); end() is the sentinel -1.
+        int begin() const override { return hwloc_bitmap_first(mask); }
+        int end() const override { return -1; }
+        int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
+        // Reads the calling thread's binding into this mask.  Returns 0 on
+        // success, otherwise the saved errno (after issuing a fatal message
+        // when abort_on_error is set).
+        int get_system_affinity(bool abort_on_error) override {
+            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+              "Illegal get affinity operation when not capable");
+            int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+            if (retval >= 0) {
+                return 0;
+            }
+            int error = errno;
+            if (abort_on_error) {
+                __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+            }
+            return error;
+        }
+        // Binds the calling thread to this mask.  Same return convention as
+        // get_system_affinity().
+        int set_system_affinity(bool abort_on_error) const override {
+            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+              "Illegal set affinity operation when not capable");
+            int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+            if (retval >= 0) {
+                return 0;
+            }
+            int error = errno;
+            if (abort_on_error) {
+                __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+            }
+            return error;
+        }
+        // Windows only: returns the index of the single processor group that
+        // has bits set, or -1 if no group or more than one group has bits set.
+        // On other systems this always returns -1.
+        int get_proc_group() const override {
+            int group = -1;
+# if KMP_OS_WINDOWS
+            // NOTE(review): the single-group case returns 1, not 0; presumably
+            // callers only test for a negative result -- confirm.
+            if (__kmp_num_proc_groups == 1) {
+                return 1;
+            }
+            for (int i = 0; i < __kmp_num_proc_groups; i++) {
+                // On windows, the long type is always 32 bits
+                unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
+                unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
+                if (first_32_bits == 0 && second_32_bits == 0) {
+                    continue;
+                }
+                if (group >= 0) {
+                    return -1;
+                }
+                group = i;
+            }
+# endif /* KMP_OS_WINDOWS */
+            return group;
+        }
+    };
+    // Creates and loads the global hwloc topology if that has not been done
+    // yet, then enables or disables affinity depending on what hwloc reports.
+    // 'var' is the environment variable name used in warning messages.
+    void determine_capable(const char* var) override {
+        const hwloc_topology_support* topology_support = NULL;
+        if(__kmp_hwloc_topology == NULL) {
+            if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
+                __kmp_hwloc_error = TRUE;
+                if(__kmp_affinity_verbose)
+                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
+            } else if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
+                // Only attempt the load when init succeeded; after a failed
+                // init there is no valid topology handle to load.
+                __kmp_hwloc_error = TRUE;
+                if(__kmp_affinity_verbose)
+                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
+            }
+        }
+        // Query support only when no hwloc error was recorded.  The check
+        // below requires !__kmp_hwloc_error anyway, so skipping the query
+        // changes nothing except avoiding a call on an unusable handle.
+        if(!__kmp_hwloc_error)
+            topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
+        // Is the system capable of setting/getting this thread's affinity?
+        // also, is topology discovery possible? (pu indicates ability to discover processing units)
+        // and finally, were there no errors when calling any hwloc_* API functions?
+        if(topology_support && topology_support->cpubind->set_thisthread_cpubind &&
+           topology_support->cpubind->get_thisthread_cpubind &&
+           topology_support->discovery->pu &&
+           !__kmp_hwloc_error)
+        {
+            // enables affinity according to KMP_AFFINITY_CAPABLE() macro
+            KMP_AFFINITY_ENABLE(TRUE);
+        } else {
+            // indicate that hwloc didn't work and disable affinity
+            __kmp_hwloc_error = TRUE;
+            KMP_AFFINITY_DISABLE();
+        }
+    }
+    // Binds the calling thread to the single OS processor 'which'; a failure
+    // is fatal because the system affinity is set with abort_on_error TRUE.
+    void bind_thread(int which) override {
+        KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+          "Illegal set affinity operation when not capable");
+        KMPAffinity::Mask *mask;
+        KMP_CPU_ALLOC_ON_STACK(mask);
+        KMP_CPU_ZERO(mask);
+        KMP_CPU_SET(which, mask);
+        __kmp_set_system_affinity(mask, TRUE);
+        KMP_CPU_FREE_FROM_STACK(mask);
+    }
+    KMPAffinity::Mask* allocate_mask() override { return new Mask();  }
+    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
+    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
+    // Arrays are handed out as base-class pointers, so they are cast back to
+    // the concrete Mask type before delete[] and before indexing.
+    void deallocate_mask_array(KMPAffinity::Mask* array) override {
+        Mask* hwloc_array = static_cast<Mask*>(array);
+        delete[] hwloc_array;
+    }
+    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
+        Mask* hwloc_array = static_cast<Mask*>(array);
+        return &(hwloc_array[index]);
+    }
+    api_type get_api_type() const override { return HWLOC; }
+};
+#endif /* KMP_USE_HWLOC */
+
+#if KMP_OS_LINUX
+/*
+ * On some of the older OS's that we build on, these constants aren't present
+ * in <asm/unistd.h> #included from <sys/syscall.h>.  They must be the same on
+ * all systems of the same arch where they are defined, and they cannot change:
+ * they are set in stone forever.
+ *
+ * For each supported architecture the block below defines the syscall number
+ * when the system headers do not provide it, and fails the build with #error
+ * when the headers provide a different value.
+ */
+#include <sys/syscall.h>
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
+#  ifndef __NR_sched_setaffinity
+#   define __NR_sched_setaffinity  241
+#  elif __NR_sched_setaffinity != 241
+#   error Wrong code for setaffinity system call.
+#  endif /* __NR_sched_setaffinity */
+#  ifndef __NR_sched_getaffinity
+#   define __NR_sched_getaffinity  242
+#  elif __NR_sched_getaffinity != 242
+#   error Wrong code for getaffinity system call.
+#  endif /* __NR_sched_getaffinity */
+# elif KMP_ARCH_AARCH64
+#  ifndef __NR_sched_setaffinity
+#   define __NR_sched_setaffinity  122
+#  elif __NR_sched_setaffinity != 122
+#   error Wrong code for setaffinity system call.
+#  endif /* __NR_sched_setaffinity */
+#  ifndef __NR_sched_getaffinity
+#   define __NR_sched_getaffinity  123
+#  elif __NR_sched_getaffinity != 123
+#   error Wrong code for getaffinity system call.
+#  endif /* __NR_sched_getaffinity */
+# elif KMP_ARCH_X86_64
+#  ifndef __NR_sched_setaffinity
+#   define __NR_sched_setaffinity  203
+#  elif __NR_sched_setaffinity != 203
+#   error Wrong code for setaffinity system call.
+#  endif /* __NR_sched_setaffinity */
+#  ifndef __NR_sched_getaffinity
+#   define __NR_sched_getaffinity  204
+#  elif __NR_sched_getaffinity != 204
+#   error Wrong code for getaffinity system call.
+#  endif /* __NR_sched_getaffinity */
+# elif KMP_ARCH_PPC64
+#  ifndef __NR_sched_setaffinity
+#   define __NR_sched_setaffinity  222
+#  elif __NR_sched_setaffinity != 222
+#   error Wrong code for setaffinity system call.
+#  endif /* __NR_sched_setaffinity */
+#  ifndef __NR_sched_getaffinity
+#   define __NR_sched_getaffinity  223
+#  elif __NR_sched_getaffinity != 223
+#   error Wrong code for getaffinity system call.
+#  endif /* __NR_sched_getaffinity */
+# else
+#  error Unknown or unsupported architecture
+# endif /* KMP_ARCH_* */
+/*
+ * Linux affinity back end that talks to the kernel directly through the
+ * sched_getaffinity/sched_setaffinity system calls.
+ */
+class KMPNativeAffinity : public KMPAffinity {
+    // Affinity mask stored as a byte array of __kmp_affin_mask_size bytes,
+    // one bit per OS processor.
+    class Mask : public KMPAffinity::Mask {
+        typedef unsigned char mask_t;
+        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
+    public:
+        mask_t* mask;
+        Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
+        ~Mask() { if (mask) __kmp_free(mask); }
+        // Bit i lives in element i/BITS_PER_MASK_T at position i%BITS_PER_MASK_T.
+        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
+        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
+        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
+        void zero() override {
+            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+                mask[i] = 0;
+        }
+        // The copy/and/or operations downcast their argument without a check,
+        // so the other mask must also be a KMPNativeAffinity::Mask.
+        void copy(const KMPAffinity::Mask* src) override {
+            const Mask * convert = static_cast<const Mask*>(src);
+            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+                mask[i] = convert->mask[i];
+        }
+        void bitwise_and(const KMPAffinity::Mask* rhs) override {
+            const Mask * convert = static_cast<const Mask*>(rhs);
+            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+                mask[i] &= convert->mask[i];
+        }
+        void bitwise_or(const KMPAffinity::Mask* rhs) override {
+            const Mask * convert = static_cast<const Mask*>(rhs);
+            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+                mask[i] |= convert->mask[i];
+        }
+        void bitwise_not() override {
+            for (size_t i=0; i<__kmp_affin_mask_size; ++i)
+                mask[i] = ~(mask[i]);
+        }
+        // Iteration: begin() is the first set bit, next() the first set bit
+        // after 'previous'; both return end() when there is none.
+        int begin() const override {
+            int retval = 0;
+            while (retval < end() && !is_set(retval))
+                ++retval;
+            return retval;
+        }
+        int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
+        int next(int previous) const override {
+            int retval = previous+1;
+            while (retval < end() && !is_set(retval))
+                ++retval;
+            return retval;
+        }
+        // Reads the calling thread's affinity into this mask.  Returns 0 on
+        // success, otherwise the saved errno (after issuing a fatal message
+        // when abort_on_error is set).
+        int get_system_affinity(bool abort_on_error) override {
+            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+              "Illegal get affinity operation when not capable");
+            int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
+            if (retval >= 0) {
+                return 0;
+            }
+            int error = errno;
+            if (abort_on_error) {
+                __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+            }
+            return error;
+        }
+        // Applies this mask to the calling thread.  Same return convention as
+        // get_system_affinity().
+        int set_system_affinity(bool abort_on_error) const override {
+            KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+              "Illegal set affinity operation when not capable");
+            int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
+            if (retval >= 0) {
+                return 0;
+            }
+            int error = errno;
+            if (abort_on_error) {
+                __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
+            }
+            return error;
+        }
+    };
+    // Capability detection and single-processor binding are delegated to the
+    // existing free functions.
+    void determine_capable(const char* env_var) override {
+        __kmp_affinity_determine_capable(env_var);
+    }
+    void bind_thread(int which) override {
+        __kmp_affinity_bind_thread(which);
+    }
+    KMPAffinity::Mask* allocate_mask() override {
+        KMPNativeAffinity::Mask* retval = new Mask();
+        return retval;
+    }
+    void deallocate_mask(KMPAffinity::Mask* m) override {
+        // Delete through the concrete type the mask was allocated as.
+        KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
+        delete native_mask;
+    }
+    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
+    // Arrays are handed out as base-class pointers, so they are cast back to
+    // the concrete Mask type before delete[] and before indexing.
+    void deallocate_mask_array(KMPAffinity::Mask* array) override {
+        Mask* linux_array = static_cast<Mask*>(array);
+        delete[] linux_array;
+    }
+    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
+        Mask* linux_array = static_cast<Mask*>(array);
+        return &(linux_array[index]);
+    }
+    api_type get_api_type() const override { return NATIVE_OS; }
+};
+#endif /* KMP_OS_LINUX */
+
+#if KMP_OS_WINDOWS
+/*
+ * Windows affinity back end.  Uses SetThreadAffinityMask() when there is a
+ * single processor group and the {Get,Set}ThreadGroupAffinity entry points
+ * (reached through __kmp_GetThreadGroupAffinity/__kmp_SetThreadGroupAffinity)
+ * when there are several.
+ */
+class KMPNativeAffinity : public KMPAffinity {
+    // Affinity mask stored as one ULONG_PTR word per processor group
+    // (__kmp_num_proc_groups words in total).
+    class Mask : public KMPAffinity::Mask {
+        typedef ULONG_PTR mask_t;
+        static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
+        mask_t* mask;
+    public:
+        Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
+        ~Mask() { if (mask) __kmp_free(mask); }
+        // Bit i lives in word i/BITS_PER_MASK_T at position i%BITS_PER_MASK_T.
+        void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
+        bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
+        void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
+        void zero() override {
+            for (int i=0; i<__kmp_num_proc_groups; ++i)
+                mask[i] = 0;
+        }
+        // The copy/and/or operations downcast their argument without a check,
+        // so the other mask must also be a KMPNativeAffinity::Mask.
+        void copy(const KMPAffinity::Mask* src) override {
+            const Mask * convert = static_cast<const Mask*>(src);
+            for (int i=0; i<__kmp_num_proc_groups; ++i)
+                mask[i] = convert->mask[i];
+        }
+        void bitwise_and(const KMPAffinity::Mask* rhs) override {
+            const Mask * convert = static_cast<const Mask*>(rhs);
+            for (int i=0; i<__kmp_num_proc_groups; ++i)
+                mask[i] &= convert->mask[i];
+        }
+        void bitwise_or(const KMPAffinity::Mask* rhs) override {
+            const Mask * convert = static_cast<const Mask*>(rhs);
+            for (int i=0; i<__kmp_num_proc_groups; ++i)
+                mask[i] |= convert->mask[i];
+        }
+        void bitwise_not() override {
+            for (int i=0; i<__kmp_num_proc_groups; ++i)
+                mask[i] = ~(mask[i]);
+        }
+        // Iteration: begin() is the first set bit, next() the first set bit
+        // after 'previous'; both return end() when there is none.
+        int begin() const override {
+            int retval = 0;
+            while (retval < end() && !is_set(retval))
+                ++retval;
+            return retval;
+        }
+        int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
+        int next(int previous) const override {
+            int retval = previous+1;
+            while (retval < end() && !is_set(retval))
+                ++retval;
+            return retval;
+        }
+        // Applies this mask to the calling thread.  Returns 0 on success, -1
+        // when the mask does not select exactly one processor group, or the
+        // GetLastError() code of the failing call.  With abort_on_error set,
+        // failures are reported as fatal messages instead.
+        int set_system_affinity(bool abort_on_error) const override {
+            if (__kmp_num_proc_groups > 1) {
+                // Check for a valid mask.
+                GROUP_AFFINITY ga;
+                int group = get_proc_group();
+                if (group < 0) {
+                    if (abort_on_error) {
+                        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+                    }
+                    return -1;
+                }
+                // Transform the bit vector into a GROUP_AFFINITY struct
+                // and make the system call to set affinity.
+                ga.Group = group;
+                ga.Mask = mask[group];
+                ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
+
+                KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
+                if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
+                    DWORD error = GetLastError();
+                    if (abort_on_error) {
+                        __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
+                                  KMP_ERR( error ), __kmp_msg_null);
+                    }
+                    return error;
+                }
+            } else {
+                if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
+                    DWORD error = GetLastError();
+                    if (abort_on_error) {
+                        __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
+                                  KMP_ERR( error ), __kmp_msg_null);
+                    }
+                    return error;
+                }
+            }
+            return 0;
+        }
+        // Reads the calling thread's affinity into this mask.  Returns 0 on
+        // success, -1 when the reported group affinity is unusable, or the
+        // GetLastError() code of the failing call.
+        int get_system_affinity(bool abort_on_error) override {
+            if (__kmp_num_proc_groups > 1) {
+                this->zero();
+                GROUP_AFFINITY ga;
+                KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
+                if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
+                    DWORD error = GetLastError();
+                    if (abort_on_error) {
+                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
+                                  KMP_ERR(error), __kmp_msg_null);
+                    }
+                    return error;
+                }
+                // Valid group indices are 0 .. __kmp_num_proc_groups-1 because
+                // 'mask' holds exactly __kmp_num_proc_groups words; reject
+                // anything else before indexing.  (ga.Group is unsigned, so
+                // no lower-bound test is needed.)
+                if ((ga.Group >= __kmp_num_proc_groups) || (ga.Mask == 0)) {
+                    return -1;
+                }
+                mask[ga.Group] = ga.Mask;
+            } else {
+                // There is no direct "get thread affinity" call in this path:
+                // temporarily set the thread to the process mask, which
+                // returns the previous thread mask, then restore it.
+                mask_t newMask, sysMask, retval;
+                if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
+                    DWORD error = GetLastError();
+                    if (abort_on_error) {
+                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
+                                  KMP_ERR(error), __kmp_msg_null);
+                    }
+                    return error;
+                }
+                retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
+                if (! retval) {
+                    DWORD error = GetLastError();
+                    if (abort_on_error) {
+                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+                                  KMP_ERR(error), __kmp_msg_null);
+                    }
+                    return error;
+                }
+                newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
+                if (! newMask) {
+                    // A failed restore is only reported when abort_on_error
+                    // is set; otherwise the previously read mask is still
+                    // stored and 0 is returned.
+                    DWORD error = GetLastError();
+                    if (abort_on_error) {
+                        __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+                                  KMP_ERR(error), __kmp_msg_null);
+                    }
+                }
+                *mask = retval;
+            }
+            return 0;
+        }
+        // Returns the index of the single processor group that has bits set,
+        // or -1 if no group or more than one group has bits set.
+        int get_proc_group() const override {
+            int group = -1;
+            // NOTE(review): the single-group case returns 1, not 0; presumably
+            // callers only test for a negative result -- confirm.
+            if (__kmp_num_proc_groups == 1) {
+                return 1;
+            }
+            for (int i = 0; i < __kmp_num_proc_groups; i++) {
+                if (mask[i] == 0)
+                    continue;
+                if (group >= 0)
+                    return -1;
+                group = i;
+            }
+            return group;
+        }
+    };
+    // Capability detection and single-processor binding are delegated to the
+    // existing free functions.
+    void determine_capable(const char* env_var) override {
+        __kmp_affinity_determine_capable(env_var);
+    }
+    void bind_thread(int which) override {
+        __kmp_affinity_bind_thread(which);
+    }
+    KMPAffinity::Mask* allocate_mask() override { return new Mask();  }
+    void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
+    KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
+    // Arrays are handed out as base-class pointers, so they are cast back to
+    // the concrete Mask type before delete[] and before indexing.
+    void deallocate_mask_array(KMPAffinity::Mask* array) override {
+        Mask* windows_array = static_cast<Mask*>(array);
+        delete[] windows_array;
+    }
+    KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
+        Mask* windows_array = static_cast<Mask*>(array);
+        return &(windows_array[index]);
+    }
+    api_type get_api_type() const override { return NATIVE_OS; }
+};
+#endif /* KMP_OS_WINDOWS */
+#endif /* KMP_AFFINITY_SUPPORTED */
+
 class Address {
 public:
     static const unsigned maxDepth = 32;

Modified: openmp/trunk/runtime/src/kmp_ftn_cdecl.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_ftn_cdecl.c?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_ftn_cdecl.c (original)
+++ openmp/trunk/runtime/src/kmp_ftn_cdecl.c Mon Nov 14 15:08:35 2016
@@ -14,6 +14,7 @@
 
 
 #include "kmp.h"
+#include "kmp_affinity.h"
 
 #if KMP_OS_WINDOWS
 #   if defined  KMP_WIN_CDECL ||  !defined KMP_DYNAMIC_LIB

Modified: openmp/trunk/runtime/src/kmp_ftn_entry.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_ftn_entry.h?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_ftn_entry.h (original)
+++ openmp/trunk/runtime/src/kmp_ftn_entry.h Mon Nov 14 15:08:35 2016
@@ -279,15 +279,13 @@ FTN_CREATE_AFFINITY_MASK( void **mask )
         //
         // We really only NEED serial initialization here.
         //
+        kmp_affin_mask_t* mask_internals;
         if ( ! TCR_4(__kmp_init_middle) ) {
             __kmp_middle_initialize();
         }
-    # if KMP_USE_HWLOC
-        *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc();
-    # else
-        *mask = kmpc_malloc( __kmp_affin_mask_size );
-    # endif
-        KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) );
+        mask_internals = __kmp_affinity_dispatch->allocate_mask();
+        KMP_CPU_ZERO( mask_internals );
+        *mask = mask_internals;
     #endif
 }
 
@@ -300,6 +298,7 @@ FTN_DESTROY_AFFINITY_MASK( void **mask )
         //
         // We really only NEED serial initialization here.
         //
+        kmp_affin_mask_t* mask_internals;
         if ( ! TCR_4(__kmp_init_middle) ) {
             __kmp_middle_initialize();
         }
@@ -308,11 +307,8 @@ FTN_DESTROY_AFFINITY_MASK( void **mask )
 	        KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" );
 	    }
         }
-    # if KMP_USE_HWLOC
-        hwloc_bitmap_free((hwloc_cpuset_t)(*mask));
-    # else
-        kmpc_free( *mask );
-    # endif
+        mask_internals = (kmp_affin_mask_t*)(*mask);
+        __kmp_affinity_dispatch->deallocate_mask(mask_internals);
         *mask = NULL;
     #endif
 }

Modified: openmp/trunk/runtime/src/kmp_ftn_extra.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_ftn_extra.c?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_ftn_extra.c (original)
+++ openmp/trunk/runtime/src/kmp_ftn_extra.c Mon Nov 14 15:08:35 2016
@@ -14,6 +14,7 @@
 
 
 #include "kmp.h"
+#include "kmp_affinity.h"
 
 #if KMP_OS_WINDOWS
 #   define KMP_FTN_ENTRIES KMP_FTN_PLAIN

Modified: openmp/trunk/runtime/src/kmp_global.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.c?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.c (original)
+++ openmp/trunk/runtime/src/kmp_global.c Mon Nov 14 15:08:35 2016
@@ -14,6 +14,7 @@
 
 
 #include "kmp.h"
+#include "kmp_affinity.h"
 
 kmp_key_t __kmp_gtid_threadprivate_key;
 
@@ -222,21 +223,22 @@ enum mic_type __kmp_mic_type = non_mic;
 
 #if KMP_AFFINITY_SUPPORTED
 
+KMPAffinity* __kmp_affinity_dispatch = NULL;
+
 # if KMP_USE_HWLOC
 int __kmp_hwloc_error = FALSE;
 hwloc_topology_t __kmp_hwloc_topology = NULL;
 # endif
 
-# if KMP_GROUP_AFFINITY
-
+# if KMP_OS_WINDOWS
+#  if KMP_GROUP_AFFINITY
 int __kmp_num_proc_groups = 1;
-
+#  endif /* KMP_GROUP_AFFINITY */
 kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL;
 kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL;
 kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL;
 kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL;
-
-# endif /* KMP_GROUP_AFFINITY */
+# endif /* KMP_OS_WINDOWS */
 
 size_t   __kmp_affin_mask_size = 0;
 enum affinity_type __kmp_affinity_type = affinity_default;

Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Mon Nov 14 15:08:35 2016
@@ -25,6 +25,7 @@
 #include "kmp_error.h"
 #include "kmp_stats.h"
 #include "kmp_wait_release.h"
+#include "kmp_affinity.h"
 
 #if OMPT_SUPPORT
 #include "ompt-specific.h"

Modified: openmp/trunk/runtime/src/kmp_settings.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_settings.c?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_settings.c (original)
+++ openmp/trunk/runtime/src/kmp_settings.c Mon Nov 14 15:08:35 2016
@@ -23,6 +23,7 @@
 #include "kmp_i18n.h"
 #include "kmp_lock.h"
 #include "kmp_io.h"
+#include "kmp_affinity.h"
 
 static int __kmp_env_toPrint( char const * name, int flag );
 
@@ -5339,44 +5340,12 @@ __kmp_env_initialize( char const * strin
         // affinity.
         //
         const char *var = "KMP_AFFINITY";
-# if KMP_USE_HWLOC
-        if(__kmp_hwloc_topology == NULL) {
-            if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
-                __kmp_hwloc_error = TRUE;
-                if(__kmp_affinity_verbose)
-                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
-            }
-            if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
-                __kmp_hwloc_error = TRUE;
-                if(__kmp_affinity_verbose)
-                    KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
-            }
-        }
-# endif
+        KMPAffinity::pick_api();
         if ( __kmp_affinity_type == affinity_disabled ) {
             KMP_AFFINITY_DISABLE();
         }
         else if ( ! KMP_AFFINITY_CAPABLE() ) {
-# if KMP_USE_HWLOC
-            const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
-            // Is the system capable of setting/getting this thread's affinity?
-            // also, is topology discovery possible? (pu indicates ability to discover processing units)
-            // and finally, were there no errors when calling any hwloc_* API functions?
-            if(topology_support && topology_support->cpubind->set_thisthread_cpubind &&
-               topology_support->cpubind->get_thisthread_cpubind &&
-               topology_support->discovery->pu &&
-               !__kmp_hwloc_error)
-            {
-                // enables affinity according to KMP_AFFINITY_CAPABLE() macro
-                KMP_AFFINITY_ENABLE(TRUE);
-            } else {
-                // indicate that hwloc didn't work and disable affinity
-                __kmp_hwloc_error = TRUE;
-                KMP_AFFINITY_DISABLE();
-            }
-# else
-            __kmp_affinity_determine_capable( var );
-# endif // KMP_USE_HWLOC
+            __kmp_affinity_dispatch->determine_capable(var);
             if ( ! KMP_AFFINITY_CAPABLE() ) {
                 if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings
                   && ( __kmp_affinity_type != affinity_default )

Modified: openmp/trunk/runtime/src/z_Linux_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_util.c?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_util.c (original)
+++ openmp/trunk/runtime/src/z_Linux_util.c Mon Nov 14 15:08:35 2016
@@ -22,6 +22,7 @@
 #include "kmp_io.h"
 #include "kmp_stats.h"
 #include "kmp_wait_release.h"
+#include "kmp_affinity.h"
 
 #if !KMP_OS_FREEBSD && !KMP_OS_NETBSD
 # include <alloca.h>
@@ -113,118 +114,6 @@ __kmp_print_cond( char *buffer, kmp_cond
  * Affinity support
  */
 
-/*
- * On some of the older OS's that we build on, these constants aren't present
- * in <asm/unistd.h> #included from <sys.syscall.h>.  They must be the same on
- * all systems of the same arch where they are defined, and they cannot change.
- * stone forever.
- */
-
-#  if KMP_ARCH_X86 || KMP_ARCH_ARM
-#   ifndef __NR_sched_setaffinity
-#    define __NR_sched_setaffinity  241
-#   elif __NR_sched_setaffinity != 241
-#    error Wrong code for setaffinity system call.
-#   endif /* __NR_sched_setaffinity */
-#   ifndef __NR_sched_getaffinity
-#    define __NR_sched_getaffinity  242
-#   elif __NR_sched_getaffinity != 242
-#    error Wrong code for getaffinity system call.
-#   endif /* __NR_sched_getaffinity */
-
-#  elif KMP_ARCH_AARCH64
-#   ifndef __NR_sched_setaffinity
-#    define __NR_sched_setaffinity  122
-#   elif __NR_sched_setaffinity != 122
-#    error Wrong code for setaffinity system call.
-#   endif /* __NR_sched_setaffinity */
-#   ifndef __NR_sched_getaffinity
-#    define __NR_sched_getaffinity  123
-#   elif __NR_sched_getaffinity != 123
-#    error Wrong code for getaffinity system call.
-#   endif /* __NR_sched_getaffinity */
-
-#  elif KMP_ARCH_X86_64
-#   ifndef __NR_sched_setaffinity
-#    define __NR_sched_setaffinity  203
-#   elif __NR_sched_setaffinity != 203
-#    error Wrong code for setaffinity system call.
-#   endif /* __NR_sched_setaffinity */
-#   ifndef __NR_sched_getaffinity
-#    define __NR_sched_getaffinity  204
-#   elif __NR_sched_getaffinity != 204
-#    error Wrong code for getaffinity system call.
-#   endif /* __NR_sched_getaffinity */
-
-#  elif KMP_ARCH_PPC64
-#   ifndef __NR_sched_setaffinity
-#    define __NR_sched_setaffinity  222
-#   elif __NR_sched_setaffinity != 222
-#    error Wrong code for setaffinity system call.
-#   endif /* __NR_sched_setaffinity */
-#   ifndef __NR_sched_getaffinity
-#    define __NR_sched_getaffinity  223
-#   elif __NR_sched_getaffinity != 223
-#    error Wrong code for getaffinity system call.
-#   endif /* __NR_sched_getaffinity */
-
-
-#  else
-#   error Unknown or unsupported architecture
-
-#  endif /* KMP_ARCH_* */
-
-int
-__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
-{
-    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
-      "Illegal set affinity operation when not capable");
-#if KMP_USE_HWLOC
-    int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-#else
-    int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
-#endif
-    if (retval >= 0) {
-        return 0;
-    }
-    int error = errno;
-    if (abort_on_error) {
-        __kmp_msg(
-            kmp_ms_fatal,
-            KMP_MSG( FatalSysError ),
-            KMP_ERR( error ),
-            __kmp_msg_null
-        );
-    }
-    return error;
-}
-
-int
-__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
-{
-    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
-      "Illegal get affinity operation when not capable");
-
-#if KMP_USE_HWLOC
-    int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-#else
-    int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
-#endif
-    if (retval >= 0) {
-        return 0;
-    }
-    int error = errno;
-    if (abort_on_error) {
-        __kmp_msg(
-            kmp_ms_fatal,
-            KMP_MSG( FatalSysError ),
-            KMP_ERR( error ),
-            __kmp_msg_null
-        );
-    }
-    return error;
-}
-
 void
 __kmp_affinity_bind_thread( int which )
 {

Modified: openmp/trunk/runtime/src/z_Windows_NT_util.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Windows_NT_util.c?rev=286890&r1=286889&r2=286890&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Windows_NT_util.c (original)
+++ openmp/trunk/runtime/src/z_Windows_NT_util.c Mon Nov 14 15:08:35 2016
@@ -18,6 +18,7 @@
 #include "kmp_i18n.h"
 #include "kmp_io.h"
 #include "kmp_wait_release.h"
+#include "kmp_affinity.h"
 
 /* This code is related to NtQuerySystemInformation() function. This function
    is used in the Load balance algorithm for OMP_DYNAMIC=true to find the
@@ -127,9 +128,7 @@ HMODULE ntdll = NULL;
 
 /* End of NtQuerySystemInformation()-related code */
 
-#if KMP_GROUP_AFFINITY
 static HMODULE kernel32 = NULL;
-#endif /* KMP_GROUP_AFFINITY */
 
 /* ----------------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------------------- */
@@ -542,227 +541,9 @@ __kmp_gtid_get_specific()
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
-#if KMP_GROUP_AFFINITY
-
-//
-// Only 1 DWORD in the mask should have any procs set.
-// Return the appropriate index, or -1 for an invalid mask.
-//
-int
-__kmp_get_proc_group( kmp_affin_mask_t const *mask )
-{
-    int i;
-    int group = -1;
-    for (i = 0; i < __kmp_num_proc_groups; i++) {
-#if KMP_USE_HWLOC
-        // On windows, the long type is always 32 bits
-        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2);
-        unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1);
-        if (first_32_bits == 0 && second_32_bits == 0) {
-            continue;
-        }
-#else
-        if (mask[i] == 0) {
-            continue;
-        }
-#endif
-        if (group >= 0) {
-            return -1;
-        }
-        group = i;
-    }
-    return group;
-}
-
-#endif /* KMP_GROUP_AFFINITY */
-
-int
-__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
-{
-#if KMP_USE_HWLOC
-    int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-    if (retval >= 0) {
-        return 0;
-    }
-    int error = errno;
-    if (abort_on_error) {
-        __kmp_msg(
-            kmp_ms_fatal,
-            KMP_MSG( FatalSysError ),
-            KMP_ERR( error ),
-            __kmp_msg_null
-        );
-    }
-    return error;
-#else
-# if KMP_GROUP_AFFINITY
-
-    if (__kmp_num_proc_groups > 1) {
-        //
-        // Check for a valid mask.
-        //
-        GROUP_AFFINITY ga;
-        int group = __kmp_get_proc_group( mask );
-        if (group < 0) {
-            if (abort_on_error) {
-                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
-            }
-            return -1;
-        }
-
-        //
-        // Transform the bit vector into a GROUP_AFFINITY struct
-        // and make the system call to set affinity.
-        //
-        ga.Group = group;
-        ga.Mask = mask[group];
-        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
-
-        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
-        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
-            DWORD error = GetLastError();
-            if (abort_on_error) {
-                __kmp_msg(
-                    kmp_ms_fatal,
-                    KMP_MSG( CantSetThreadAffMask ),
-                    KMP_ERR( error ),
-                    __kmp_msg_null
-                );
-            }
-            return error;
-        }
-    }
-    else
-
-# endif /* KMP_GROUP_AFFINITY */
-
-    {
-        if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
-            DWORD error = GetLastError();
-            if (abort_on_error) {
-                __kmp_msg(
-                    kmp_ms_fatal,
-                    KMP_MSG( CantSetThreadAffMask ),
-                    KMP_ERR( error ),
-                    __kmp_msg_null
-                );
-            }
-            return error;
-        }
-    }
-#endif /* KMP_USE_HWLOC */
-    return 0;
-}
-
-int
-__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
-{
-#if KMP_USE_HWLOC
-    int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
-    if (retval >= 0) {
-        return 0;
-    }
-    int error = errno;
-    if (abort_on_error) {
-        __kmp_msg(
-            kmp_ms_fatal,
-            KMP_MSG( FatalSysError ),
-            KMP_ERR( error ),
-            __kmp_msg_null
-        );
-    }
-    return error;
-#else /* KMP_USE_HWLOC */
-# if KMP_GROUP_AFFINITY
-
-    if (__kmp_num_proc_groups > 1) {
-        KMP_CPU_ZERO(mask);
-        GROUP_AFFINITY ga;
-        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
-
-        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
-            DWORD error = GetLastError();
-            if (abort_on_error) {
-                __kmp_msg(
-                    kmp_ms_fatal,
-                    KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
-                    KMP_ERR(error),
-                    __kmp_msg_null
-                );
-            }
-            return error;
-        }
-
-        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups)
-          || (ga.Mask == 0)) {
-            return -1;
-        }
-
-        mask[ga.Group] = ga.Mask;
-    }
-    else
-
-# endif /* KMP_GROUP_AFFINITY */
-
-    {
-        kmp_affin_mask_t newMask, sysMask, retval;
-
-        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
-            DWORD error = GetLastError();
-            if (abort_on_error) {
-                __kmp_msg(
-                    kmp_ms_fatal,
-                    KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
-                    KMP_ERR(error),
-                    __kmp_msg_null
-                );
-            }
-            return error;
-        }
-        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
-        if (! retval) {
-            DWORD error = GetLastError();
-            if (abort_on_error) {
-                __kmp_msg(
-                    kmp_ms_fatal,
-                    KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
-                    KMP_ERR(error),
-                    __kmp_msg_null
-                );
-            }
-            return error;
-        }
-        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
-        if (! newMask) {
-            DWORD error = GetLastError();
-            if (abort_on_error) {
-                __kmp_msg(
-                    kmp_ms_fatal,
-                    KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
-                    KMP_ERR(error),
-                    __kmp_msg_null
-                );
-            }
-        }
-        *mask = retval;
-    }
-#endif /* KMP_USE_HWLOC */
-    return 0;
-}
-
 void
 __kmp_affinity_bind_thread( int proc )
 {
-#if KMP_USE_HWLOC
-    kmp_affin_mask_t *mask;
-    KMP_CPU_ALLOC_ON_STACK(mask);
-    KMP_CPU_ZERO(mask);
-    KMP_CPU_SET(proc, mask);
-    __kmp_set_system_affinity(mask, TRUE);
-    KMP_CPU_FREE_FROM_STACK(mask);
-#else /* KMP_USE_HWLOC */
-# if KMP_GROUP_AFFINITY
-
     if (__kmp_num_proc_groups > 1) {
         //
         // Form the GROUP_AFFINITY struct directly, rather than filling
@@ -787,18 +568,14 @@ __kmp_affinity_bind_thread( int proc )
                 );
             }
         }
+    } else {
+        kmp_affin_mask_t *mask;
+        KMP_CPU_ALLOC_ON_STACK(mask);
+        KMP_CPU_ZERO(mask);
+        KMP_CPU_SET(proc, mask);
+        __kmp_set_system_affinity(mask, TRUE);
+        KMP_CPU_FREE_FROM_STACK(mask);
     }
-    else
-
-# endif /* KMP_GROUP_AFFINITY */
-
-    {
-        kmp_affin_mask_t mask;
-        KMP_CPU_ZERO(&mask);
-        KMP_CPU_SET(proc, &mask);
-        __kmp_set_system_affinity(&mask, TRUE);
-    }
-#endif /* KMP_USE_HWLOC */
 }
 
 void




More information about the Openmp-commits mailing list