[Openmp-commits] [PATCH] D16640: Fix task dependency performance problem

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Wed Jan 27 10:51:27 PST 2016


jlpeyton created this revision.
jlpeyton added reviewers: AndreyChurbanov, hfinkel, tlwilmar.
jlpeyton added a subscriber: openmp-commits.
jlpeyton set the repository for this revision to rL LLVM.

A performance issue with libomp's task dependencies was reported in http://lists.llvm.org/pipermail/openmp-dev/2015-August/000858.html.

The task dependency hash table suffers from collisions.  The current table size is a power of two, which, combined with the current hash function, causes a large number of collisions to occur.  Also, the current size (64) is too small for larger applications, so the table size is increased.

This patch introduces a two-level sizing scheme for the task dependency hash tables.  The implicit task is considered the "master" or "top-level" task and gets a large, statically sized hash table (997 buckets), while nested tasks get smaller hash tables (97 buckets).  Prime sizes were chosen to help reduce collisions.

Repository:
  rL LLVM

http://reviews.llvm.org/D16640

Files:
  runtime/src/kmp.h
  runtime/src/kmp_taskdeps.cpp

Index: runtime/src/kmp_taskdeps.cpp
===================================================================
--- runtime/src/kmp_taskdeps.cpp
+++ runtime/src/kmp_taskdeps.cpp
@@ -73,44 +73,55 @@
 static void
 __kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list );
 
-static const kmp_int32 kmp_dephash_log2 = 6;
-static const kmp_int32 kmp_dephash_size = (1 << kmp_dephash_log2);
+enum {
+    KMP_DEPHASH_OTHER_SIZE = 97,
+    KMP_DEPHASH_MASTER_SIZE = 997
+};
 
 static inline kmp_int32
-__kmp_dephash_hash ( kmp_intptr_t addr )
+__kmp_dephash_hash ( kmp_intptr_t addr, size_t hsize )
 {
     //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets );
-    return ((addr >> kmp_dephash_log2) ^ addr) % kmp_dephash_size;
+    return ((addr >> 6) ^ (addr >> 2)) % hsize;
 }
 
 static kmp_dephash_t *
-__kmp_dephash_create ( kmp_info_t *thread )
+__kmp_dephash_create ( kmp_info_t *thread, kmp_taskdata_t *current_task )
 {
     kmp_dephash_t *h;
 
-    kmp_int32 size = kmp_dephash_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t);
+    size_t h_size;
+
+    if ( current_task->td_flags.tasktype == TASK_IMPLICIT )
+        h_size = KMP_DEPHASH_MASTER_SIZE;
+    else
+        h_size = KMP_DEPHASH_OTHER_SIZE;
+
+    kmp_int32 size = h_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t);
 
 #if USE_FAST_MEMORY
     h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size );
 #else
     h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size );
 #endif
+    h->size = h_size;
 
 #ifdef KMP_DEBUG
     h->nelements = 0;
+    h->nconflicts = 0;
 #endif
     h->buckets = (kmp_dephash_entry **)(h+1);
 
-    for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ )
+    for ( size_t i = 0; i < h_size; i++ )
         h->buckets[i] = 0;
 
     return h;
 }
 
 static void
 __kmp_dephash_free ( kmp_info_t *thread, kmp_dephash_t *h )
 {
-    for ( kmp_int32 i=0; i < kmp_dephash_size; i++ ) {
+    for ( size_t i=0; i < h->size; i++ ) {
         if ( h->buckets[i] ) {
             kmp_dephash_entry_t *next;
             for ( kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next ) {
@@ -135,7 +146,7 @@
 static kmp_dephash_entry *
 __kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr )
 {
-    kmp_int32 bucket = __kmp_dephash_hash(addr);
+    kmp_int32 bucket = __kmp_dephash_hash(addr,h->size);
 
     kmp_dephash_entry_t *entry;
     for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket )
@@ -412,7 +423,7 @@
     if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) {
         /* if no dependencies have been tracked yet, create the dependence hash */
         if ( current_task->td_dephash == NULL )
-            current_task->td_dephash = __kmp_dephash_create(thread);
+            current_task->td_dephash = __kmp_dephash_create(thread, current_task);
 
 #if USE_FAST_MEMORY
         kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t));
Index: runtime/src/kmp.h
===================================================================
--- runtime/src/kmp.h
+++ runtime/src/kmp.h
@@ -2057,6 +2057,7 @@
 
 typedef struct kmp_dephash {
    kmp_dephash_entry_t     ** buckets;
+   size_t		      size;
 #ifdef KMP_DEBUG
    kmp_uint32                 nelements;
    kmp_uint32                 nconflicts;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D16640.46152.patch
Type: text/x-patch
Size: 3338 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20160127/9cf16587/attachment.bin>


More information about the Openmp-commits mailing list