[Openmp-commits] [openmp] 1e45cd7 - [OpenMP][host runtime] Fix indirect lock table race condition

via Openmp-commits openmp-commits at lists.llvm.org
Mon Sep 20 11:02:08 PDT 2021


Author: Peyton, Jonathan L
Date: 2021-09-20T13:01:58-05:00
New Revision: 1e45cd75dfb1df61892c1a26654c8997d8aeef66

URL: https://github.com/llvm/llvm-project/commit/1e45cd75dfb1df61892c1a26654c8997d8aeef66
DIFF: https://github.com/llvm/llvm-project/commit/1e45cd75dfb1df61892c1a26654c8997d8aeef66.diff

LOG: [OpenMP][host runtime] Fix indirect lock table race condition

The indirect lock table can exhibit a race condition between lock
initialization and lock setting/unsetting. The race occurs when the
lock table is resized by one thread (during an omp_init_lock) while it
is accessed (during an omp_set_lock or omp_unset_lock) by another
thread.

The test runtime/test/lock/omp_init_lock.c exposed this issue and will
fail if run enough times.
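
As a rough illustration (not the actual test; the lock count, thread
split, and variable names below are only illustrative), the following
sketch shows the kind of interleaving the commit message describes, on
configurations where the runtime stores a table index inside the user's
lock object: one thread keeps initializing locks, forcing the indirect
lock table to grow, while another repeatedly sets and unsets an
already-initialized lock.

  // Hypothetical reproducer sketch -- not the actual test.
  #include <omp.h>
  #include <stdlib.h>

  int main() {
    enum { N = 100000 };          // enough inits to force table growth
    omp_lock_t *locks = (omp_lock_t *)malloc(N * sizeof(omp_lock_t));
    omp_lock_t shared_lock;
    omp_init_lock(&shared_lock);  // lock used by the set/unset thread

    #pragma omp parallel num_threads(2)
    {
      if (omp_get_thread_num() == 0) {
        // Repeated initialization grows the indirect lock table.
        for (int i = 0; i < N; ++i)
          omp_init_lock(&locks[i]);
      } else {
        // Concurrent set/unset looks the lock up in the same table.
        for (int i = 0; i < N; ++i) {
          omp_set_lock(&shared_lock);
          omp_unset_lock(&shared_lock);
        }
      }
    }

    for (int i = 0; i < N; ++i)
      omp_destroy_lock(&locks[i]);
    omp_destroy_lock(&shared_lock);
    free(locks);
    return 0;
  }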

This patch restructures the lock table so pointer/iterator validity is
always preserved. Instead of reallocating a single table to a larger
size, the lock table starts with a preallocated array of row pointers
that can accommodate 8K locks. Each row of the table is allocated on
demand and holds 1K locks. If the 8K limit of the initial table is
reached, another table, capable of holding double the number of locks,
is allocated and linked as the next table. The indices stored in the
user's locks take this linked structure into account when locating the
lock within the tables.
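
A minimal sketch of that index-to-lock mapping, assuming the layout
described above (8 row pointers in the first table, doubling in each
linked table, 1K locks per row). The type and function names here are
illustrative only; the runtime's actual lookup is __kmp_get_i_lock in
kmp_lock.h below.

  #include <cstdint>

  struct lock_sketch { void *payload; }; // stand-in for kmp_indirect_lock_t

  struct lock_table_sketch {
    lock_sketch **rows;                // row pointers, allocated on demand
    uint32_t nrow_ptrs;                // number of row pointers
    lock_table_sketch *next_table;     // next, larger table (or nullptr)
  };

  static const uint32_t CHUNK = 1024;  // locks per row

  // Walk the linked tables until the index falls inside one of them,
  // then split the remaining index into (row, column).
  lock_sketch *lookup_sketch(lock_table_sketch *t, uint32_t idx) {
    while (t) {
      uint32_t capacity = t->nrow_ptrs * CHUNK; // 8K, then 16K, 32K, ...
      if (idx < capacity) {
        uint32_t row = idx / CHUNK;
        uint32_t col = idx % CHUNK;
        return t->rows[row] ? &t->rows[row][col] : nullptr;
      }
      idx -= capacity;                 // index continues in the next table
      t = t->next_table;
    }
    return nullptr;                    // index beyond all allocated tables
  }

Because rows and whole tables are only ever added, never moved or freed
while the runtime is live, a lock pointer obtained from an earlier
lookup remains valid even as later omp_init_lock calls grow the table.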

Differential Revision: https://reviews.llvm.org/D109725

Added: 
    

Modified: 
    openmp/runtime/src/kmp_lock.cpp
    openmp/runtime/src/kmp_lock.h

Removed: 
    


################################################################################
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index d138ba9ecf50f..13bb7ace86826 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -3104,7 +3104,7 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
                                                   kmp_int32 gtid,
                                                   kmp_indirect_locktag_t tag) {
   kmp_indirect_lock_t *lck;
-  kmp_lock_index_t idx;
+  kmp_lock_index_t idx, table_idx;
 
   __kmp_acquire_lock(&__kmp_global_lock, gtid);
 
@@ -3117,26 +3117,41 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
     KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
                   lck));
   } else {
-    idx = __kmp_i_lock_table.next;
-    // Check capacity and double the size if it is full
-    if (idx == __kmp_i_lock_table.size) {
-      // Double up the space for block pointers
-      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
-      kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
-          2 * row * sizeof(kmp_indirect_lock_t *));
-      KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
-                 row * sizeof(kmp_indirect_lock_t *));
-      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
-      __kmp_i_lock_table.table = new_table;
-      __kmp_free(old_table);
-      // Allocate new objects in the new blocks
-      for (int i = row; i < 2 * row; ++i)
-        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
-            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
-      __kmp_i_lock_table.size = 2 * idx;
+    kmp_uint32 row, col;
+    kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+    idx = 0;
+    // Find location in list of lock tables to put new lock
+    while (1) {
+      table_idx = lock_table->next; // index within this table
+      idx += lock_table->next; // global index within list of tables
+      if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
+        row = table_idx / KMP_I_LOCK_CHUNK;
+        col = table_idx % KMP_I_LOCK_CHUNK;
+        // Allocate a new row of locks if necessary
+        if (!lock_table->table[row]) {
+          lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
+              sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
+        }
+        break;
+      }
+      // Allocate a new lock table if necessary with double the capacity
+      if (!lock_table->next_table) {
+        kmp_indirect_lock_table_t *next_table =
+            (kmp_indirect_lock_table_t *)__kmp_allocate(
+                sizeof(kmp_indirect_lock_table_t));
+        next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
+            sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
+        next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
+        next_table->next = 0;
+        next_table->next_table = nullptr;
+        lock_table->next_table = next_table;
+      }
+      lock_table = lock_table->next_table;
+      KMP_ASSERT(lock_table);
     }
-    __kmp_i_lock_table.next++;
-    lck = KMP_GET_I_LOCK(idx);
+    lock_table->next++;
+
+    lck = &lock_table->table[row][col];
     // Allocate a new base lock object
     lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
     KA_TRACE(20,
@@ -3167,10 +3182,7 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
     }
     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
       kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
-      if (idx >= __kmp_i_lock_table.size) {
-        KMP_FATAL(LockIsUninitialized, func);
-      }
-      lck = KMP_GET_I_LOCK(idx);
+      lck = __kmp_get_i_lock(idx);
     } else {
       lck = *((kmp_indirect_lock_t **)user_lock);
     }
@@ -3180,7 +3192,7 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
     return lck;
   } else {
     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
-      return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
+      return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
     } else {
       return *((kmp_indirect_lock_t **)user_lock);
     }
@@ -3323,12 +3335,13 @@ void __kmp_init_dynamic_user_locks() {
     return;
 
   // Initialize lock index table
-  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
-  __kmp_i_lock_table.table =
-      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
+  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
+  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
+      sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
   *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
       KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
   __kmp_i_lock_table.next = 0;
+  __kmp_i_lock_table.next_table = nullptr;
 
   // Indirect lock size
   __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
@@ -3393,7 +3406,6 @@ void __kmp_init_dynamic_user_locks() {
 
 // Clean up the lock table.
 void __kmp_cleanup_indirect_user_locks() {
-  kmp_lock_index_t i;
   int k;
 
   // Clean up locks in the pools first (they were already destroyed before going
@@ -3411,22 +3423,29 @@ void __kmp_cleanup_indirect_user_locks() {
     __kmp_indirect_lock_pool[k] = NULL;
   }
   // Clean up the remaining undestroyed locks.
-  for (i = 0; i < __kmp_i_lock_table.next; i++) {
-    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
-    if (l->lock != NULL) {
-      // Locks not destroyed explicitly need to be destroyed here.
-      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
-      KA_TRACE(
-          20,
-          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
-           l));
-      __kmp_free(l->lock);
+  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
+  while (ptr) {
+    for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
+      if (!ptr->table[row])
+        continue;
+      for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
+        kmp_indirect_lock_t *l = &ptr->table[row][col];
+        if (l->lock) {
+          // Locks not destroyed explicitly need to be destroyed here.
+          KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+          KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
+                        "from table\n",
+                        l));
+          __kmp_free(l->lock);
+        }
+      }
+      __kmp_free(ptr->table[row]);
     }
+    kmp_indirect_lock_table_t *next_table = ptr->next_table;
+    if (ptr != &__kmp_i_lock_table)
+      __kmp_free(ptr);
+    ptr = next_table;
   }
-  // Free the table
-  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
-    __kmp_free(__kmp_i_lock_table.table[i]);
-  __kmp_free(__kmp_i_lock_table.table);
 
   __kmp_init_user_locks = FALSE;
 }

diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 4f6ad6414e53d..90afd8fd7eb3e 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -1217,22 +1217,41 @@ extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
        ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock)                    \
        : NULL)
 
-#define KMP_I_LOCK_CHUNK                                                       \
-  1024 // number of kmp_indirect_lock_t objects to be allocated together
+// number of kmp_indirect_lock_t objects to be allocated together
+#define KMP_I_LOCK_CHUNK 1024
+// Keep at a power of 2 since it is used in multiplication & division
+KMP_BUILD_ASSERT(KMP_I_LOCK_CHUNK % 2 == 0);
+// number of row entries in the initial lock table
+#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8
 
 // Lock table for indirect locks.
 typedef struct kmp_indirect_lock_table {
   kmp_indirect_lock_t **table; // blocks of indirect locks allocated
-  kmp_lock_index_t size; // size of the indirect lock table
+  kmp_uint32 nrow_ptrs; // number of *table pointer entries in table
   kmp_lock_index_t next; // index to the next lock to be allocated
+  struct kmp_indirect_lock_table *next_table;
 } kmp_indirect_lock_table_t;
 
 extern kmp_indirect_lock_table_t __kmp_i_lock_table;
 
 // Returns the indirect lock associated with the given index.
-#define KMP_GET_I_LOCK(index)                                                  \
-  (*(__kmp_i_lock_table.table + (index) / KMP_I_LOCK_CHUNK) +                  \
-   (index) % KMP_I_LOCK_CHUNK)
+// Returns nullptr if no lock at given index
+static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
+  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+  while (lock_table) {
+    kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
+    if (idx < max_locks) {
+      kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
+      kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
+      if (!lock_table->table[row] || idx >= lock_table->next)
+        break;
+      return &lock_table->table[row][col];
+    }
+    idx -= max_locks;
+    lock_table = lock_table->next_table;
+  }
+  return nullptr;
+}
 
 // Number of locks in a lock block, which is fixed to "1" now.
 // TODO: No lock block implementation now. If we do support, we need to manage
@@ -1241,8 +1260,9 @@ extern int __kmp_num_locks_in_block;
 
 // Fast lock table lookup without consistency checking
 #define KMP_LOOKUP_I_LOCK(l)                                                   \
-  ((OMP_LOCK_T_SIZE < sizeof(void *)) ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
-                                      : *((kmp_indirect_lock_t **)(l)))
+  ((OMP_LOCK_T_SIZE < sizeof(void *))                                          \
+       ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l))                              \
+       : *((kmp_indirect_lock_t **)(l)))
 
 // Used once in kmp_error.cpp
 extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);


        

