[llvm-branch-commits] [openmp] c3b5009 - [OpenMP] Use RTM lock for OMP lock with synchronization hint
Hansang Bae via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 9 17:20:28 PST 2020
Author: Hansang Bae
Date: 2020-12-09T19:14:53-06:00
New Revision: c3b5009aa7f42ab8376b39d96e762e2d2e98ab5e
URL: https://github.com/llvm/llvm-project/commit/c3b5009aa7f42ab8376b39d96e762e2d2e98ab5e
DIFF: https://github.com/llvm/llvm-project/commit/c3b5009aa7f42ab8376b39d96e762e2d2e98ab5e.diff
LOG: [OpenMP] Use RTM lock for OMP lock with synchronization hint
This patch introduces a new RTM lock type based on a spin lock, which is
used for an OMP lock with a speculative hint on supported architectures.
Differential Revision: https://reviews.llvm.org/D92615
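For context (not part of the patch), here is a minimal user-side sketch of requesting a speculative lock through the standard OpenMP hint API. With this change, omp_lock_hint_speculative maps to the new rtm_spin lock on RTM-capable CPUs and otherwise falls back to the default user lock; the lock kind can also be forced globally with the KMP_LOCK_KIND environment variable, which now accepts rtm_queuing and rtm_spin in place of the old rtm name. The program itself is illustrative only.

  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    omp_lock_t lock;
    // Ask for a speculative lock; on RTM-capable hardware this now selects
    // the rtm_spin lock, otherwise the runtime's default user lock is used.
    omp_init_lock_with_hint(&lock, omp_lock_hint_speculative);

    int sum = 0;
  #pragma omp parallel for
    for (int i = 0; i < 1000; ++i) {
      omp_set_lock(&lock);
      sum += i; // short, rarely-conflicting critical sections suit speculation
      omp_unset_lock(&lock);
    }

    omp_destroy_lock(&lock);
    printf("sum = %d\n", sum);
    return 0;
  }

Compile with, e.g., clang -fopenmp example.c.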
Added:
Modified:
openmp/runtime/src/kmp_csupport.cpp
openmp/runtime/src/kmp_lock.cpp
openmp/runtime/src/kmp_lock.h
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_settings.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 1a8db51a667b..fbe7c3d646d6 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -1249,7 +1249,7 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
if (hint & kmp_lock_hint_hle)
return KMP_TSX_LOCK(hle);
if (hint & kmp_lock_hint_rtm)
- return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
+ return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
if (hint & kmp_lock_hint_adaptive)
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
@@ -1268,9 +1268,9 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
return lockseq_tas;
- // HLE lock for speculation
+ // Use RTM lock for speculation
if (hint & omp_lock_hint_speculative)
- return KMP_TSX_LOCK(hle);
+ return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
return __kmp_user_lock_seq;
}
@@ -1291,6 +1291,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
return kmp_mutex_impl_spin;
#if KMP_USE_TSX
case locktag_hle:
+ case locktag_rtm_spin:
return kmp_mutex_impl_speculative;
#endif
default:
@@ -1302,7 +1303,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
switch (ilock->type) {
#if KMP_USE_TSX
case locktag_adaptive:
- case locktag_rtm:
+ case locktag_rtm_queuing:
return kmp_mutex_impl_speculative;
#endif
case locktag_nested_tas:
@@ -1336,7 +1337,8 @@ static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
case lk_hle:
- case lk_rtm:
+ case lk_rtm_queuing:
+ case lk_rtm_spin:
case lk_adaptive:
return kmp_mutex_impl_speculative;
#endif
@@ -2144,7 +2146,8 @@ __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
// Don't have nested lock implementation for speculative locks
- if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
+ if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
+ seq == lockseq_rtm_spin || seq == lockseq_adaptive)
seq = __kmp_user_lock_seq;
#endif
switch (seq) {
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 6fa6bf060673..38b43a36fcb8 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -2764,20 +2764,22 @@ static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
}
-static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) {
+static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
__kmp_init_queuing_lock(lck);
}
-static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) {
+static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
__kmp_destroy_queuing_lock(lck);
}
-static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) {
+static void
+__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
__kmp_destroy_queuing_lock_with_checks(lck);
}
KMP_ATTRIBUTE_TARGET_RTM
-static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
+static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
+ kmp_int32 gtid) {
unsigned retries = 3, status;
do {
status = _xbegin();
@@ -2799,13 +2801,14 @@ static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
__kmp_acquire_queuing_lock(lck, gtid);
}
-static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
- kmp_int32 gtid) {
- __kmp_acquire_rtm_lock(lck, gtid);
+static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
+ kmp_int32 gtid) {
+ __kmp_acquire_rtm_queuing_lock(lck, gtid);
}
KMP_ATTRIBUTE_TARGET_RTM
-static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
+static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
+ kmp_int32 gtid) {
if (__kmp_is_unlocked_queuing_lock(lck)) {
// Releasing from speculation
_xend();
@@ -2816,13 +2819,14 @@ static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
return KMP_LOCK_RELEASED;
}
-static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
- kmp_int32 gtid) {
- return __kmp_release_rtm_lock(lck, gtid);
+static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
+ kmp_int32 gtid) {
+ return __kmp_release_rtm_queuing_lock(lck, gtid);
}
KMP_ATTRIBUTE_TARGET_RTM
-static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
+static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
+ kmp_int32 gtid) {
unsigned retries = 3, status;
do {
status = _xbegin();
@@ -2833,12 +2837,108 @@ static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
break;
} while (retries--);
- return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0;
+ return __kmp_test_queuing_lock(lck, gtid);
}
-static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
- kmp_int32 gtid) {
- return __kmp_test_rtm_lock(lck, gtid);
+static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
+ kmp_int32 gtid) {
+ return __kmp_test_rtm_queuing_lock(lck, gtid);
+}
+
+// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
+typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
+
+static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
+ KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
+}
+
+static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
+ __kmp_destroy_rtm_spin_lock(lck);
+}
+
+KMP_ATTRIBUTE_TARGET_RTM
+static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
+ kmp_int32 gtid) {
+ unsigned retries = 3, status;
+ kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
+ kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
+ do {
+ status = _xbegin();
+ if (status == _XBEGIN_STARTED) {
+ if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
+ return KMP_LOCK_ACQUIRED_FIRST;
+ _xabort(0xff);
+ }
+ if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
+ // Wait until lock becomes free
+ while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
+ KMP_YIELD(TRUE);
+ }
+ } else if (!(status & _XABORT_RETRY))
+ break;
+ } while (retries--);
+
+ // Fall-back spin lock
+ KMP_FSYNC_PREPARE(lck);
+ kmp_backoff_t backoff = __kmp_spin_backoff_params;
+ while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
+ !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
+ __kmp_spin_backoff(&backoff);
+ }
+ KMP_FSYNC_ACQUIRED(lck);
+ return KMP_LOCK_ACQUIRED_FIRST;
+}
+
+static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
+ kmp_int32 gtid) {
+ return __kmp_acquire_rtm_spin_lock(lck, gtid);
+}
+
+KMP_ATTRIBUTE_TARGET_RTM
+static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
+ kmp_int32 gtid) {
+ if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
+ // Releasing from speculation
+ _xend();
+ } else {
+ // Releasing from a real lock
+ KMP_FSYNC_RELEASING(lck);
+ KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
+ }
+ return KMP_LOCK_RELEASED;
+}
+
+static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
+ kmp_int32 gtid) {
+ return __kmp_release_rtm_spin_lock(lck, gtid);
+}
+
+KMP_ATTRIBUTE_TARGET_RTM
+static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
+ unsigned retries = 3, status;
+ kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
+ kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
+ do {
+ status = _xbegin();
+ if (status == _XBEGIN_STARTED &&
+ KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
+ return TRUE;
+ }
+ if (!(status & _XABORT_RETRY))
+ break;
+ } while (retries--);
+
+ if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
+ __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
+ KMP_FSYNC_ACQUIRED(lck);
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
+ kmp_int32 gtid) {
+ return __kmp_test_rtm_spin_lock(lck, gtid);
}
#endif // KMP_USE_TSX
@@ -3124,7 +3224,7 @@ static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
}
#endif
#if KMP_USE_TSX
- if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
+ if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.rtm) {
seq = lockseq_queuing;
}
#endif
@@ -3266,7 +3366,7 @@ void __kmp_init_dynamic_user_locks() {
#endif
__kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
#if KMP_USE_TSX
- __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
+ __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
#endif
__kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index b80e54777e8c..3b70f95c7c56 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -587,7 +587,8 @@ enum kmp_lock_kind {
#endif
#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
lk_hle,
- lk_rtm,
+ lk_rtm_queuing,
+ lk_rtm_spin,
#endif
lk_ticket,
lk_queuing,
@@ -1041,19 +1042,19 @@ extern void __kmp_cleanup_user_locks();
// All nested locks are indirect lock types.
#if KMP_USE_TSX
#if KMP_USE_FUTEX
-#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
+#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a)
#define KMP_FOREACH_I_LOCK(m, a) \
- m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
+ m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
m(nested_queuing, a) m(nested_drdpa, a)
#else
-#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
+#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) m(rtm_spin, a)
#define KMP_FOREACH_I_LOCK(m, a) \
- m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
+ m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a) \
m(nested_drdpa, a)
#endif // KMP_USE_FUTEX
-#define KMP_LAST_D_LOCK lockseq_hle
+#define KMP_LAST_D_LOCK lockseq_rtm_spin
#else
#if KMP_USE_FUTEX
#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index f6d4524150f0..6e8b3e5836ec 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -1630,7 +1630,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif
-#if USE_ITT_BUILD
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
KMP_ITT_DEBUG) &&
__kmp_forkjoin_frames_mode == 3 &&
@@ -1644,7 +1644,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// create new stack stitching id before entering fork barrier
parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
}
-#endif /* USE_ITT_BUILD */
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n",
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index bc4b4fc9945d..d88324e2c7d5 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -4102,15 +4102,24 @@ static void __kmp_stg_parse_lock_kind(char const *name, char const *value,
}
#endif // KMP_USE_ADAPTIVE_LOCKS
#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
- else if (__kmp_str_match("rtm", 1, value)) {
+ else if (__kmp_str_match("rtm_queuing", 1, value)) {
if (__kmp_cpuinfo.rtm) {
- __kmp_user_lock_kind = lk_rtm;
- KMP_STORE_LOCK_SEQ(rtm);
+ __kmp_user_lock_kind = lk_rtm_queuing;
+ KMP_STORE_LOCK_SEQ(rtm_queuing);
} else {
KMP_WARNING(AdaptiveNotSupported, name, value);
__kmp_user_lock_kind = lk_queuing;
KMP_STORE_LOCK_SEQ(queuing);
}
+ } else if (__kmp_str_match("rtm_spin", 1, value)) {
+ if (__kmp_cpuinfo.rtm) {
+ __kmp_user_lock_kind = lk_rtm_spin;
+ KMP_STORE_LOCK_SEQ(rtm_spin);
+ } else {
+ KMP_WARNING(AdaptiveNotSupported, name, value);
+ __kmp_user_lock_kind = lk_tas;
+ KMP_STORE_LOCK_SEQ(queuing);
+ }
} else if (__kmp_str_match("hle", 1, value)) {
__kmp_user_lock_kind = lk_hle;
KMP_STORE_LOCK_SEQ(hle);
@@ -4141,8 +4150,12 @@ static void __kmp_stg_print_lock_kind(kmp_str_buf_t *buffer, char const *name,
#endif
#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
- case lk_rtm:
- value = "rtm";
+ case lk_rtm_queuing:
+ value = "rtm_queuing";
+ break;
+
+ case lk_rtm_spin:
+ value = "rtm_spin";
break;
case lk_hle:
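For readers less familiar with RTM, the following standalone sketch (not part of the patch) shows the speculate-then-fall-back pattern that the new __kmp_acquire_rtm_spin_lock / __kmp_release_rtm_spin_lock functions follow, written against the RTM intrinsics from <immintrin.h>. The 0/1 lock-word values, the bare spin fallback, and all names here are illustrative simplifications; the runtime version above uses its own lock-word encoding (KMP_LOCK_FREE/KMP_LOCK_BUSY) and adds backoff, yielding, and FSYNC instrumentation.

  // Build with e.g.: clang -O2 -mrtm rtm_spin_sketch.c (illustrative only)
  #include <immintrin.h>
  #include <stdatomic.h>

  typedef struct { atomic_int poll; } spec_lock_t; /* 0 = free, 1 = held */

  static void spec_acquire(spec_lock_t *l) {
    for (int retries = 3; retries >= 0; --retries) {
      unsigned status = _xbegin();
      if (status == _XBEGIN_STARTED) {
        if (atomic_load_explicit(&l->poll, memory_order_relaxed) == 0)
          return;      // speculating: the lock word is only read, never written
        _xabort(0xff); // lock is held; abort with a recognizable code
      }
      if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
        // Aborted because the lock was held: wait for it to be released,
        // then retry the transaction.
        while (atomic_load_explicit(&l->poll, memory_order_relaxed) != 0)
          _mm_pause();
      } else if (!(status & _XABORT_RETRY)) {
        break;         // persistent abort: stop speculating
      }
    }
    // Fallback: plain test-and-test-and-set spin lock.
    int expected = 0;
    while (atomic_load_explicit(&l->poll, memory_order_relaxed) != 0 ||
           !atomic_compare_exchange_weak_explicit(&l->poll, &expected, 1,
                                                  memory_order_acquire,
                                                  memory_order_relaxed)) {
      expected = 0; // a failed CAS overwrites 'expected' with the current value
      _mm_pause();
    }
  }

  static void spec_release(spec_lock_t *l) {
    if (atomic_load_explicit(&l->poll, memory_order_relaxed) == 0)
      _xend(); // still inside the transaction: commit it
    else
      atomic_store_explicit(&l->poll, 0, memory_order_release); // real unlock
  }

When there is no conflict, the transaction commits without ever writing the lock word, so independent critical sections can run concurrently; only on repeated aborts does the code pay the cost of the real spin lock.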