[flang-commits] [flang] [Flang][runtime] Distinguish CPU time and elapsed time for cpu_time and system_clock (PR #96652)
Michael Klemm via flang-commits
flang-commits at lists.llvm.org
Thu Jun 27 09:22:18 PDT 2024
https://github.com/mjklemm updated https://github.com/llvm/llvm-project/pull/96652
>From bc31456640341e8e6b2a05ab8c1b5d303e7c107a Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Tue, 25 Jun 2024 16:48:22 +0200
Subject: [PATCH 1/9] Distinguish CPU time and elapsed time
---
flang/runtime/time-intrinsic.cpp | 39 ++++++++++++++++++++------------
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp
index a141fe63764a7..2b2c8a3c34e4d 100644
--- a/flang/runtime/time-intrinsic.cpp
+++ b/flang/runtime/time-intrinsic.cpp
@@ -64,20 +64,29 @@ template <typename Unused = void> double GetCpuTime(fallback_implementation) {
// clock_gettime is implemented in the pthread library for MinGW.
// Using it here would mean that all programs that link libFortranRuntime are
// required to also link to pthread. Instead, don't use the function.
-#undef CLOCKID
-#elif defined CLOCK_PROCESS_CPUTIME_ID
-#define CLOCKID CLOCK_PROCESS_CPUTIME_ID
+#undef CLOCKID_CPU_TIME
+#undef CLOCKID_ELAPSED_TIME
+#else
+// Determine what clock to use for CPU time.
+#if defined CLOCK_PROCESS_CPUTIME_ID
+#define CLOCKID_CPU_TIME CLOCK_PROCESS_CPUTIME_ID
#elif defined CLOCK_THREAD_CPUTIME_ID
-#define CLOCKID CLOCK_THREAD_CPUTIME_ID
-#elif defined CLOCK_MONOTONIC
-#define CLOCKID CLOCK_MONOTONIC
+#define CLOCKID_CPU_TIME CLOCK_THREAD_CPUTIME_ID
+#else
+#undef CLOCKID_CPU_TIME
+#endif
+
+// Determine what clock to use for elapsed time.
+#if defined CLOCK_MONOTONIC
+#define CLOCKID_ELAPSED_TIME CLOCK_MONOTONIC
#elif defined CLOCK_REALTIME
-#define CLOCKID CLOCK_REALTIME
+#define CLOCKID_ELAPSED_TIME CLOCK_REALTIME
#else
-#undef CLOCKID
+#undef CLOCKID_ELAPSED_TIME
+#endif
#endif
-#ifdef CLOCKID
+#ifdef CLOCKID_CPU_TIME
// POSIX implementation using clock_gettime. This is only enabled where
// clock_gettime is available.
template <typename T = int, typename U = struct timespec>
@@ -86,13 +95,13 @@ double GetCpuTime(preferred_implementation,
T ClockId = 0, U *Timespec = nullptr,
decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
struct timespec tspec;
- if (clock_gettime(CLOCKID, &tspec) == 0) {
+ if (clock_gettime(CLOCKID_CPU_TIME, &tspec) == 0) {
return tspec.tv_nsec * 1.0e-9 + tspec.tv_sec;
}
// Return some negative value to represent failure.
return -1.0;
}
-#endif
+#endif // CLOCKID_CPU_TIME
using count_t = std::int64_t;
using unsigned_count_t = std::uint64_t;
@@ -149,15 +158,15 @@ constexpr unsigned_count_t DS_PER_SEC{10u};
constexpr unsigned_count_t MS_PER_SEC{1'000u};
constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};
-#ifdef CLOCKID
+#ifdef CLOCKID_ELAPSED_TIME
template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCount(int kind, preferred_implementation,
// We need some dummy parameters to pass to decltype(clock_gettime).
T ClockId = 0, U *Timespec = nullptr,
- decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
+ decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
struct timespec tspec;
const unsigned_count_t huge{GetHUGE(kind)};
- if (clock_gettime(CLOCKID, &tspec) != 0) {
+ if (clock_gettime(CLOCKID_ELAPSED_TIME, &tspec) != 0) {
return -huge; // failure
}
unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)};
@@ -170,7 +179,7 @@ count_t GetSystemClockCount(int kind, preferred_implementation,
return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1);
}
}
-#endif
+#endif // CLOCKID_ELAPSED_TIME
template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCountRate(int kind, preferred_implementation,
>From 9ec6a00340b0b9083fe1a63e2b4331d9e7c11f40 Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Wed, 26 Jun 2024 14:06:31 +0200
Subject: [PATCH 2/9] Rework fallback implementation to use C++11 high-res
clock
---
flang/runtime/time-intrinsic.cpp | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp
index 2b2c8a3c34e4d..f8ea2e3e4d2d1 100644
--- a/flang/runtime/time-intrinsic.cpp
+++ b/flang/runtime/time-intrinsic.cpp
@@ -22,6 +22,7 @@
#ifdef _WIN32
#include "flang/Common/windows-include.h"
#else
+#include <chrono>
#include <sys/time.h> // gettimeofday
#include <sys/times.h>
#include <unistd.h>
@@ -114,26 +115,18 @@ static constexpr inline unsigned_count_t GetHUGE(int kind) {
return (unsigned_count_t{1} << ((8 * kind) - 1)) - 1;
}
-// This is the fallback implementation, which should work everywhere. Note that
-// in general we can't recover after std::clock has reached its maximum value.
+// This is the fallback implementation, which should work everywhere.
template <typename Unused = void>
count_t GetSystemClockCount(int kind, fallback_implementation) {
- std::clock_t timestamp{std::clock()};
- if (timestamp == static_cast<std::clock_t>(-1)) {
+ unsigned_count_t timestamp;
+ timestamp = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+ if (timestamp == static_cast<unsigned_count_t>(-1)) {
// Return -HUGE(COUNT) to represent failure.
return -static_cast<count_t>(GetHUGE(kind));
}
- // Convert the timestamp to std::uint64_t with wrap-around. The timestamp is
- // most likely a floating-point value (since C'11), so compute the modulus
- // carefully when one is required.
- constexpr auto maxUnsignedCount{std::numeric_limits<unsigned_count_t>::max()};
- if constexpr (std::numeric_limits<std::clock_t>::max() > maxUnsignedCount) {
- timestamp -= maxUnsignedCount * std::floor(timestamp / maxUnsignedCount);
- }
- unsigned_count_t unsignedCount{static_cast<unsigned_count_t>(timestamp)};
// Return the modulus of the unsigned integral count with HUGE(COUNT)+1.
// The result is a signed integer but never negative.
- return static_cast<count_t>(unsignedCount % (GetHUGE(kind) + 1));
+ return static_cast<count_t>(timestamp % (GetHUGE(kind) + 1));
}
template <typename Unused = void>
>From 850589dc942c56050b9ee84c08381c124ad3f2ad Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Wed, 26 Jun 2024 14:08:19 +0200
Subject: [PATCH 3/9] Make clang-format happy
---
flang/runtime/time-intrinsic.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp
index f8ea2e3e4d2d1..dcc6a05124937 100644
--- a/flang/runtime/time-intrinsic.cpp
+++ b/flang/runtime/time-intrinsic.cpp
@@ -119,7 +119,8 @@ static constexpr inline unsigned_count_t GetHUGE(int kind) {
template <typename Unused = void>
count_t GetSystemClockCount(int kind, fallback_implementation) {
unsigned_count_t timestamp;
- timestamp = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+ timestamp =
+ std::chrono::high_resolution_clock::now().time_since_epoch().count();
if (timestamp == static_cast<unsigned_count_t>(-1)) {
// Return -HUGE(COUNT) to represent failure.
return -static_cast<count_t>(GetHUGE(kind));
@@ -156,7 +157,7 @@ template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCount(int kind, preferred_implementation,
// We need some dummy parameters to pass to decltype(clock_gettime).
T ClockId = 0, U *Timespec = nullptr,
- decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
+ decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
struct timespec tspec;
const unsigned_count_t huge{GetHUGE(kind)};
if (clock_gettime(CLOCKID_ELAPSED_TIME, &tspec) != 0) {
>From 7d0dbb0028ff0b4f6bef6dc47dda120e0ee2671e Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Wed, 26 Jun 2024 16:32:06 +0200
Subject: [PATCH 4/9] Add SystemClockCount to the no-cpp-dep.c test
---
flang/test/Runtime/no-cpp-dep.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c
index 654bebed345b1..606a5d189f719 100644
--- a/flang/test/Runtime/no-cpp-dep.c
+++ b/flang/test/Runtime/no-cpp-dep.c
@@ -30,6 +30,7 @@ int32_t RTNAME(ArgumentCount)();
int32_t RTNAME(GetCommandArgument)(int32_t, const struct Descriptor *,
const struct Descriptor *, const struct Descriptor *);
int32_t RTNAME(GetEnvVariable)();
+int64_t RTNAME(SystemClockCount)(int kind);
int main() {
double x = RTNAME(CpuTime)();
@@ -37,5 +38,6 @@ int main() {
int32_t c = RTNAME(ArgumentCount)();
int32_t v = RTNAME(GetCommandArgument)(0, 0, 0, 0);
int32_t e = RTNAME(GetEnvVariable)("FOO", 0, 0);
+ int64_t t = RTNAME(SystemClockCount)(8);
return x + c + v + e;
}
>From 82ca5e25bdc4ddb26d47b5e444c73617623b551f Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Wed, 26 Jun 2024 17:37:38 +0200
Subject: [PATCH 5/9] Fix Windows build
---
flang/runtime/time-intrinsic.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp
index dcc6a05124937..9bce6b6866127 100644
--- a/flang/runtime/time-intrinsic.cpp
+++ b/flang/runtime/time-intrinsic.cpp
@@ -19,10 +19,10 @@
#include <cstdlib>
#include <cstring>
#include <ctime>
+#include <chrono>
#ifdef _WIN32
#include "flang/Common/windows-include.h"
#else
-#include <chrono>
#include <sys/time.h> // gettimeofday
#include <sys/times.h>
#include <unistd.h>
@@ -132,7 +132,7 @@ count_t GetSystemClockCount(int kind, fallback_implementation) {
template <typename Unused = void>
count_t GetSystemClockCountRate(int kind, fallback_implementation) {
- return CLOCKS_PER_SEC;
+ return std::chrono::high_resolution_clock::period::den;
}
template <typename Unused = void>
>From f55b5fa7707b698a5ecea8e35ce69d3d49e4265e Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Wed, 26 Jun 2024 18:44:24 +0200
Subject: [PATCH 6/9] Test if system_clock() is returning a meaningful result
---
flang/test/Runtime/system_clock.f90 | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
create mode 100644 flang/test/Runtime/system_clock.f90
diff --git a/flang/test/Runtime/system_clock.f90 b/flang/test/Runtime/system_clock.f90
new file mode 100644
index 0000000000000..d9f5333773a94
--- /dev/null
+++ b/flang/test/Runtime/system_clock.f90
@@ -0,0 +1,27 @@
+! RUN: %flang -o %t %s
+! RUN: %t
+
+program system_clock_test
+ use iso_fortran_env, only: int64, real64
+ implicit none
+
+ integer, parameter :: delta = 1
+ real(kind=real64), parameter :: epsilon = 0.001
+
+ integer(kind=int64) :: t_start, t_end
+ integer(kind=int64) :: rate
+ real(kind=real64) :: diff
+
+ call system_clock(count_rate=rate)
+
+ call system_clock(t_start)
+ call sleep(delta)
+ call system_clock(t_end)
+
+ diff = real(t_end - t_start, kind=real64) / real(rate, kind=real64)
+
+ if (abs(diff - real(delta, kind=real64)) <= epsilon) then
+ stop 0, quiet=.true.
+ end if
+ stop 1, quiet=.true.
+end program system_clock_test
>From c9d09594cc3eef8f3120613d4e0ab2995ab57543 Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Thu, 27 Jun 2024 14:36:46 +0200
Subject: [PATCH 7/9] Reduce precision requirement for the test
---
flang/test/Runtime/system_clock.f90 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/test/Runtime/system_clock.f90 b/flang/test/Runtime/system_clock.f90
index d9f5333773a94..cd8b1e3d54750 100644
--- a/flang/test/Runtime/system_clock.f90
+++ b/flang/test/Runtime/system_clock.f90
@@ -6,7 +6,7 @@ program system_clock_test
implicit none
integer, parameter :: delta = 1
- real(kind=real64), parameter :: epsilon = 0.001
+ real(kind=real64), parameter :: epsilon = 0.1
integer(kind=int64) :: t_start, t_end
integer(kind=int64) :: rate
>From a742c2f2871f411538bb6cd1e0b606977d6790e3 Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Thu, 27 Jun 2024 16:57:42 +0200
Subject: [PATCH 8/9] Dump std::chrono and use timespec_get instead
With this commit, some functions are now very similar to each other, so some cleanup might be desirable.
---
flang/runtime/time-intrinsic.cpp | 42 +++++++++++++++++++-------------
1 file changed, 25 insertions(+), 17 deletions(-)
diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp
index 9bce6b6866127..b494c7436f7b5 100644
--- a/flang/runtime/time-intrinsic.cpp
+++ b/flang/runtime/time-intrinsic.cpp
@@ -107,6 +107,15 @@ double GetCpuTime(preferred_implementation,
using count_t = std::int64_t;
using unsigned_count_t = std::uint64_t;
+// POSIX implementation using clock_gettime where available. The clock_gettime
+// result is in nanoseconds, which is converted as necessary to
+// - deciseconds for kind 1
+// - milliseconds for kinds 2, 4
+// - nanoseconds for kinds 8, 16
+constexpr unsigned_count_t DS_PER_SEC{10u};
+constexpr unsigned_count_t MS_PER_SEC{1'000u};
+constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};
+
// Computes HUGE(INT(0,kind)) as an unsigned integer value.
static constexpr inline unsigned_count_t GetHUGE(int kind) {
if (kind > 8) {
@@ -118,21 +127,29 @@ static constexpr inline unsigned_count_t GetHUGE(int kind) {
// This is the fallback implementation, which should work everywhere.
template <typename Unused = void>
count_t GetSystemClockCount(int kind, fallback_implementation) {
- unsigned_count_t timestamp;
- timestamp =
- std::chrono::high_resolution_clock::now().time_since_epoch().count();
- if (timestamp == static_cast<unsigned_count_t>(-1)) {
+ std::timespec tspec;
+
+ if (std::timespec_get(&tspec, TIME_UTC) < 0) {
// Return -HUGE(COUNT) to represent failure.
return -static_cast<count_t>(GetHUGE(kind));
}
- // Return the modulus of the unsigned integral count with HUGE(COUNT)+1.
- // The result is a signed integer but never negative.
- return static_cast<count_t>(timestamp % (GetHUGE(kind) + 1));
+
+ // compute the timestamp as seconds plus nanoseconds
+ const unsigned_count_t huge{GetHUGE(kind)};
+ unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)};
+ unsigned_count_t nsec{static_cast<unsigned_count_t>(tspec.tv_nsec)};
+ if (kind >= 8) {
+ return (sec * NS_PER_SEC + nsec) % (huge + 1);
+ } else if (kind >= 2) {
+ return (sec * MS_PER_SEC + (nsec / (NS_PER_SEC / MS_PER_SEC))) % (huge + 1);
+ } else { // kind == 1
+ return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1);
+ }
}
template <typename Unused = void>
count_t GetSystemClockCountRate(int kind, fallback_implementation) {
- return std::chrono::high_resolution_clock::period::den;
+ return kind >= 8 ? NS_PER_SEC : kind >= 2 ? MS_PER_SEC : DS_PER_SEC;
}
template <typename Unused = void>
@@ -143,15 +160,6 @@ count_t GetSystemClockCountMax(int kind, fallback_implementation) {
: static_cast<count_t>(maxCount);
}
-// POSIX implementation using clock_gettime where available. The clock_gettime
-// result is in nanoseconds, which is converted as necessary to
-// - deciseconds for kind 1
-// - milliseconds for kinds 2, 4
-// - nanoseconds for kinds 8, 16
-constexpr unsigned_count_t DS_PER_SEC{10u};
-constexpr unsigned_count_t MS_PER_SEC{1'000u};
-constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};
-
#ifdef CLOCKID_ELAPSED_TIME
template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCount(int kind, preferred_implementation,
>From 940bddfab476331e3f28d8a9869072a2d2de72bc Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Thu, 27 Jun 2024 18:22:02 +0200
Subject: [PATCH 9/9] Remove constantly failing test
---
flang/test/Runtime/system_clock.f90 | 27 ---------------------------
1 file changed, 27 deletions(-)
delete mode 100644 flang/test/Runtime/system_clock.f90
diff --git a/flang/test/Runtime/system_clock.f90 b/flang/test/Runtime/system_clock.f90
deleted file mode 100644
index cd8b1e3d54750..0000000000000
--- a/flang/test/Runtime/system_clock.f90
+++ /dev/null
@@ -1,27 +0,0 @@
-! RUN: %flang -o %t %s
-! RUN: %t
-
-program system_clock_test
- use iso_fortran_env, only: int64, real64
- implicit none
-
- integer, parameter :: delta = 1
- real(kind=real64), parameter :: epsilon = 0.1
-
- integer(kind=int64) :: t_start, t_end
- integer(kind=int64) :: rate
- real(kind=real64) :: diff
-
- call system_clock(count_rate=rate)
-
- call system_clock(t_start)
- call sleep(delta)
- call system_clock(t_end)
-
- diff = real(t_end - t_start, kind=real64) / real(rate, kind=real64)
-
- if (abs(diff - real(delta, kind=real64)) <= epsilon) then
- stop 0, quiet=.true.
- end if
- stop 1, quiet=.true.
-end program system_clock_test
More information about the flang-commits
mailing list