[libcxx-commits] [libcxx] 60ce987 - [RISCV][PowerPC] Fix google/benchmark benchmark::cycleclock::Now

Luís Marques via libcxx-commits <libcxx-commits at lists.llvm.org>
Sat Apr 18 01:32:54 PDT 2020


Author: Luís Marques
Date: 2020-04-18T09:32:19+01:00
New Revision: 60ce987bf31d6249c1f19e454f53b9d228925dcd

URL: https://github.com/llvm/llvm-project/commit/60ce987bf31d6249c1f19e454f53b9d228925dcd
DIFF: https://github.com/llvm/llvm-project/commit/60ce987bf31d6249c1f19e454f53b9d228925dcd.diff

LOG: [RISCV][PowerPC] Fix google/benchmark benchmark::cycleclock::Now

Cherrypick the upstream fix commit a77d5f7 onto llvm/utils/benchmark
and libcxx/utils/google-benchmark. This fixes LLVM's 32-bit RISC-V
compilation and the issues mentioned in
https://github.com/google/benchmark/pull/955. An additional cherrypick
of ecc1685 fixes some minor formatting issues introduced by the
preceding commit.

Differential Revision: https://reviews.llvm.org/D78084
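
For context, both the PowerPC and RISC-V changes rely on the same
hi/lo/hi re-read pattern: read the high half, then the low half, then the
high half again, and discard the low half if the high half changed in
between, all without branching. A simplified, self-contained sketch of that
pattern follows; read_hi(), read_lo() and fake_counter are hypothetical
stand-ins for the mftbu/mftbl and rdcycleh/rdcycle reads that the patched
code issues inside a single asm block.

    #include <cstdint>
    #include <cstdio>

    // Stand-in for the hardware time base / cycle CSR: a 64-bit counter
    // that can only be read 32 bits at a time and ticks on each low read.
    static uint64_t fake_counter = (1ull << 32) - 2;  // near a low-word rollover

    static uint32_t read_hi() { return static_cast<uint32_t>(fake_counter >> 32); }
    static uint32_t read_lo() { ++fake_counter; return static_cast<uint32_t>(fake_counter); }

    static uint64_t now() {
      uint32_t hi0 = read_hi();
      uint32_t lo = read_lo();
      uint32_t hi1 = read_hi();
      // If the high word changed between the two high reads, the low word
      // may belong to either epoch; zero it so the result is at worst
      // slightly stale rather than ~2^32 ticks off. The mask is all-ones
      // when hi0 == hi1 and zero otherwise, as in the patched code.
      lo &= -static_cast<int32_t>(hi0 == hi1);
      return (static_cast<uint64_t>(hi1) << 32) | lo;
    }

    int main() {
      std::printf("now() = %llu\n", static_cast<unsigned long long>(now()));
      return 0;
    }

The riscv32 version keeps the whole sequence, mask included, in one asm
statement because, as the new comment in the patch notes, Clang otherwise
turned the mask into a branch.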

Added: 
    

Modified: 
    libcxx/utils/google-benchmark/README.LLVM
    libcxx/utils/google-benchmark/src/cycleclock.h
    llvm/utils/benchmark/README.LLVM
    llvm/utils/benchmark/src/cycleclock.h

Removed: 
    


################################################################################
diff --git a/libcxx/utils/google-benchmark/README.LLVM b/libcxx/utils/google-benchmark/README.LLVM
index 05c5e3ee3432..ea92eee202ce 100644
--- a/libcxx/utils/google-benchmark/README.LLVM
+++ b/libcxx/utils/google-benchmark/README.LLVM
@@ -14,3 +14,11 @@ Changes:
   is applied on top of
   https://github.com/google/benchmark/commit/4528c76b718acc9b57956f63069c699ae21edcab
   to fix cross-build from linux to windows via MinGW.
+* https://github.com/google/benchmark/commit/a77d5f70efaebe2b7e8c10134526a23a7ce7ef35
+  and
+  https://github.com/google/benchmark/commit/ecc1685340f58f7fe6b707036bc0bb1fccabb0c1
+  are applied on top of
+  https://github.com/google/benchmark/commit/8e48105d465c586068dd8e248fe75a8971c6ba3a
+  to fix timestamp-related inline asm issues and 32-bit RISC-V build failures.
+  The second cherrypicked commit fixes formatting issues introduced by the
+  preceding change.

diff --git a/libcxx/utils/google-benchmark/src/cycleclock.h b/libcxx/utils/google-benchmark/src/cycleclock.h
index d5d62c4c7fe1..179c67cd614a 100644
--- a/libcxx/utils/google-benchmark/src/cycleclock.h
+++ b/libcxx/utils/google-benchmark/src/cycleclock.h
@@ -84,13 +84,21 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   return (high << 32) | low;
 #elif defined(__powerpc__) || defined(__ppc__)
   // This returns a time-base, which is not always precisely a cycle-count.
-  int64_t tbl, tbu0, tbu1;
-  asm("mftbu %0" : "=r"(tbu0));
-  asm("mftb  %0" : "=r"(tbl));
-  asm("mftbu %0" : "=r"(tbu1));
-  tbl &= -static_cast<int64_t>(tbu0 == tbu1);
-  // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
-  return (tbu1 << 32) | tbl;
+#if defined(__powerpc64__) || defined(__ppc64__)
+  int64_t tb;
+  asm volatile("mfspr %0, 268" : "=r"(tb));
+  return tb;
+#else
+  uint32_t tbl, tbu0, tbu1;
+  asm volatile(
+      "mftbu %0\n"
+      "mftbl %1\n"
+      "mftbu %2"
+      : "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
+  tbl &= -static_cast<int32_t>(tbu0 == tbu1);
+  // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is no longer needed)
+  return (static_cast<uint64_t>(tbu1) << 32) | tbl;
+#endif
 #elif defined(__sparc__)
   int64_t tick;
   asm(".byte 0x83, 0x41, 0x00, 0x00");
@@ -167,16 +175,22 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
 #elif defined(__riscv) // RISC-V
   // Use RDCYCLE (and RDCYCLEH on riscv32)
 #if __riscv_xlen == 32
-  uint64_t cycles_low, cycles_hi0, cycles_hi1;
-  asm("rdcycleh %0" : "=r"(cycles_hi0));
-  asm("rdcycle %0" : "=r"(cycles_lo));
-  asm("rdcycleh %0" : "=r"(cycles_hi1));
-  // This matches the PowerPC overflow detection, above
-  cycles_lo &= -static_cast<int64_t>(cycles_hi0 == cycles_hi1);
-  return (cycles_hi1 << 32) | cycles_lo;
+  uint32_t cycles_lo, cycles_hi0, cycles_hi1;
+  // This asm also includes the PowerPC overflow handling strategy, as above.
+  // Implemented in assembly because Clang insisted on branching.
+  asm volatile(
+      "rdcycleh %0\n"
+      "rdcycle %1\n"
+      "rdcycleh %2\n"
+      "sub %0, %0, %2\n"
+      "seqz %0, %0\n"
+      "sub %0, zero, %0\n"
+      "and %1, %1, %0\n"
+      : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
+  return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
 #else
   uint64_t cycles;
-  asm("rdcycle %0" : "=r"(cycles));
+  asm volatile("rdcycle %0" : "=r"(cycles));
   return cycles;
 #endif
 #else

diff --git a/llvm/utils/benchmark/README.LLVM b/llvm/utils/benchmark/README.LLVM
index e3668ce5c082..b370925b9543 100644
--- a/llvm/utils/benchmark/README.LLVM
+++ b/llvm/utils/benchmark/README.LLVM
@@ -27,3 +27,9 @@ Changes:
   is applied on top of v1.4.1 to add RISC-V timer support.
 * https://github.com/google/benchmark/commit/8e48105d465c586068dd8e248fe75a8971c6ba3a
   is applied on top of v1.4.1 to fix cross-build from linux to windows via MinGW.
+* https://github.com/google/benchmark/commit/a77d5f70efaebe2b7e8c10134526a23a7ce7ef35
+  and
+  https://github.com/google/benchmark/commit/ecc1685340f58f7fe6b707036bc0bb1fccabb0c1
+  are applied on top of the previous cherrypick to fix timestamp-related inline
+  asm issues and 32-bit RISC-V build failures. The second cherrypicked commit
+  fixes formatting issues introduced by the preceding change.

diff --git a/llvm/utils/benchmark/src/cycleclock.h b/llvm/utils/benchmark/src/cycleclock.h
index 7b54b2530378..1b0f09359c9b 100644
--- a/llvm/utils/benchmark/src/cycleclock.h
+++ b/llvm/utils/benchmark/src/cycleclock.h
@@ -84,13 +84,21 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   return (high << 32) | low;
 #elif defined(__powerpc__) || defined(__ppc__)
   // This returns a time-base, which is not always precisely a cycle-count.
-  int64_t tbl, tbu0, tbu1;
-  asm("mftbu %0" : "=r"(tbu0));
-  asm("mftb  %0" : "=r"(tbl));
-  asm("mftbu %0" : "=r"(tbu1));
-  tbl &= -static_cast<int64_t>(tbu0 == tbu1);
-  // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
-  return (tbu1 << 32) | tbl;
+#if defined(__powerpc64__) || defined(__ppc64__)
+  int64_t tb;
+  asm volatile("mfspr %0, 268" : "=r"(tb));
+  return tb;
+#else
+  uint32_t tbl, tbu0, tbu1;
+  asm volatile(
+      "mftbu %0\n"
+      "mftbl %1\n"
+      "mftbu %2"
+      : "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
+  tbl &= -static_cast<int32_t>(tbu0 == tbu1);
+  // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is no longer needed)
+  return (static_cast<uint64_t>(tbu1) << 32) | tbl;
+#endif
 #elif defined(__sparc__)
   int64_t tick;
   asm(".byte 0x83, 0x41, 0x00, 0x00");
@@ -167,16 +175,22 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
 #elif defined(__riscv) // RISC-V
   // Use RDCYCLE (and RDCYCLEH on riscv32)
 #if __riscv_xlen == 32
-  uint64_t cycles_low, cycles_hi0, cycles_hi1;
-  asm("rdcycleh %0" : "=r"(cycles_hi0));
-  asm("rdcycle %0" : "=r"(cycles_lo));
-  asm("rdcycleh %0" : "=r"(cycles_hi1));
-  // This matches the PowerPC overflow detection, above
-  cycles_lo &= -static_cast<int64_t>(cycles_hi0 == cycles_hi1);
-  return (cycles_hi1 << 32) | cycles_lo;
+  uint32_t cycles_lo, cycles_hi0, cycles_hi1;
+  // This asm also includes the PowerPC overflow handling strategy, as above.
+  // Implemented in assembly because Clang insisted on branching.
+  asm volatile(
+      "rdcycleh %0\n"
+      "rdcycle %1\n"
+      "rdcycleh %2\n"
+      "sub %0, %0, %2\n"
+      "seqz %0, %0\n"
+      "sub %0, zero, %0\n"
+      "and %1, %1, %0\n"
+      : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
+  return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
 #else
   uint64_t cycles;
-  asm("rdcycle %0" : "=r"(cycles));
+  asm volatile("rdcycle %0" : "=r"(cycles));
   return cycles;
 #endif
 #else

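As a quick sanity check on an affected target, benchmark::cycleclock::Now()
can be exercised directly with a minimal program such as the sketch below.
This assumes the internal src/cycleclock.h header (and whatever it includes)
is reachable on the include path; it is not part of the library's public
API, so treat this purely as an illustration.

    #include <cstdint>
    #include <cstdio>

    #include "cycleclock.h"  // internal header providing benchmark::cycleclock::Now()

    int main() {
      const int64_t start = benchmark::cycleclock::Now();
      volatile uint64_t sink = 0;
      for (int i = 0; i < 1000000; ++i) sink = sink + i;  // arbitrary work to time
      const int64_t end = benchmark::cycleclock::Now();
      std::printf("elapsed ticks: %lld\n", static_cast<long long>(end - start));
      return 0;
    }

On riscv32 this goes through the new rdcycleh/rdcycle/rdcycleh sequence,
and on 32-bit PowerPC through the mftbu/mftbl/mftbu sequence.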