[libcxx-commits] [libcxx] [libc++][chrono] Loads tzdata.zi in tzdb. (PR #74928)

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Tue Jan 30 09:29:40 PST 2024


================
@@ -96,6 +121,369 @@ static void __matches(istream& __input, string_view __expected) {
   }
 }
 
+// Parses a non-negative decimal number from __input.
+//
+// The first character is always consumed and has to be a digit; when
+// __leading_zero_allowed is false it additionally has to be non-zero.
+// Following digits are consumed greedily and the first non-digit is left in
+// the stream. Malformed input and values beyond the accepted range are
+// reported through std::__throw_runtime_error.
+[[nodiscard]] static int64_t __parse_integral(istream& __input, bool __leading_zero_allowed) {
+  int64_t __value = __input.get();
+  if (!__leading_zero_allowed) {
+    if (__value < '1' || __value > '9')
+      std::__throw_runtime_error("corrupt tzdb: expected a non-zero digit");
+  } else if (__value < '0' || __value > '9') {
+    std::__throw_runtime_error("corrupt tzdb: expected a digit");
+  }
+  __value -= '0';
+
+  // The accepted range is capped so that appending another digit can never
+  // overflow. Most parsed values are tiny:
+  // - at most 8784 hours in a year,
+  // - at most 31 days in a month,
+  // - years, which are expected to fit the range of the year type.
+  //
+  // Leap seconds however are stored as seconds since the epoch. An int would
+  // overflow in 2038, whereas a 64-bit value covers billions of years.
+  // Giving up a small part of that range keeps the check simple.
+  constexpr int64_t __limit = std::numeric_limits<int64_t>::max() / 16;
+
+  for (auto __next = __input.peek(); __next >= '0' && __next <= '9'; __next = __input.peek()) {
+    if (__value > __limit)
+      std::__throw_runtime_error("corrupt tzdb: integral too large");
+
+    __value = __value * 10 + (__input.get() - '0');
+  }
+  return __value;
+}
+
+//===----------------------------------------------------------------------===//
+//                          Calendar
+//===----------------------------------------------------------------------===//
+
+// Parses a day-of-month value from __input.
+//
+// The number may not start with a zero and may not exceed 31; violations are
+// reported through std::__throw_runtime_error.
+[[nodiscard]] static day __parse_day(istream& __input) {
+  // Do the range check on the full 64-bit value. Narrowing to unsigned first
+  // would wrap large inputs (e.g. 4294967297 becomes 1 with a 32-bit
+  // unsigned) and silently accept them as a valid day.
+  int64_t __result = chrono::__parse_integral(__input, false);
+  if (__result > 31)
+    std::__throw_runtime_error("corrupt tzdb day: value too large");
+  return day{static_cast<unsigned>(__result)};
+}
+
+// Parses a weekday name from __input, case-insensitively.
+//
+// The leading one or two characters select the weekday; the rest of the name
+// is handed to chrono::__skip (defined elsewhere; presumably it consumes the
+// remaining letters when present -- confirm against its definition).
+// An unrecognised prefix is reported through std::__throw_runtime_error.
+[[nodiscard]] static weekday __parse_weekday(istream& __input) {
+  // TZDB allows the shortest unique name.
+  switch (std::tolower(__input.get())) {
+  case 'f':
+    chrono::__skip(__input, "riday");
+    return Friday;
+
+  case 'm':
+    chrono::__skip(__input, "onday");
+    return Monday;
+
+  case 's':
+    // "Saturday" and "Sunday" need a second character to disambiguate.
+    switch (std::tolower(__input.get())) {
+    case 'a':
+      chrono::__skip(__input, "turday");
+      return Saturday;
+
+    case 'u':
+      // "su" has been consumed already, only "nday" remains.
+      chrono::__skip(__input, "nday");
+      return Sunday;
+    }
+    break;
+
+  case 't':
+    // "Thursday" and "Tuesday" need a second character to disambiguate.
+    switch (std::tolower(__input.get())) {
+    case 'h':
+      chrono::__skip(__input, "ursday");
+      return Thursday;
+
+    case 'u':
+      chrono::__skip(__input, "esday");
+      return Tuesday;
+    }
+    break;
+
+  case 'w':
+    chrono::__skip(__input, "ednesday");
+    return Wednesday;
+  }
+
+  std::__throw_runtime_error("corrupt tzdb weekday: invalid name");
+}
+
+// Parses a month name from __input, case-insensitively.
+//
+// Characters are consumed one at a time until the month is unambiguous; the
+// rest of the name is handed to chrono::__skip. A prefix that matches no
+// month is reported through std::__throw_runtime_error.
+[[nodiscard]] static month __parse_month(istream& __input) {
+  // TZDB allows the shortest unique name.
+  switch (std::tolower(__input.get())) {
+  case 'a': {
+    // April or August.
+    const int __second = std::tolower(__input.get());
+    if (__second == 'p') {
+      chrono::__skip(__input, "ril");
+      return April;
+    }
+    if (__second == 'u') {
+      chrono::__skip(__input, "gust");
+      return August;
+    }
+    break;
+  }
+
+  case 'd':
+    chrono::__skip(__input, "ecember");
+    return December;
+
+  case 'f':
+    chrono::__skip(__input, "ebruary");
+    return February;
+
+  case 'j': {
+    // January, June or July.
+    const int __second = std::tolower(__input.get());
+    if (__second == 'a') {
+      chrono::__skip(__input, "nuary");
+      return January;
+    }
+    if (__second != 'u')
+      break;
+
+    // "ju" needs a third character to tell June from July.
+    const int __third = std::tolower(__input.get());
+    if (__third == 'n') {
+      chrono::__skip(__input, 'e');
+      return June;
+    }
+    if (__third == 'l') {
+      chrono::__skip(__input, 'y');
+      return July;
+    }
+    break;
+  }
+
+  case 'm': {
+    // May or March; both start with "ma".
+    if (std::tolower(__input.get()) != 'a')
+      break;
+
+    const int __third = std::tolower(__input.get());
+    if (__third == 'y')
+      return May;
+    if (__third == 'r') {
+      chrono::__skip(__input, "ch");
+      return March;
+    }
+    break;
+  }
+
+  case 'n':
+    chrono::__skip(__input, "ovember");
+    return November;
+
+  case 'o':
+    chrono::__skip(__input, "ctober");
+    return October;
+
+  case 's':
+    chrono::__skip(__input, "eptember");
+    return September;
+  }
+
+  std::__throw_runtime_error("corrupt tzdb month: invalid name");
+}
+
+[[nodiscard]] static year __parse_year_value(istream& __input) {
+  bool __negative = __input.peek() == '-';
+  if (__negative) [[unlikely]]
+    __input.get();
+
+  static_assert(year::min() == -year::max());
+  int __result = std::min<int64_t>(__parse_integral(__input, true), static_cast<int>(year::max()));
----------------
ldionne wrote:

If we parse an integral larger than `year::max()`, my understanding is that we'll silently read it as `year::max()` instead. Is there a reason for not throwing an exception here instead? Given that we have an implementation limit based on `chrono::year`, I think it makes sense to error out in *some* way at least. We could also `assert` here, I think that would be reasonable, but throwing an exception might provide a better user experience.

Silently clamping to `year::max()` seems like a really surprising behavior to me.

https://github.com/llvm/llvm-project/pull/74928


More information about the libcxx-commits mailing list