[clang] [Gnu] Improve GCCVersion parsing to match versions such as "10-win32" (PR #69079)

Martin Storsjö via cfe-commits cfe-commits at lists.llvm.org
Sat Oct 14 14:08:21 PDT 2023


https://github.com/mstorsjo created https://github.com/llvm/llvm-project/pull/69079

In earlier GCC versions, the Debian/Ubuntu-provided MinGW toolchains were packaged in /usr/lib/gcc/<triple> with version strings such as "5.3-win32", which have been matched and found since 6afcd64eb65fca233a7b173f88cffb2c2c9c114c. However, in recent versions, they have stopped including the minor version number and only have version strings such as "10-win32" and "10-posix".

Generalize the parsing code so that a patch suffix is also accepted on a version string that consists of only a major number.

Refactor the string parsing code to highlight the overall structure of the parsing. This implementation should yield the same result as before, except when there is only one segment and it has trailing non-numeric content (e.g. "10-win32"); such strings were previously rejected and are now parsed.

This allows Clang to find the GCC libraries and headers in Debian/Ubuntu-provided MinGW cross compilers.

>From 2b127200dc7b7b7c60e3001c7acf49a33a22e2a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin at martin.st>
Date: Sat, 14 Oct 2023 00:06:05 +0300
Subject: [PATCH 1/2] [clang] [unittest] Add a test for
 Generic_GCC::GCCVersion::Parse

This adds actual test cases for all the cases that are listed in
a code comment in the implementation of this function; having such
test coverage makes further modifications to the function easier.
---
 clang/unittests/Driver/CMakeLists.txt     |  1 +
 clang/unittests/Driver/GCCVersionTest.cpp | 48 +++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 clang/unittests/Driver/GCCVersionTest.cpp

diff --git a/clang/unittests/Driver/CMakeLists.txt b/clang/unittests/Driver/CMakeLists.txt
index e37c158d7137a88..752037f78fb147d 100644
--- a/clang/unittests/Driver/CMakeLists.txt
+++ b/clang/unittests/Driver/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS
 add_clang_unittest(ClangDriverTests
   DistroTest.cpp
   DXCModeTest.cpp
+  GCCVersionTest.cpp
   ToolChainTest.cpp
   ModuleCacheTest.cpp
   MultilibBuilderTest.cpp
diff --git a/clang/unittests/Driver/GCCVersionTest.cpp b/clang/unittests/Driver/GCCVersionTest.cpp
new file mode 100644
index 000000000000000..ef05a0b4fe734e5
--- /dev/null
+++ b/clang/unittests/Driver/GCCVersionTest.cpp
@@ -0,0 +1,48 @@
+//===- unittests/Driver/GCCVersionTest.cpp --- GCCVersion parser tests ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Unit tests for Generic_GCC::GCCVersion
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/Driver/ToolChains/Gnu.h"
+#include "gtest/gtest.h"
+
+using namespace clang::driver;
+using namespace clang;
+
+struct VersionParseTest { // One Parse() input and its expected result.
+  std::string Text; // Version string passed to GCCVersion::Parse.
+
+  int Major, Minor, Patch; // Expected numbers; the table uses -1 if absent.
+  std::string MajorStr, MinorStr, PatchSuffix; // Expected strings.
+};
+
+const VersionParseTest TestCases[] = { // Fields in struct order.
+    {"5", 5, -1, -1, "5", "", ""}, // Major only.
+    {"4.4", 4, 4, -1, "4", "4", ""}, // Major and minor.
+    {"4.4-patched", 4, 4, -1, "4", "4", "-patched"}, // Suffix after minor.
+    {"4.4.0", 4, 4, 0, "4", "4", ""}, // Numeric patch level.
+    {"4.4.x", 4, 4, -1, "4", "4", ""}, // Non-numeric patch: Patch is -1.
+    {"4.4.2-rc4", 4, 4, 2, "4", "4", "-rc4"},
+    {"4.4.x-patched", 4, 4, -1, "4", "4", ""},
+    {"not-a-version", -1, -1, -1, "", "", ""},
+};
+
+TEST(GCCVersionTest, Parse) {
+  for (const auto &TC : TestCases) { // Parse each table entry and compare.
+    auto V = toolchains::Generic_GCC::GCCVersion::Parse(TC.Text);
+    ASSERT_EQ(V.Text, TC.Text); // Result is expected to echo the input.
+    ASSERT_EQ(V.Major, TC.Major); // ASSERT_*: a mismatch ends the test.
+    ASSERT_EQ(V.Minor, TC.Minor);
+    ASSERT_EQ(V.Patch, TC.Patch);
+    ASSERT_EQ(V.MajorStr, TC.MajorStr);
+    ASSERT_EQ(V.MinorStr, TC.MinorStr);
+    ASSERT_EQ(V.PatchSuffix, TC.PatchSuffix); // "" when no suffix is kept.
+  }
+}

>From 2c923927f2aaf58e0879fe88b573fd1bc80063a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin at martin.st>
Date: Sat, 14 Oct 2023 00:55:18 +0300
Subject: [PATCH 2/2] [clang] [Gnu] Improve GCCVersion parsing to match
 versions such as "10-win32"

In earlier GCC versions, the Debian/Ubuntu-provided MinGW toolchains
were packaged in /usr/lib/gcc/<triple> with version strings such
as "5.3-win32", which have been matched and found since
6afcd64eb65fca233a7b173f88cffb2c2c9c114c. However, in recent versions,
they have stopped including the minor version number and only
have version strings such as "10-win32" and "10-posix".

Generalize the parsing code so that a patch suffix is also accepted
on a version string that consists of only a major number.

Refactor the string parsing code to highlight the overall structure
of the parsing. This implementation should yield the same result
as before, except when there is only one segment and it has trailing
non-numeric content (e.g. "10-win32"), which is now parsed.

This allows Clang to find the GCC libraries and headers in
Debian/Ubuntu-provided MinGW cross compilers.
---
 clang/lib/Driver/ToolChains/Gnu.cpp       | 82 +++++++++++++++--------
 clang/unittests/Driver/GCCVersionTest.cpp |  1 +
 2 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index cdd911af9a73361..e6f94836c4110a1 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -2007,45 +2007,71 @@ Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) {
   std::pair<StringRef, StringRef> First = VersionText.split('.');
   std::pair<StringRef, StringRef> Second = First.second.split('.');
 
-  GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
-  if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0)
-    return BadVersion;
-  GoodVersion.MajorStr = First.first.str();
-  if (First.second.empty())
-    return GoodVersion;
+  StringRef MajorStr = First.first;
   StringRef MinorStr = Second.first;
-  if (Second.second.empty()) {
-    if (size_t EndNumber = MinorStr.find_first_not_of("0123456789")) {
-      GoodVersion.PatchSuffix = std::string(MinorStr.substr(EndNumber));
-      MinorStr = MinorStr.slice(0, EndNumber);
-    }
-  }
-  if (MinorStr.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0)
-    return BadVersion;
-  GoodVersion.MinorStr = MinorStr.str();
+  StringRef PatchStr = Second.second;
 
-  // First look for a number prefix and parse that if present. Otherwise just
-  // stash the entire patch string in the suffix, and leave the number
-  // unspecified. This covers versions strings such as:
-  //   5        (handled above)
+  GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
+
+  // Parse version number strings such as:
+  //   5
   //   4.4
   //   4.4-patched
   //   4.4.0
   //   4.4.x
   //   4.4.2-rc4
   //   4.4.x-patched
-  // And retains any patch number it finds.
-  StringRef PatchText = Second.second;
-  if (!PatchText.empty()) {
-    if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) {
-      // Try to parse the number and any suffix.
-      if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) ||
-          GoodVersion.Patch < 0)
-        return BadVersion;
-      GoodVersion.PatchSuffix = std::string(PatchText.substr(EndNumber));
+  //   10-win32
+  // Split on '.', handle 1, 2 or 3 such segments. Each segment must contain
+  // purely a number, except for the last one, where a non-number suffix
+  // is stored in PatchSuffix. The third segment is allowed to not contain
+  // a number at all.
+
+  auto HandleLastNumber = [&](StringRef Segment, int &Number,
+                              std::string &OutStr) -> bool {
+    // Look for a number prefix and parse that, and split out any trailing
+    // string into GoodVersion.PatchSuffix.
+
+    if (size_t EndNumber = Segment.find_first_not_of("0123456789")) {
+      StringRef NumberStr = Segment.slice(0, EndNumber);
+      if (NumberStr.getAsInteger(10, Number) || Number < 0)
+        return false;
+      OutStr = NumberStr;
+      GoodVersion.PatchSuffix = Segment.substr(EndNumber);
+      return true;
     }
+    return false;
+  };
+  auto HandleNumber = [](StringRef Segment, int &Number) -> bool {
+    if (Segment.getAsInteger(10, Number) || Number < 0)
+      return false;
+    return true;
+  };
+
+  if (MinorStr.empty()) {
+    // If no minor string, major is the last segment
+    if (!HandleLastNumber(MajorStr, GoodVersion.Major, GoodVersion.MajorStr))
+      return BadVersion;
+    return GoodVersion;
+  } else {
+    if (!HandleNumber(MajorStr, GoodVersion.Major))
+      return BadVersion;
+    GoodVersion.MajorStr = MajorStr;
+  }
+  if (PatchStr.empty()) {
+    // If no patch string, minor is the last segment
+    if (!HandleLastNumber(MinorStr, GoodVersion.Minor, GoodVersion.MinorStr))
+      return BadVersion;
+    return GoodVersion;
+  } else {
+    if (!HandleNumber(MinorStr, GoodVersion.Minor))
+      return BadVersion;
+    GoodVersion.MinorStr = MinorStr;
   }
 
+  // For the last segment, tolerate a missing number.
+  std::string DummyStr;
+  HandleLastNumber(PatchStr, GoodVersion.Patch, DummyStr);
   return GoodVersion;
 }
 
diff --git a/clang/unittests/Driver/GCCVersionTest.cpp b/clang/unittests/Driver/GCCVersionTest.cpp
index ef05a0b4fe734e5..91842a2ea959754 100644
--- a/clang/unittests/Driver/GCCVersionTest.cpp
+++ b/clang/unittests/Driver/GCCVersionTest.cpp
@@ -32,6 +32,7 @@ const VersionParseTest TestCases[] = {
     {"4.4.2-rc4", 4, 4, 2, "4", "4", "-rc4"},
     {"4.4.x-patched", 4, 4, -1, "4", "4", ""},
     {"not-a-version", -1, -1, -1, "", "", ""},
+    { "10-win32", 10, -1, -1, "10", "", "-win32" },
 };
 
 TEST(GCCVersionTest, Parse) {



More information about the cfe-commits mailing list