[llvm] [clang] [RISCV] Relax march string order constraint (PR #78120)

Piyou Chen via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 25 01:14:03 PST 2024


https://github.com/BeMg updated https://github.com/llvm/llvm-project/pull/78120

>From 7ad5726d93e957050723a7df7a8384bc9ff694b3 Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Sun, 14 Jan 2024 19:41:59 -0800
Subject: [PATCH 1/8] [RISCV] Relax march string order constraint

---
 clang/test/Driver/riscv-arch.c              |  14 +-
 llvm/lib/Support/RISCVISAInfo.cpp           | 297 ++++++++++----------
 llvm/unittests/Support/RISCVISAInfoTest.cpp |  91 ++++--
 3 files changed, 226 insertions(+), 176 deletions(-)

diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index 0ac81ea982f1b61..38de95e4fbf7aa4 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -156,9 +156,8 @@
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32L %s
 // RV32L: error: invalid arch name 'rv32l'
 
-// RUN: not %clang --target=riscv32-unknown-elf -march=rv32imadf -### %s \
-// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32IMADF %s
-// RV32IMADF: error: invalid arch name 'rv32imadf'
+// RUN: %clang --target=riscv32-unknown-elf -march=rv32imadf -### %s \
+// RUN: -fsyntax-only 2>&1 | FileCheck %s
 
 // RUN: not %clang --target=riscv32-unknown-elf -march=rv32imm -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32IMM %s
@@ -184,9 +183,8 @@
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64L %s
 // RV64L: error: invalid arch name 'rv64l'
 
-// RUN: not %clang --target=riscv64-unknown-elf -march=rv64imadf -### %s \
-// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64IMADF %s
-// RV64IMADF: error: invalid arch name 'rv64imadf'
+// RUN: %clang --target=riscv64-unknown-elf -march=rv64imadf -### %s \
+// RUN: -fsyntax-only 2>&1 | FileCheck %s
 
 // RUN: not %clang --target=riscv64-unknown-elf -march=rv64imm -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64IMM %s
@@ -216,7 +214,7 @@
 // RUN: not %clang --target=riscv32-unknown-elf -march=rv32imcq -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ORDER %s
 // RV32-ORDER: error: invalid arch name 'rv32imcq',
-// RV32-ORDER: standard user-level extension not given in canonical order 'q'
+// RV32-ORDER: unsupported standard user-level extension 'q'
 
 // RUN: not %clang --target=riscv32-unknown-elf -march=rv32izvl64b -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ZVL64B-ER %s
@@ -318,7 +316,7 @@
 // RUN: not %clang --target=riscv32-unknown-elf -march=rv32ixabc_a -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-PREFIX %s
 // RV32-PREFIX: error: invalid arch name 'rv32ixabc_a',
-// RV32-PREFIX: invalid extension prefix 'a'
+// RV32-PREFIX: unsupported non-standard user-level extension 'xabc'
 
 // RUN: not %clang --target=riscv32-unknown-elf -march=rv32ixdef_sabc -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-X-ORDER %s
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 3c02492e99f1db4..06579c20b2637a2 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -703,6 +703,106 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) {
   return std::move(ISAInfo);
 }
 
+static Error splitExtsByUnderscore(StringRef Exts,
+                                   std::vector<std::string> &SplitedExts) {
+  SmallVector<StringRef, 8> Split;
+  if (Exts.empty())
+    return Error::success();
+
+  Exts.split(Split, "_");
+
+  for (auto Ext : Split) {
+    if (Ext.empty())
+      return createStringError(errc::invalid_argument,
+                               "extension name missing after separator '_'");
+
+    SplitedExts.push_back(Ext.str());
+  }
+  return Error::success();
+}
+
+static Error processMultiLetterExtension(
+    StringRef RawExt, SmallVector<std::string, 8> &SeenExts,
+    SmallVector<RISCVISAInfo::ExtensionVersion, 8> &ExtsVersion,
+    bool IgnoreUnknown, bool EnableExperimentalExtension,
+    bool ExperimentalExtensionVersionCheck) {
+  StringRef Type = getExtensionType(RawExt);
+  StringRef Desc = getExtensionTypeDesc(RawExt);
+  auto Pos = findLastNonVersionCharacter(RawExt) + 1;
+  StringRef Name(RawExt.substr(0, Pos));
+  StringRef Vers(RawExt.substr(Pos));
+
+  if (Type.empty()) {
+    if (IgnoreUnknown)
+      return Error::success();
+    return createStringError(errc::invalid_argument,
+                             "invalid extension prefix '" + RawExt + "'");
+  }
+
+  if (!IgnoreUnknown && Name.size() == Type.size())
+    return createStringError(errc::invalid_argument,
+                             "%s name missing after '%s'", Desc.str().c_str(),
+                             Type.str().c_str());
+
+  unsigned Major, Minor, ConsumeLength;
+  if (auto E = getExtensionVersion(Name, Vers, Major, Minor, ConsumeLength,
+                                   EnableExperimentalExtension,
+                                   ExperimentalExtensionVersionCheck)) {
+    if (IgnoreUnknown) {
+      consumeError(std::move(E));
+      return Error::success();
+    }
+    return E;
+  }
+
+  // Check if duplicated extension.
+  if (!IgnoreUnknown && llvm::is_contained(SeenExts, Name))
+    return createStringError(errc::invalid_argument, "duplicated %s '%s'",
+                             Desc.str().c_str(), Name.str().c_str());
+
+  if (IgnoreUnknown && !RISCVISAInfo::isSupportedExtension(Name))
+    return Error::success();
+
+  SeenExts.push_back(Name.str());
+  ExtsVersion.push_back({Major, Minor});
+  return Error::success();
+}
+
+static Error processSingleLetterExtension(
+    StringRef &RawExt, SmallVector<std::string, 8> &SeenExts,
+    SmallVector<RISCVISAInfo::ExtensionVersion, 8> &ExtsVersion,
+    bool IgnoreUnknown, bool EnableExperimentalExtension,
+    bool ExperimentalExtensionVersionCheck) {
+  unsigned Major, Minor, ConsumeLength;
+  StringRef Name = RawExt.take_front(1);
+  RawExt.consume_front(Name);
+  if (auto E = getExtensionVersion(Name, RawExt, Major, Minor, ConsumeLength,
+                                   EnableExperimentalExtension,
+                                   ExperimentalExtensionVersionCheck)) {
+    if (IgnoreUnknown) {
+      consumeError(std::move(E));
+      RawExt = RawExt.substr(ConsumeLength);
+      return Error::success();
+    }
+    return E;
+  }
+
+  RawExt = RawExt.substr(ConsumeLength);
+
+  // Check if duplicated extension.
+  if (!IgnoreUnknown && llvm::is_contained(SeenExts, Name))
+    return createStringError(errc::invalid_argument,
+                             "duplicated standard user-level extension '%s'",
+                             Name.str().c_str());
+
+  if (IgnoreUnknown && !RISCVISAInfo::isSupportedExtension(Name))
+    return Error::success();
+
+  SeenExts.push_back(Name.str());
+  ExtsVersion.push_back({Major, Minor});
+  return Error::success();
+}
+
 llvm::Expected<std::unique_ptr<RISCVISAInfo>>
 RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
                               bool ExperimentalExtensionVersionCheck,
@@ -723,6 +823,8 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
 
   unsigned XLen = HasRV64 ? 64 : 32;
   std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
+  SmallVector<std::string, 8> SeenExts;
+  SmallVector<RISCVISAInfo::ExtensionVersion, 8> ExtsVersion;
 
   // The canonical order specified in ISA manual.
   // Ref: Table 22.1 in RISC-V User-Level ISA V2.2
@@ -753,17 +855,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
   // Skip rvxxx
   StringRef Exts = Arch.substr(5);
 
-  // Remove multi-letter standard extensions, non-standard extensions and
-  // supervisor-level extensions. They have 'z', 'x', 's' prefixes.
-  // Parse them at the end.
-  // Find the very first occurrence of 's', 'x' or 'z'.
-  StringRef OtherExts;
-  size_t Pos = Exts.find_first_of("zsx");
-  if (Pos != StringRef::npos) {
-    OtherExts = Exts.substr(Pos);
-    Exts = Exts.substr(0, Pos);
-  }
-
   unsigned Major, Minor, ConsumeLength;
   if (Baseline == 'g') {
     // Versions for g are disallowed, and this was checked for previously.
@@ -773,9 +864,11 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
     // version since the we don't have clear version scheme for that on
     // ISA spec.
     for (const auto *Ext : RISCVGImplications) {
-      if (auto Version = findDefaultVersion(Ext))
-        ISAInfo->addExtension(Ext, *Version);
-      else
+      if (auto Version = findDefaultVersion(Ext)) {
+        // Postpone AddExtension until end of this function
+        SeenExts.push_back(Ext);
+        ExtsVersion.push_back({Version->Major, Version->Minor});
+      } else
         llvm_unreachable("Default extension version not found?");
     }
   } else {
@@ -793,7 +886,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
       Minor = Version->Minor;
     }
 
-    ISAInfo->addExtension(StringRef(&Baseline, 1), {Major, Minor});
+    // Postpone AddExtension until end of this function
+    SeenExts.push_back(StringRef(&Baseline, 1).str());
+    ExtsVersion.push_back({Major, Minor});
   }
 
   // Consume the base ISA version number and any '_' between rvxxx and the
@@ -801,145 +896,51 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
   Exts = Exts.drop_front(ConsumeLength);
   Exts.consume_front("_");
 
-  auto StdExtsItr = StdExts.begin();
-  auto StdExtsEnd = StdExts.end();
-  auto GoToNextExt = [](StringRef::iterator &I, unsigned ConsumeLength,
-                        StringRef::iterator E) {
-    I += 1 + ConsumeLength;
-    if (I != E && *I == '_')
-      ++I;
-  };
-  for (auto I = Exts.begin(), E = Exts.end(); I != E;) {
-    char C = *I;
-
-    // Check ISA extensions are specified in the canonical order.
-    while (StdExtsItr != StdExtsEnd && *StdExtsItr != C)
-      ++StdExtsItr;
-
-    if (StdExtsItr == StdExtsEnd) {
-      // Either c contains a valid extension but it was not given in
-      // canonical order or it is an invalid extension.
-      if (StdExts.contains(C)) {
-        return createStringError(
-            errc::invalid_argument,
-            "standard user-level extension not given in canonical order '%c'",
-            C);
-      }
-
-      return createStringError(errc::invalid_argument,
-                               "invalid standard user-level extension '%c'", C);
-    }
-
-    // Move to next char to prevent repeated letter.
-    ++StdExtsItr;
-
-    StringRef Next;
-    unsigned Major, Minor, ConsumeLength;
-    if (std::next(I) != E)
-      Next = StringRef(std::next(I), E - std::next(I));
-    if (auto E = getExtensionVersion(StringRef(&C, 1), Next, Major, Minor,
-                                     ConsumeLength, EnableExperimentalExtension,
-                                     ExperimentalExtensionVersionCheck)) {
-      if (IgnoreUnknown) {
-        consumeError(std::move(E));
-        GoToNextExt(I, ConsumeLength, Exts.end());
-        continue;
-      }
-      return std::move(E);
-    }
-
-    // The order is OK, then push it into features.
-    // Currently LLVM supports only "mafdcvh".
-    if (!isSupportedExtension(StringRef(&C, 1))) {
-      if (IgnoreUnknown) {
-        GoToNextExt(I, ConsumeLength, Exts.end());
-        continue;
-      }
-      return createStringError(errc::invalid_argument,
-                               "unsupported standard user-level extension '%c'",
-                               C);
-    }
-    ISAInfo->addExtension(StringRef(&C, 1), {Major, Minor});
-
-    // Consume full extension name and version, including any optional '_'
-    // between this extension and the next
-    GoToNextExt(I, ConsumeLength, Exts.end());
-  }
-
-  // Handle other types of extensions other than the standard
-  // general purpose and standard user-level extensions.
-  // Parse the ISA string containing non-standard user-level
-  // extensions, standard supervisor-level extensions and
-  // non-standard supervisor-level extensions.
-  // These extensions start with 'z', 's', 'x' prefixes, might have a version
-  // number (major, minor) and are separated by a single underscore '_'. We do
-  // not enforce a canonical order for them.
-  // Set the hardware features for the extensions that are supported.
-
-  // Multi-letter extensions are seperated by a single underscore
-  // as described in RISC-V User-Level ISA V2.2.
-  SmallVector<StringRef, 8> Split;
-  OtherExts.split(Split, '_');
-
-  SmallVector<StringRef, 8> AllExts;
-  if (Split.size() > 1 || Split[0] != "") {
-    for (StringRef Ext : Split) {
-      if (Ext.empty())
-        return createStringError(errc::invalid_argument,
-                                 "extension name missing after separator '_'");
-
-      StringRef Type = getExtensionType(Ext);
-      StringRef Desc = getExtensionTypeDesc(Ext);
-      auto Pos = findLastNonVersionCharacter(Ext) + 1;
-      StringRef Name(Ext.substr(0, Pos));
-      StringRef Vers(Ext.substr(Pos));
-
-      if (Type.empty()) {
-        if (IgnoreUnknown)
-          continue;
-        return createStringError(errc::invalid_argument,
-                                 "invalid extension prefix '" + Ext + "'");
-      }
-
-      if (!IgnoreUnknown && Name.size() == Type.size()) {
+  std::vector<std::string> SplitedExts;
+  if (auto E = splitExtsByUnderscore(Exts, SplitedExts))
+    return std::move(E);
+
+  for (auto Ext : SplitedExts) {
+    StringRef CurrExt = Ext;
+    while (!CurrExt.empty()) {
+      if (AllStdExts.contains(CurrExt.front())) {
+        if (auto E = processSingleLetterExtension(
+                CurrExt, SeenExts, ExtsVersion, IgnoreUnknown,
+                EnableExperimentalExtension, ExperimentalExtensionVersionCheck))
+          return E;
+      } else if (CurrExt.front() == 'z' || CurrExt.front() == 's' ||
+                 CurrExt.front() == 'x') {
+        if (auto E = processMultiLetterExtension(
+                CurrExt, SeenExts, ExtsVersion, IgnoreUnknown,
+                EnableExperimentalExtension, ExperimentalExtensionVersionCheck))
+          return E;
+        // Multi-letter extension must be seperate following extension with
+        // underscore
+        break;
+      } else {
+        // FIXME: Could it be ignored by IgnoreUnknown?
         return createStringError(errc::invalid_argument,
-                                 "%s name missing after '%s'",
-                                 Desc.str().c_str(), Type.str().c_str());
-      }
-
-      unsigned Major, Minor, ConsumeLength;
-      if (auto E = getExtensionVersion(Name, Vers, Major, Minor, ConsumeLength,
-                                       EnableExperimentalExtension,
-                                       ExperimentalExtensionVersionCheck)) {
-        if (IgnoreUnknown) {
-          consumeError(std::move(E));
-          continue;
-        }
-        return std::move(E);
-      }
-
-      // Check if duplicated extension.
-      if (!IgnoreUnknown && llvm::is_contained(AllExts, Name)) {
-        return createStringError(errc::invalid_argument, "duplicated %s '%s'",
-                                 Desc.str().c_str(), Name.str().c_str());
+                                 "invalid standard user-level extension '%c'",
+                                 CurrExt.front());
       }
-
-      if (IgnoreUnknown && !isSupportedExtension(Name))
-        continue;
-
-      ISAInfo->addExtension(Name, {Major, Minor});
-      // Extension format is correct, keep parsing the extensions.
-      // TODO: Save Type, Name, Major, Minor to avoid parsing them later.
-      AllExts.push_back(Name);
     }
   }
 
-  for (auto Ext : AllExts) {
-    if (!isSupportedExtension(Ext)) {
-      StringRef Desc = getExtensionTypeDesc(getExtensionType(Ext));
-      return createStringError(errc::invalid_argument, "unsupported %s '%s'",
-                               Desc.str().c_str(), Ext.str().c_str());
+  // Check all Extensions are supported.
+  for (size_t Idx = 0; Idx < SeenExts.size(); Idx++) {
+    if (!RISCVISAInfo::isSupportedExtension(SeenExts[Idx])) {
+      if (SeenExts[Idx].size() == 1) {
+        return createStringError(
+            errc::invalid_argument,
+            "unsupported standard user-level extension '%s'",
+            SeenExts[Idx].c_str());
+      }
+      return createStringError(
+          errc::invalid_argument, "unsupported %s '%s'",
+          getExtensionTypeDesc(SeenExts[Idx]).str().c_str(),
+          SeenExts[Idx].c_str());
     }
+    ISAInfo->addExtension(SeenExts[Idx], ExtsVersion[Idx]);
   }
 
   return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo));
diff --git a/llvm/unittests/Support/RISCVISAInfoTest.cpp b/llvm/unittests/Support/RISCVISAInfoTest.cpp
index 9b7112fa2bfeb59..612dd5c3fea6fa6 100644
--- a/llvm/unittests/Support/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp
@@ -203,24 +203,6 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) {
   EXPECT_EQ(InfoRV64G.getFLen(), 64U);
 }
 
-TEST(ParseArchString, RequiresCanonicalOrderForSingleLetterExtensions) {
-  EXPECT_EQ(
-      toString(RISCVISAInfo::parseArchString("rv64idf", true).takeError()),
-      "standard user-level extension not given in canonical order 'f'");
-  EXPECT_EQ(
-      toString(RISCVISAInfo::parseArchString("rv32iam", true).takeError()),
-      "standard user-level extension not given in canonical order 'm'");
-  EXPECT_EQ(
-      toString(
-          RISCVISAInfo::parseArchString("rv32i_zfinx_a", true).takeError()),
-      "invalid extension prefix 'a'");
-  // Canonical ordering not required for z*, s*, and x* extensions.
-  EXPECT_THAT_EXPECTED(
-      RISCVISAInfo::parseArchString(
-          "rv64imafdc_xsfvcp_zicsr_xtheadba_svnapot_zawrs", true),
-      Succeeded());
-}
-
 TEST(ParseArchString, RejectsUnrecognizedExtensionNamesByDefault) {
   EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv64ib", true).takeError()),
             "unsupported standard user-level extension 'b'");
@@ -337,10 +319,79 @@ TEST(ParseArchString, AcceptsUnderscoreSplittingExtensions) {
   }
 }
 
+TEST(ParseArchString, AcceptsRelaxSingleLetterExtensions) {
+  for (StringRef Input :
+       {"rv32imfad", "rv32im_fa_d", "rv32im2p0fad", "rv32i2p1m2p0fad"}) {
+    auto MaybeISAInfo = RISCVISAInfo::parseArchString(Input, true);
+    ASSERT_THAT_EXPECTED(MaybeISAInfo, Succeeded());
+    RISCVISAInfo::OrderedExtensionMap Exts = (*MaybeISAInfo)->getExtensions();
+    EXPECT_EQ(Exts.size(), 6UL);
+    EXPECT_EQ(Exts.count("i"), 1U);
+    EXPECT_EQ(Exts.count("m"), 1U);
+    EXPECT_EQ(Exts.count("f"), 1U);
+    EXPECT_EQ(Exts.count("a"), 1U);
+    EXPECT_EQ(Exts.count("d"), 1U);
+    EXPECT_EQ(Exts.count("zicsr"), 1U);
+  }
+}
+
+TEST(ParseArchString, AcceptsRelaxMixedLetterExtensions) {
+  for (StringRef Input :
+       {"rv32i_zihintntl_m_a_f_d_svinval", "rv32izihintntl_mafdsvinval",
+        "rv32i_zihintntl_mafd_svinval"}) {
+    auto MaybeISAInfo = RISCVISAInfo::parseArchString(Input, true);
+    ASSERT_THAT_EXPECTED(MaybeISAInfo, Succeeded());
+    RISCVISAInfo::OrderedExtensionMap Exts = (*MaybeISAInfo)->getExtensions();
+    EXPECT_EQ(Exts.size(), 8UL);
+    EXPECT_EQ(Exts.count("i"), 1U);
+    EXPECT_EQ(Exts.count("m"), 1U);
+    EXPECT_EQ(Exts.count("a"), 1U);
+    EXPECT_EQ(Exts.count("f"), 1U);
+    EXPECT_EQ(Exts.count("d"), 1U);
+    EXPECT_EQ(Exts.count("zihintntl"), 1U);
+    EXPECT_EQ(Exts.count("svinval"), 1U);
+    EXPECT_EQ(Exts.count("zicsr"), 1U);
+  }
+}
+
+TEST(ParseArchString, AcceptsAmbiguousFromRelaxExtensions) {
+  for (StringRef Input : {"rv32i_zba_m", "rv32izba_m", "rv32izba1p0_m2p0"}) {
+    auto MaybeISAInfo = RISCVISAInfo::parseArchString(Input, true);
+    ASSERT_THAT_EXPECTED(MaybeISAInfo, Succeeded());
+    RISCVISAInfo::OrderedExtensionMap Exts = (*MaybeISAInfo)->getExtensions();
+    EXPECT_EQ(Exts.size(), 3UL);
+    EXPECT_EQ(Exts.count("i"), 1U);
+    EXPECT_EQ(Exts.count("zba"), 1U);
+    EXPECT_EQ(Exts.count("m"), 1U);
+  }
+  for (StringRef Input :
+       {"rv32ia_zba_m", "rv32iazba_m", "rv32ia2p1zba1p0_m2p0"}) {
+    auto MaybeISAInfo = RISCVISAInfo::parseArchString(Input, true);
+    ASSERT_THAT_EXPECTED(MaybeISAInfo, Succeeded());
+    RISCVISAInfo::OrderedExtensionMap Exts = (*MaybeISAInfo)->getExtensions();
+    EXPECT_EQ(Exts.size(), 4UL);
+    EXPECT_EQ(Exts.count("i"), 1U);
+    EXPECT_EQ(Exts.count("zba"), 1U);
+    EXPECT_EQ(Exts.count("m"), 1U);
+    EXPECT_EQ(Exts.count("a"), 1U);
+  }
+}
+
+TEST(ParseArchString,
+     RejectsMultiLetterExtensionFollowBySingleLetterExtensions) {
+  for (StringRef Input : {"rv32izbam", "rv32i_zbam"})
+    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
+              "unsupported standard user-level extension 'zbam'");
+  EXPECT_EQ(
+      toString(
+          RISCVISAInfo::parseArchString("rv32i_zba1p0m", true).takeError()),
+      "unsupported standard user-level extension 'zba1p0m'");
+}
+
 TEST(ParseArchString, RejectsDoubleOrTrailingUnderscore) {
   EXPECT_EQ(
       toString(RISCVISAInfo::parseArchString("rv64i__m", true).takeError()),
-      "invalid standard user-level extension '_'");
+      "extension name missing after separator '_'");
 
   for (StringRef Input :
        {"rv32ezicsr__zifencei", "rv32i_", "rv32izicsr_", "rv64im_"}) {
@@ -356,7 +407,7 @@ TEST(ParseArchString, RejectsDuplicateExtensionNames) {
             "invalid standard user-level extension 'e'");
   EXPECT_EQ(
       toString(RISCVISAInfo::parseArchString("rv64imm", true).takeError()),
-      "standard user-level extension not given in canonical order 'm'");
+      "duplicated standard user-level extension 'm'");
   EXPECT_EQ(
       toString(
           RISCVISAInfo::parseArchString("rv32i_zicsr_zicsr", true).takeError()),

>From 6b80582aeac3762d2bd8dfb96751aa516eae988d Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Sun, 21 Jan 2024 03:42:03 -0800
Subject: [PATCH 2/8] Indent continuation lines by 2 spaces

---
 clang/test/Driver/riscv-arch.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index 38de95e4fbf7aa4..c9e984e07cbea98 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -157,7 +157,7 @@
 // RV32L: error: invalid arch name 'rv32l'
 
 // RUN: %clang --target=riscv32-unknown-elf -march=rv32imadf -### %s \
-// RUN: -fsyntax-only 2>&1 | FileCheck %s
+// RUN:   -fsyntax-only 2>&1 | FileCheck %s
 
 // RUN: not %clang --target=riscv32-unknown-elf -march=rv32imm -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32IMM %s
@@ -184,7 +184,7 @@
 // RV64L: error: invalid arch name 'rv64l'
 
 // RUN: %clang --target=riscv64-unknown-elf -march=rv64imadf -### %s \
-// RUN: -fsyntax-only 2>&1 | FileCheck %s
+// RUN:   -fsyntax-only 2>&1 | FileCheck %s
 
 // RUN: not %clang --target=riscv64-unknown-elf -march=rv64imm -### %s \
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64IMM %s

>From 9abe967ddbfb90c8b69bf1dcd45fcfe8efef8d05 Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Sun, 21 Jan 2024 03:57:25 -0800
Subject: [PATCH 3/8] Keepd comment about extension start with 'z' 's' 'x'

---
 llvm/lib/Support/RISCVISAInfo.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 06579c20b2637a2..350d508ee781f30 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -910,6 +910,14 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
           return E;
       } else if (CurrExt.front() == 'z' || CurrExt.front() == 's' ||
                  CurrExt.front() == 'x') {
+        // Handle other types of extensions other than the standard
+        // general purpose and standard user-level extensions.
+        // Parse the ISA string containing non-standard user-level
+        // extensions, standard supervisor-level extensions and
+        // non-standard supervisor-level extensions.
+        // These extensions start with 'z', 's', 'x' prefixes, might have a
+        // version number (major, minor) and are separated by a single
+        // underscore '_'. We do not enforce a canonical order for them.
         if (auto E = processMultiLetterExtension(
                 CurrExt, SeenExts, ExtsVersion, IgnoreUnknown,
                 EnableExperimentalExtension, ExperimentalExtensionVersionCheck))

>From cf5f677e7538478a7c72678f20838c6a31682fb9 Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Sun, 21 Jan 2024 04:48:07 -0800
Subject: [PATCH 4/8] Add testcase for i after zba

---
 llvm/unittests/Support/RISCVISAInfoTest.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/llvm/unittests/Support/RISCVISAInfoTest.cpp b/llvm/unittests/Support/RISCVISAInfoTest.cpp
index 612dd5c3fea6fa6..396c8ace31e15c5 100644
--- a/llvm/unittests/Support/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp
@@ -377,11 +377,23 @@ TEST(ParseArchString, AcceptsAmbiguousFromRelaxExtensions) {
   }
 }
 
+TEST(ParseArchString, RejectsRelaxExtensionsNotStartWithEorIorG) {
+  EXPECT_EQ(
+      toString(RISCVISAInfo::parseArchString("rv32zba_im", true).takeError()),
+      "first letter should be 'e', 'i' or 'g'");
+}
+
 TEST(ParseArchString,
      RejectsMultiLetterExtensionFollowBySingleLetterExtensions) {
   for (StringRef Input : {"rv32izbam", "rv32i_zbam"})
     EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
               "unsupported standard user-level extension 'zbam'");
+  EXPECT_EQ(
+      toString(RISCVISAInfo::parseArchString("rv32izbai_m", true).takeError()),
+      "unsupported standard user-level extension 'zbai'");
+  EXPECT_EQ(
+      toString(RISCVISAInfo::parseArchString("rv32izbaim", true).takeError()),
+      "unsupported standard user-level extension 'zbaim'");
   EXPECT_EQ(
       toString(
           RISCVISAInfo::parseArchString("rv32i_zba1p0m", true).takeError()),

>From bd22631d913ffe561fe90a20917afe96ef5c8be8 Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Tue, 23 Jan 2024 02:47:37 -0800
Subject: [PATCH 5/8] Rename the SplitedExts with SplitExts

---
 llvm/lib/Support/RISCVISAInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 350d508ee781f30..fb7894368f4882b 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -704,7 +704,7 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) {
 }
 
 static Error splitExtsByUnderscore(StringRef Exts,
-                                   std::vector<std::string> &SplitedExts) {
+                                   std::vector<std::string> &SplitExts) {
   SmallVector<StringRef, 8> Split;
   if (Exts.empty())
     return Error::success();
@@ -716,7 +716,7 @@ static Error splitExtsByUnderscore(StringRef Exts,
       return createStringError(errc::invalid_argument,
                                "extension name missing after separator '_'");
 
-    SplitedExts.push_back(Ext.str());
+    SplitExts.push_back(Ext.str());
   }
   return Error::success();
 }

>From 357a77d723a6d5d125be6238ff6048a25070b0fe Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Tue, 23 Jan 2024 04:12:30 -0800
Subject: [PATCH 6/8] Use MapVector instead of SmallVector

---
 llvm/lib/Support/RISCVISAInfo.cpp | 61 ++++++++++++++++---------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index fb7894368f4882b..fdab13f2aa80794 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/RISCVISAInfo.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -722,8 +723,9 @@ static Error splitExtsByUnderscore(StringRef Exts,
 }
 
 static Error processMultiLetterExtension(
-    StringRef RawExt, SmallVector<std::string, 8> &SeenExts,
-    SmallVector<RISCVISAInfo::ExtensionVersion, 8> &ExtsVersion,
+    StringRef RawExt,
+    MapVector<std::string, RISCVISAInfo::ExtensionVersion,
+              std::map<std::string, unsigned>> &SeenExtMap,
     bool IgnoreUnknown, bool EnableExperimentalExtension,
     bool ExperimentalExtensionVersionCheck) {
   StringRef Type = getExtensionType(RawExt);
@@ -756,21 +758,21 @@ static Error processMultiLetterExtension(
   }
 
   // Check if duplicated extension.
-  if (!IgnoreUnknown && llvm::is_contained(SeenExts, Name))
+  if (!IgnoreUnknown && (SeenExtMap.find(Name.str()) != SeenExtMap.end()))
     return createStringError(errc::invalid_argument, "duplicated %s '%s'",
                              Desc.str().c_str(), Name.str().c_str());
 
   if (IgnoreUnknown && !RISCVISAInfo::isSupportedExtension(Name))
     return Error::success();
 
-  SeenExts.push_back(Name.str());
-  ExtsVersion.push_back({Major, Minor});
+  SeenExtMap[Name.str()] = {Major, Minor};
   return Error::success();
 }
 
 static Error processSingleLetterExtension(
-    StringRef &RawExt, SmallVector<std::string, 8> &SeenExts,
-    SmallVector<RISCVISAInfo::ExtensionVersion, 8> &ExtsVersion,
+    StringRef &RawExt,
+    MapVector<std::string, RISCVISAInfo::ExtensionVersion,
+              std::map<std::string, unsigned>> &SeenExtMap,
     bool IgnoreUnknown, bool EnableExperimentalExtension,
     bool ExperimentalExtensionVersionCheck) {
   unsigned Major, Minor, ConsumeLength;
@@ -790,7 +792,7 @@ static Error processSingleLetterExtension(
   RawExt = RawExt.substr(ConsumeLength);
 
   // Check if duplicated extension.
-  if (!IgnoreUnknown && llvm::is_contained(SeenExts, Name))
+  if (!IgnoreUnknown && (SeenExtMap.find(Name.str()) != SeenExtMap.end()))
     return createStringError(errc::invalid_argument,
                              "duplicated standard user-level extension '%s'",
                              Name.str().c_str());
@@ -798,8 +800,7 @@ static Error processSingleLetterExtension(
   if (IgnoreUnknown && !RISCVISAInfo::isSupportedExtension(Name))
     return Error::success();
 
-  SeenExts.push_back(Name.str());
-  ExtsVersion.push_back({Major, Minor});
+  SeenExtMap[Name.str()] = {Major, Minor};
   return Error::success();
 }
 
@@ -823,8 +824,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
 
   unsigned XLen = HasRV64 ? 64 : 32;
   std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
-  SmallVector<std::string, 8> SeenExts;
-  SmallVector<RISCVISAInfo::ExtensionVersion, 8> ExtsVersion;
+  MapVector<std::string, RISCVISAInfo::ExtensionVersion,
+            std::map<std::string, unsigned>>
+      SeenExtMap;
 
   // The canonical order specified in ISA manual.
   // Ref: Table 22.1 in RISC-V User-Level ISA V2.2
@@ -866,8 +868,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
     for (const auto *Ext : RISCVGImplications) {
       if (auto Version = findDefaultVersion(Ext)) {
         // Postpone AddExtension until end of this function
-        SeenExts.push_back(Ext);
-        ExtsVersion.push_back({Version->Major, Version->Minor});
+        SeenExtMap[Ext] = {Version->Major, Version->Minor};
       } else
         llvm_unreachable("Default extension version not found?");
     }
@@ -887,8 +888,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
     }
 
     // Postpone AddExtension until end of this function
-    SeenExts.push_back(StringRef(&Baseline, 1).str());
-    ExtsVersion.push_back({Major, Minor});
+    SeenExtMap[StringRef(&Baseline, 1).str()] = {Major, Minor};
   }
 
   // Consume the base ISA version number and any '_' between rvxxx and the
@@ -905,8 +905,8 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
     while (!CurrExt.empty()) {
       if (AllStdExts.contains(CurrExt.front())) {
         if (auto E = processSingleLetterExtension(
-                CurrExt, SeenExts, ExtsVersion, IgnoreUnknown,
-                EnableExperimentalExtension, ExperimentalExtensionVersionCheck))
+                CurrExt, SeenExtMap, IgnoreUnknown, EnableExperimentalExtension,
+                ExperimentalExtensionVersionCheck))
           return E;
       } else if (CurrExt.front() == 'z' || CurrExt.front() == 's' ||
                  CurrExt.front() == 'x') {
@@ -919,8 +919,8 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
         // version number (major, minor) and are separated by a single
         // underscore '_'. We do not enforce a canonical order for them.
         if (auto E = processMultiLetterExtension(
-                CurrExt, SeenExts, ExtsVersion, IgnoreUnknown,
-                EnableExperimentalExtension, ExperimentalExtensionVersionCheck))
+                CurrExt, SeenExtMap, IgnoreUnknown, EnableExperimentalExtension,
+                ExperimentalExtensionVersionCheck))
           return E;
         // Multi-letter extension must be seperate following extension with
         // underscore
@@ -935,20 +935,21 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
   }
 
   // Check all Extensions are supported.
-  for (size_t Idx = 0; Idx < SeenExts.size(); Idx++) {
-    if (!RISCVISAInfo::isSupportedExtension(SeenExts[Idx])) {
-      if (SeenExts[Idx].size() == 1) {
+  for (auto SeenExtAndVers : SeenExtMap) {
+    std::string ExtName = SeenExtAndVers.first;
+    RISCVISAInfo::ExtensionVersion ExtVers = SeenExtAndVers.second;
+
+    if (!RISCVISAInfo::isSupportedExtension(ExtName)) {
+      if (ExtName.size() == 1) {
         return createStringError(
             errc::invalid_argument,
-            "unsupported standard user-level extension '%s'",
-            SeenExts[Idx].c_str());
+            "unsupported standard user-level extension '%s'", ExtName.c_str());
       }
-      return createStringError(
-          errc::invalid_argument, "unsupported %s '%s'",
-          getExtensionTypeDesc(SeenExts[Idx]).str().c_str(),
-          SeenExts[Idx].c_str());
+      return createStringError(errc::invalid_argument, "unsupported %s '%s'",
+                               getExtensionTypeDesc(ExtName).str().c_str(),
+                               ExtName.c_str());
     }
-    ISAInfo->addExtension(SeenExts[Idx], ExtsVersion[Idx]);
+    ISAInfo->addExtension(ExtName, ExtVers);
   }
 
   return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo));

>From e57522fafda762cbf04226139446e4bc59ea2172 Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Tue, 23 Jan 2024 17:52:32 -0800
Subject: [PATCH 7/8] Use contains instead of find

---
 llvm/lib/Support/RISCVISAInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index fdab13f2aa80794..4753abc1009270d 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -758,7 +758,7 @@ static Error processMultiLetterExtension(
   }
 
   // Check if duplicated extension.
-  if (!IgnoreUnknown && (SeenExtMap.find(Name.str()) != SeenExtMap.end()))
+  if (!IgnoreUnknown && SeenExtMap.contains(Name.str()))
     return createStringError(errc::invalid_argument, "duplicated %s '%s'",
                              Desc.str().c_str(), Name.str().c_str());
 
@@ -792,7 +792,7 @@ static Error processSingleLetterExtension(
   RawExt = RawExt.substr(ConsumeLength);
 
   // Check if duplicated extension.
-  if (!IgnoreUnknown && (SeenExtMap.find(Name.str()) != SeenExtMap.end()))
+  if (!IgnoreUnknown && SeenExtMap.contains(Name.str()))
     return createStringError(errc::invalid_argument,
                              "duplicated standard user-level extension '%s'",
                              Name.str().c_str());

>From 06ac457b8d9d986107b9b876703073688178c2ad Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Thu, 25 Jan 2024 01:12:34 -0800
Subject: [PATCH 8/8] Use reference type

---
 llvm/lib/Support/RISCVISAInfo.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 4753abc1009270d..6360ae1280ad25d 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -900,7 +900,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
   if (auto E = splitExtsByUnderscore(Exts, SplitedExts))
     return std::move(E);
 
-  for (auto Ext : SplitedExts) {
+  for (auto &Ext : SplitedExts) {
     StringRef CurrExt = Ext;
     while (!CurrExt.empty()) {
       if (AllStdExts.contains(CurrExt.front())) {
@@ -935,8 +935,8 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
   }
 
   // Check all Extensions are supported.
-  for (auto SeenExtAndVers : SeenExtMap) {
-    std::string ExtName = SeenExtAndVers.first;
+  for (auto &SeenExtAndVers : SeenExtMap) {
+    const std::string &ExtName = SeenExtAndVers.first;
     RISCVISAInfo::ExtensionVersion ExtVers = SeenExtAndVers.second;
 
     if (!RISCVISAInfo::isSupportedExtension(ExtName)) {



More information about the cfe-commits mailing list