[llvm] [LLVM][Option] Refactor option name comparison (PR #108219)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 11 08:58:15 PDT 2024


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/108219

>From 03960dbb6e551e0ffc701e46a6fbe68b324bff97 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Sat, 7 Sep 2024 15:43:12 -0700
Subject: [PATCH] [LLVM][Option] Refactor option name comparison

- Create a common header for `StrCmpOptionName` function and
  use it in both TableGen Option Emitter and Options library.
  Base it on the existing version in OptTable.cpp, with an
  additional mode to control fall back to case insentitive
  comparison.
- Use zip() to iterate through lists of prefixes.
- Rename `CompareOptionRecords` to less ambiguious name
  `IsOptionRecordLess`.
- Merge 2 back-to-back ifs with same condition in
  `IsOptionRecordLess`.
---
 llvm/include/llvm/Option/OptStrCmp.h      | 38 ++++++++++++++
 llvm/lib/Option/OptTable.cpp              | 40 ++++-----------
 llvm/lib/Option/Option.cpp                |  3 +-
 llvm/utils/TableGen/Common/OptEmitter.cpp | 62 ++++++-----------------
 llvm/utils/TableGen/Common/OptEmitter.h   |  3 +-
 llvm/utils/TableGen/OptParserEmitter.cpp  |  2 +-
 llvm/utils/TableGen/OptRSTEmitter.cpp     |  2 +-
 7 files changed, 68 insertions(+), 82 deletions(-)
 create mode 100644 llvm/include/llvm/Option/OptStrCmp.h

diff --git a/llvm/include/llvm/Option/OptStrCmp.h b/llvm/include/llvm/Option/OptStrCmp.h
new file mode 100644
index 00000000000000..1712f7d616d83e
--- /dev/null
+++ b/llvm/include/llvm/Option/OptStrCmp.h
@@ -0,0 +1,38 @@
+//===- OptStrCmp.h - Option String Comparison -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPTION_OPTSTRCMP_H
+#define LLVM_OPTION_OPTSTRCMP_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm::opt {
+
+// Comparison function for Option strings (option names & prefixes).
+// The ordering is *almost* case-insensitive lexicographic, with an exception.
+// '\0' comes at the end of the alphabet instead of the beginning (thus options
+// precede any other options which prefix them). Additionally, if two options
+// are identical ignoring case, they are ordered according to case sensitive
+// ordering if `FallbackCaseSensitive` is true.
+static int StrCmpOptionName(StringRef A, StringRef B,
+                            bool FallbackCaseSensitive = true) {
+  size_t MinSize = std::min(A.size(), B.size());
+  if (int Res = A.substr(0, MinSize).compare_insensitive(B.substr(0, MinSize)))
+    return Res;
+
+  // If they are identical ignoring case, use case sensitive ordering.
+  if (A.size() == B.size())
+    return FallbackCaseSensitive ? A.compare(B) : 0;
+
+  return (A.size() == MinSize) ? 1 /* A is a prefix of B. */
+                               : -1 /* B is a prefix of A */;
+}
+
+} // namespace llvm::opt
+
+#endif // LLVM_OPTION_OPTSTRCMP_H
\ No newline at end of file
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 3eceb0fbdfc47b..2aa07017cd680a 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -12,6 +12,7 @@
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/OptSpecifier.h"
+#include "llvm/Option/OptStrCmp.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/CommandLine.h" // for expandResponseFiles
 #include "llvm/Support/Compiler.h"
@@ -30,31 +31,10 @@
 using namespace llvm;
 using namespace llvm::opt;
 
-namespace llvm {
-namespace opt {
-
-// Ordering on Info. The ordering is *almost* case-insensitive lexicographic,
-// with an exception. '\0' comes at the end of the alphabet instead of the
-// beginning (thus options precede any other options which prefix them).
-static int StrCmpOptionNameIgnoreCase(StringRef A, StringRef B) {
-  size_t MinSize = std::min(A.size(), B.size());
-  if (int Res = A.substr(0, MinSize).compare_insensitive(B.substr(0, MinSize)))
-    return Res;
-
-  if (A.size() == B.size())
-    return 0;
-
-  return (A.size() == MinSize) ? 1 /* A is a prefix of B. */
-                               : -1 /* B is a prefix of A */;
-}
-
+namespace llvm::opt {
 #ifndef NDEBUG
-static int StrCmpOptionName(StringRef A, StringRef B) {
-  if (int N = StrCmpOptionNameIgnoreCase(A, B))
-    return N;
-  return A.compare(B);
-}
-
+// Note: This function's logic should match the `IsOptionRecordLess` used in
+// TableGen Option Emitter backend.
 static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
   if (&A == &B)
     return false;
@@ -62,9 +42,8 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
   if (int N = StrCmpOptionName(A.getName(), B.getName()))
     return N < 0;
 
-  for (size_t I = 0, K = std::min(A.Prefixes.size(), B.Prefixes.size()); I != K;
-       ++I)
-    if (int N = StrCmpOptionName(A.Prefixes[I], B.Prefixes[I]))
+  for (const auto &[APre, BPre] : zip(A.Prefixes, B.Prefixes))
+    if (int N = StrCmpOptionName(APre, BPre))
       return N < 0;
 
   // Names are the same, check that classes are in order; exactly one
@@ -77,11 +56,10 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
 
 // Support lower_bound between info and an option name.
 static inline bool operator<(const OptTable::Info &I, StringRef Name) {
-  return StrCmpOptionNameIgnoreCase(I.getName(), Name) < 0;
+  // Do not fallback to case sensitive comparison.
+  return StrCmpOptionName(I.getName(), Name, false) < 0;
 }
-
-} // end namespace opt
-} // end namespace llvm
+} // namespace llvm::opt
 
 OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
 
diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp
index 500768588bc9b3..ecb3e84b1da8bd 100644
--- a/llvm/lib/Option/Option.cpp
+++ b/llvm/lib/Option/Option.cpp
@@ -6,19 +6,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Option/Option.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
-#include "llvm/Option/Option.h"
 #include "llvm/Option/OptTable.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
-#include <cstring>
 
 using namespace llvm;
 using namespace llvm::opt;
diff --git a/llvm/utils/TableGen/Common/OptEmitter.cpp b/llvm/utils/TableGen/Common/OptEmitter.cpp
index 75e32c36d4f726..8f101880e231fe 100644
--- a/llvm/utils/TableGen/Common/OptEmitter.cpp
+++ b/llvm/utils/TableGen/Common/OptEmitter.cpp
@@ -8,70 +8,42 @@
 
 #include "OptEmitter.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Option/OptStrCmp.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
-#include <cctype>
-#include <cstring>
-
-namespace llvm {
-
-// Ordering on Info. The logic should match with the consumer-side function in
-// llvm/Option/OptTable.h.
-// FIXME: Make this take StringRefs instead of null terminated strings to
-// simplify callers.
-static int StrCmpOptionName(const char *A, const char *B) {
-  const char *X = A, *Y = B;
-  char a = tolower(*A), b = tolower(*B);
-  while (a == b) {
-    if (a == '\0')
-      return strcmp(A, B);
-
-    a = tolower(*++X);
-    b = tolower(*++Y);
-  }
-
-  if (a == '\0') // A is a prefix of B.
-    return 1;
-  if (b == '\0') // B is a prefix of A.
-    return -1;
-
-  // Otherwise lexicographic.
-  return (a < b) ? -1 : 1;
-}
 
 // Returns true if A is ordered before B.
-bool CompareOptionRecords(const Record *A, const Record *B) {
+// Note: Keep this in sync with `operator <` for OptTable::Info in OptTable.cpp.
+bool llvm::IsOptionRecordsLess(const Record *A, const Record *B) {
   if (A == B)
     return false;
+
   // Sentinel options precede all others and are only ordered by precedence.
-  bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
-  bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+  const Record *AKind = A->getValueAsDef("Kind");
+  const Record *BKind = B->getValueAsDef("Kind");
+
+  bool ASent = AKind->getValueAsBit("Sentinel");
+  bool BSent = BKind->getValueAsBit("Sentinel");
   if (ASent != BSent)
     return ASent;
 
   // Compare options by name, unless they are sentinels.
-  if (!ASent)
-    if (int Cmp = StrCmpOptionName(A->getValueAsString("Name").str().c_str(),
-                                   B->getValueAsString("Name").str().c_str()))
+  if (!ASent) {
+    if (int Cmp = opt::StrCmpOptionName(A->getValueAsString("Name"),
+                                        B->getValueAsString("Name")))
       return Cmp < 0;
 
-  if (!ASent) {
     std::vector<StringRef> APrefixes = A->getValueAsListOfStrings("Prefixes");
     std::vector<StringRef> BPrefixes = B->getValueAsListOfStrings("Prefixes");
-
-    for (std::vector<StringRef>::const_iterator APre = APrefixes.begin(),
-                                                AEPre = APrefixes.end(),
-                                                BPre = BPrefixes.begin(),
-                                                BEPre = BPrefixes.end();
-         APre != AEPre && BPre != BEPre; ++APre, ++BPre) {
-      if (int Cmp = StrCmpOptionName(APre->str().c_str(), BPre->str().c_str()))
+    for (const auto &[APre, BPre] : zip(APrefixes, BPrefixes)) {
+      if (int Cmp = opt::StrCmpOptionName(APre, BPre))
         return Cmp < 0;
     }
   }
 
   // Then by the kind precedence;
-  int APrec = A->getValueAsDef("Kind")->getValueAsInt("Precedence");
-  int BPrec = B->getValueAsDef("Kind")->getValueAsInt("Precedence");
+  int APrec = AKind->getValueAsInt("Precedence");
+  int BPrec = BKind->getValueAsInt("Precedence");
   if (APrec == BPrec && A->getValueAsListOfStrings("Prefixes") ==
                             B->getValueAsListOfStrings("Prefixes")) {
     PrintError(A->getLoc(), Twine("Option is equivalent to"));
@@ -80,5 +52,3 @@ bool CompareOptionRecords(const Record *A, const Record *B) {
   }
   return APrec < BPrec;
 }
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/Common/OptEmitter.h b/llvm/utils/TableGen/Common/OptEmitter.h
index 5eecd619873373..f7ffa0bcc4a975 100644
--- a/llvm/utils/TableGen/Common/OptEmitter.h
+++ b/llvm/utils/TableGen/Common/OptEmitter.h
@@ -11,7 +11,8 @@
 
 namespace llvm {
 class Record;
-bool CompareOptionRecords(const Record *A, const Record *B);
+/// Return true of Option record \p A is ordered before \p B.
+bool IsOptionRecordsLess(const Record *A, const Record *B);
 } // namespace llvm
 
 #endif // LLVM_UTILS_TABLEGEN_COMMON_OPTEMITTER_H
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index a41c684f169e9f..79cbf51514ae5c 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -255,10 +255,10 @@ static void EmitOptParser(const RecordKeeper &Records, raw_ostream &OS) {
   ArrayRef<const Record *> Groups =
       Records.getAllDerivedDefinitions("OptionGroup");
   std::vector<const Record *> Opts = Records.getAllDerivedDefinitions("Option");
+  llvm::sort(Opts, IsOptionRecordsLess);
 
   emitSourceFileHeader("Option Parsing Definitions", OS);
 
-  llvm::sort(Opts, CompareOptionRecords);
   // Generate prefix groups.
   typedef SmallVector<SmallString<2>, 2> PrefixKeyT;
   typedef std::map<PrefixKeyT, std::string> PrefixesT;
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 43b0f78c44d909..16125198a7c387 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -22,7 +22,7 @@ static void EmitOptRST(const RecordKeeper &Records, raw_ostream &OS) {
 
   // Get the options.
   std::vector<const Record *> Opts = Records.getAllDerivedDefinitions("Option");
-  llvm::sort(Opts, CompareOptionRecords);
+  llvm::sort(Opts, IsOptionRecordsLess);
 
   // Get the option groups.
   for (const Record *R : Records.getAllDerivedDefinitions("OptionGroup"))



More information about the llvm-commits mailing list