[llvm] [LLVM][Option] Refactor option name comparison (PR #108219)

Rahul Joshi via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 13 11:07:13 PDT 2024


https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/108219

>From 5b6644eabb78d0839867c5fcc890d9532e8f9ec9 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Sat, 7 Sep 2024 15:43:12 -0700
Subject: [PATCH] [LLVM][Option] Refactor option name comparison

- Create a common header for `StrCmpOptionName` function and
  use it in both TableGen Option Emitter and Options library.
  Base it on the existing version in OptTable.cpp, with an
  additional mode to control fall back to case insentitive
  comparison.
- Use zip() to iterate through lists of prefixes.
- Rename `CompareOptionRecords` to less ambiguious name
  `IsOptionRecordLess`.
- Merge 2 back-to-back ifs with same condition in
  `IsOptionRecordLess`.
---
 llvm/include/llvm/Support/OptionStrCmp.h  | 32 ++++++++++
 llvm/lib/Option/OptTable.cpp              | 52 ++++++-----------
 llvm/lib/Option/Option.cpp                |  3 +-
 llvm/lib/Support/CMakeLists.txt           |  1 +
 llvm/lib/Support/OptionStrCmp.cpp         | 43 ++++++++++++++
 llvm/utils/TableGen/Common/OptEmitter.cpp | 71 ++++++-----------------
 llvm/utils/TableGen/Common/OptEmitter.h   |  3 +-
 llvm/utils/TableGen/OptParserEmitter.cpp  |  2 +-
 llvm/utils/TableGen/OptRSTEmitter.cpp     |  2 +-
 9 files changed, 118 insertions(+), 91 deletions(-)
 create mode 100644 llvm/include/llvm/Support/OptionStrCmp.h
 create mode 100644 llvm/lib/Support/OptionStrCmp.cpp

diff --git a/llvm/include/llvm/Support/OptionStrCmp.h b/llvm/include/llvm/Support/OptionStrCmp.h
new file mode 100644
index 00000000000000..d417fe675e292d
--- /dev/null
+++ b/llvm/include/llvm/Support/OptionStrCmp.h
@@ -0,0 +1,32 @@
+//===- OptionStrCmp.h - Option String Comparison ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_OPTIONSTRCMP_H
+#define LLVM_SUPPORT_OPTIONSTRCMP_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+// Comparison function for Option strings (option names & prefixes).
+// The ordering is *almost* case-insensitive lexicographic, with an exception.
+// '\0' comes at the end of the alphabet instead of the beginning (thus options
+// precede any other options which prefix them). Additionally, if two options
+// are identical ignoring case, they are ordered according to case sensitive
+// ordering if `FallbackCaseSensitive` is true.
+int StrCmpOptionName(StringRef A, StringRef B,
+                     bool FallbackCaseSensitive = true);
+
+// Comparison function for Option prefixes.
+int StrCmpOptionPrefixes(ArrayRef<StringRef> APrefixes,
+                         ArrayRef<StringRef> BPrefixes);
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_OPTIONSTRCMP_H
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 3eceb0fbdfc47b..9fdafed39b8b6c 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Support/CommandLine.h" // for expandResponseFiles
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/OptionStrCmp.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -30,42 +31,26 @@
 using namespace llvm;
 using namespace llvm::opt;
 
-namespace llvm {
-namespace opt {
-
-// Ordering on Info. The ordering is *almost* case-insensitive lexicographic,
-// with an exception. '\0' comes at the end of the alphabet instead of the
-// beginning (thus options precede any other options which prefix them).
-static int StrCmpOptionNameIgnoreCase(StringRef A, StringRef B) {
-  size_t MinSize = std::min(A.size(), B.size());
-  if (int Res = A.substr(0, MinSize).compare_insensitive(B.substr(0, MinSize)))
-    return Res;
-
-  if (A.size() == B.size())
-    return 0;
-
-  return (A.size() == MinSize) ? 1 /* A is a prefix of B. */
-                               : -1 /* B is a prefix of A */;
-}
-
+namespace llvm::opt {
 #ifndef NDEBUG
-static int StrCmpOptionName(StringRef A, StringRef B) {
-  if (int N = StrCmpOptionNameIgnoreCase(A, B))
-    return N;
-  return A.compare(B);
-}
-
 static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
   if (&A == &B)
     return false;
 
-  if (int N = StrCmpOptionName(A.getName(), B.getName()))
-    return N < 0;
+  if (int Cmp = StrCmpOptionName(A.getName(), B.getName()))
+    return Cmp < 0;
 
-  for (size_t I = 0, K = std::min(A.Prefixes.size(), B.Prefixes.size()); I != K;
-       ++I)
-    if (int N = StrCmpOptionName(A.Prefixes[I], B.Prefixes[I]))
-      return N < 0;
+  // Note: we are converting ArrayRef<StringLiteral> to ArrayRef<StringRef>.
+  // In general, ArrayRef<SubClass> cannot be safely viewed as ArrayRef<Base>
+  // since sizeof(SubClass) may not be same as sizeof(Base). However in this
+  // case, sizeof(StringLiteral) is same as sizeof(StringRef), so this
+  // conversion is safe.
+  static_assert(sizeof(StringRef) == sizeof(StringLiteral));
+  ArrayRef<StringRef> APrefixes(A.Prefixes.data(), A.Prefixes.size());
+  ArrayRef<StringRef> BPrefixes(B.Prefixes.data(), B.Prefixes.size());
+
+  if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes))
+    return Cmp < 0;
 
   // Names are the same, check that classes are in order; exactly one
   // should be joined, and it should succeed the other.
@@ -77,11 +62,10 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
 
 // Support lower_bound between info and an option name.
 static inline bool operator<(const OptTable::Info &I, StringRef Name) {
-  return StrCmpOptionNameIgnoreCase(I.getName(), Name) < 0;
+  // Do not fallback to case sensitive comparison.
+  return StrCmpOptionName(I.getName(), Name, false) < 0;
 }
-
-} // end namespace opt
-} // end namespace llvm
+} // namespace llvm::opt
 
 OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
 
diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp
index 500768588bc9b3..ecb3e84b1da8bd 100644
--- a/llvm/lib/Option/Option.cpp
+++ b/llvm/lib/Option/Option.cpp
@@ -6,19 +6,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Option/Option.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
-#include "llvm/Option/Option.h"
 #include "llvm/Option/OptTable.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
-#include <cstring>
 
 using namespace llvm;
 using namespace llvm::opt;
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index c55c17fae189f5..97188b0672f032 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -212,6 +212,7 @@ add_llvm_component_library(LLVMSupport
   NativeFormatting.cpp
   OptimizedStructLayout.cpp
   Optional.cpp
+  OptionStrCmp.cpp
   PGOOptions.cpp
   Parallel.cpp
   PluginLoader.cpp
diff --git a/llvm/lib/Support/OptionStrCmp.cpp b/llvm/lib/Support/OptionStrCmp.cpp
new file mode 100644
index 00000000000000..1ffb64c7a1584b
--- /dev/null
+++ b/llvm/lib/Support/OptionStrCmp.cpp
@@ -0,0 +1,43 @@
+//===- OptionStrCmp.cpp - Option String Comparison --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/OptionStrCmp.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+// Comparison function for Option strings (option names & prefixes).
+// The ordering is *almost* case-insensitive lexicographic, with an exception.
+// '\0' comes at the end of the alphabet instead of the beginning (thus options
+// precede any other options which prefix them). Additionally, if two options
+// are identical ignoring case, they are ordered according to case sensitive
+// ordering if `FallbackCaseSensitive` is true.
+int llvm::StrCmpOptionName(StringRef A, StringRef B,
+                           bool FallbackCaseSensitive) {
+  size_t MinSize = std::min(A.size(), B.size());
+  if (int Res = A.substr(0, MinSize).compare_insensitive(B.substr(0, MinSize)))
+    return Res;
+
+  // If they are identical ignoring case, use case sensitive ordering.
+  if (A.size() == B.size())
+    return FallbackCaseSensitive ? A.compare(B) : 0;
+
+  return (A.size() == MinSize) ? 1 /* A is a prefix of B. */
+                               : -1 /* B is a prefix of A */;
+}
+
+// Comparison function for Option prefixes.
+int llvm::StrCmpOptionPrefixes(ArrayRef<StringRef> APrefixes,
+                               ArrayRef<StringRef> BPrefixes) {
+  for (const auto &[APre, BPre] : zip(APrefixes, BPrefixes)) {
+    if (int Cmp = StrCmpOptionName(APre, BPre))
+      return Cmp;
+  }
+  // Both prefixes are identical.
+  return 0;
+}
diff --git a/llvm/utils/TableGen/Common/OptEmitter.cpp b/llvm/utils/TableGen/Common/OptEmitter.cpp
index 75e32c36d4f726..f914909ebe5e48 100644
--- a/llvm/utils/TableGen/Common/OptEmitter.cpp
+++ b/llvm/utils/TableGen/Common/OptEmitter.cpp
@@ -8,77 +8,44 @@
 
 #include "OptEmitter.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/OptionStrCmp.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
-#include <cctype>
-#include <cstring>
-
-namespace llvm {
-
-// Ordering on Info. The logic should match with the consumer-side function in
-// llvm/Option/OptTable.h.
-// FIXME: Make this take StringRefs instead of null terminated strings to
-// simplify callers.
-static int StrCmpOptionName(const char *A, const char *B) {
-  const char *X = A, *Y = B;
-  char a = tolower(*A), b = tolower(*B);
-  while (a == b) {
-    if (a == '\0')
-      return strcmp(A, B);
-
-    a = tolower(*++X);
-    b = tolower(*++Y);
-  }
-
-  if (a == '\0') // A is a prefix of B.
-    return 1;
-  if (b == '\0') // B is a prefix of A.
-    return -1;
-
-  // Otherwise lexicographic.
-  return (a < b) ? -1 : 1;
-}
 
 // Returns true if A is ordered before B.
-bool CompareOptionRecords(const Record *A, const Record *B) {
+bool llvm::IsOptionRecordsLess(const Record *A, const Record *B) {
   if (A == B)
     return false;
+
   // Sentinel options precede all others and are only ordered by precedence.
-  bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
-  bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+  const Record *AKind = A->getValueAsDef("Kind");
+  const Record *BKind = B->getValueAsDef("Kind");
+
+  bool ASent = AKind->getValueAsBit("Sentinel");
+  bool BSent = BKind->getValueAsBit("Sentinel");
   if (ASent != BSent)
     return ASent;
 
-  // Compare options by name, unless they are sentinels.
-  if (!ASent)
-    if (int Cmp = StrCmpOptionName(A->getValueAsString("Name").str().c_str(),
-                                   B->getValueAsString("Name").str().c_str()))
-      return Cmp < 0;
+  std::vector<StringRef> APrefixes = A->getValueAsListOfStrings("Prefixes");
+  std::vector<StringRef> BPrefixes = B->getValueAsListOfStrings("Prefixes");
 
+  // Compare options by name, unless they are sentinels.
   if (!ASent) {
-    std::vector<StringRef> APrefixes = A->getValueAsListOfStrings("Prefixes");
-    std::vector<StringRef> BPrefixes = B->getValueAsListOfStrings("Prefixes");
+    if (int Cmp = StrCmpOptionName(A->getValueAsString("Name"),
+                                   B->getValueAsString("Name")))
+      return Cmp < 0;
 
-    for (std::vector<StringRef>::const_iterator APre = APrefixes.begin(),
-                                                AEPre = APrefixes.end(),
-                                                BPre = BPrefixes.begin(),
-                                                BEPre = BPrefixes.end();
-         APre != AEPre && BPre != BEPre; ++APre, ++BPre) {
-      if (int Cmp = StrCmpOptionName(APre->str().c_str(), BPre->str().c_str()))
-        return Cmp < 0;
-    }
+    if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes))
+      return Cmp < 0;
   }
 
   // Then by the kind precedence;
-  int APrec = A->getValueAsDef("Kind")->getValueAsInt("Precedence");
-  int BPrec = B->getValueAsDef("Kind")->getValueAsInt("Precedence");
-  if (APrec == BPrec && A->getValueAsListOfStrings("Prefixes") ==
-                            B->getValueAsListOfStrings("Prefixes")) {
+  int APrec = AKind->getValueAsInt("Precedence");
+  int BPrec = BKind->getValueAsInt("Precedence");
+  if (APrec == BPrec && APrefixes == BPrefixes) {
     PrintError(A->getLoc(), Twine("Option is equivalent to"));
     PrintError(B->getLoc(), Twine("Other defined here"));
     PrintFatalError("Equivalent Options found.");
   }
   return APrec < BPrec;
 }
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/Common/OptEmitter.h b/llvm/utils/TableGen/Common/OptEmitter.h
index 5eecd619873373..f7ffa0bcc4a975 100644
--- a/llvm/utils/TableGen/Common/OptEmitter.h
+++ b/llvm/utils/TableGen/Common/OptEmitter.h
@@ -11,7 +11,8 @@
 
 namespace llvm {
 class Record;
-bool CompareOptionRecords(const Record *A, const Record *B);
+/// Return true of Option record \p A is ordered before \p B.
+bool IsOptionRecordsLess(const Record *A, const Record *B);
 } // namespace llvm
 
 #endif // LLVM_UTILS_TABLEGEN_COMMON_OPTEMITTER_H
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index a41c684f169e9f..79cbf51514ae5c 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -255,10 +255,10 @@ static void EmitOptParser(const RecordKeeper &Records, raw_ostream &OS) {
   ArrayRef<const Record *> Groups =
       Records.getAllDerivedDefinitions("OptionGroup");
   std::vector<const Record *> Opts = Records.getAllDerivedDefinitions("Option");
+  llvm::sort(Opts, IsOptionRecordsLess);
 
   emitSourceFileHeader("Option Parsing Definitions", OS);
 
-  llvm::sort(Opts, CompareOptionRecords);
   // Generate prefix groups.
   typedef SmallVector<SmallString<2>, 2> PrefixKeyT;
   typedef std::map<PrefixKeyT, std::string> PrefixesT;
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 43b0f78c44d909..16125198a7c387 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -22,7 +22,7 @@ static void EmitOptRST(const RecordKeeper &Records, raw_ostream &OS) {
 
   // Get the options.
   std::vector<const Record *> Opts = Records.getAllDerivedDefinitions("Option");
-  llvm::sort(Opts, CompareOptionRecords);
+  llvm::sort(Opts, IsOptionRecordsLess);
 
   // Get the option groups.
   for (const Record *R : Records.getAllDerivedDefinitions("OptionGroup"))



More information about the llvm-commits mailing list