[llvm] [LLVM][Option] Refactor option name comparison (PR #108219)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 13 11:07:49 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-support

Author: Rahul Joshi (jurahul)

<details>
<summary>Changes</summary>

- Create a common header for functions shared by TableGen Option Emitter and Options library.
- Move `StrCmpOptionName` to this header, and ase it on the existing version in OptTable.cpp,
  with an additional mode to control fall back to case insensitive comparison.
- Add `StrCmpOptionPrefixes` to compare prefixes and use zp() to iterate through lists of prefixes.
- Rename `CompareOptionRecords` to less ambiguious name `IsOptionRecordLess`.
- Merge 2 back-to-back ifs with same condition in `IsOptionRecordLess`.

Fixes https://github.com/llvm/llvm-project/issues/107723

---
Full diff: https://github.com/llvm/llvm-project/pull/108219.diff


9 Files Affected:

- (added) llvm/include/llvm/Support/OptionStrCmp.h (+32) 
- (modified) llvm/lib/Option/OptTable.cpp (+18-34) 
- (modified) llvm/lib/Option/Option.cpp (+1-2) 
- (modified) llvm/lib/Support/CMakeLists.txt (+1) 
- (added) llvm/lib/Support/OptionStrCmp.cpp (+43) 
- (modified) llvm/utils/TableGen/Common/OptEmitter.cpp (+19-52) 
- (modified) llvm/utils/TableGen/Common/OptEmitter.h (+2-1) 
- (modified) llvm/utils/TableGen/OptParserEmitter.cpp (+1-1) 
- (modified) llvm/utils/TableGen/OptRSTEmitter.cpp (+1-1) 


``````````diff
diff --git a/llvm/include/llvm/Support/OptionStrCmp.h b/llvm/include/llvm/Support/OptionStrCmp.h
new file mode 100644
index 00000000000000..d417fe675e292d
--- /dev/null
+++ b/llvm/include/llvm/Support/OptionStrCmp.h
@@ -0,0 +1,32 @@
+//===- OptionStrCmp.h - Option String Comparison ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_OPTIONSTRCMP_H
+#define LLVM_SUPPORT_OPTIONSTRCMP_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+// Comparison function for Option strings (option names & prefixes).
+// The ordering is *almost* case-insensitive lexicographic, with an exception.
+// '\0' comes at the end of the alphabet instead of the beginning (thus options
+// precede any other options which prefix them). Additionally, if two options
+// are identical ignoring case, they are ordered according to case sensitive
+// ordering if `FallbackCaseSensitive` is true.
+int StrCmpOptionName(StringRef A, StringRef B,
+                     bool FallbackCaseSensitive = true);
+
+// Comparison function for Option prefixes.
+int StrCmpOptionPrefixes(ArrayRef<StringRef> APrefixes,
+                         ArrayRef<StringRef> BPrefixes);
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_OPTIONSTRCMP_H
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 3eceb0fbdfc47b..9fdafed39b8b6c 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Support/CommandLine.h" // for expandResponseFiles
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/OptionStrCmp.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -30,42 +31,26 @@
 using namespace llvm;
 using namespace llvm::opt;
 
-namespace llvm {
-namespace opt {
-
-// Ordering on Info. The ordering is *almost* case-insensitive lexicographic,
-// with an exception. '\0' comes at the end of the alphabet instead of the
-// beginning (thus options precede any other options which prefix them).
-static int StrCmpOptionNameIgnoreCase(StringRef A, StringRef B) {
-  size_t MinSize = std::min(A.size(), B.size());
-  if (int Res = A.substr(0, MinSize).compare_insensitive(B.substr(0, MinSize)))
-    return Res;
-
-  if (A.size() == B.size())
-    return 0;
-
-  return (A.size() == MinSize) ? 1 /* A is a prefix of B. */
-                               : -1 /* B is a prefix of A */;
-}
-
+namespace llvm::opt {
 #ifndef NDEBUG
-static int StrCmpOptionName(StringRef A, StringRef B) {
-  if (int N = StrCmpOptionNameIgnoreCase(A, B))
-    return N;
-  return A.compare(B);
-}
-
 static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
   if (&A == &B)
     return false;
 
-  if (int N = StrCmpOptionName(A.getName(), B.getName()))
-    return N < 0;
+  if (int Cmp = StrCmpOptionName(A.getName(), B.getName()))
+    return Cmp < 0;
 
-  for (size_t I = 0, K = std::min(A.Prefixes.size(), B.Prefixes.size()); I != K;
-       ++I)
-    if (int N = StrCmpOptionName(A.Prefixes[I], B.Prefixes[I]))
-      return N < 0;
+  // Note: we are converting ArrayRef<StringLiteral> to ArrayRef<StringRef>.
+  // In general, ArrayRef<SubClass> cannot be safely viewed as ArrayRef<Base>
+  // since sizeof(SubClass) may not be same as sizeof(Base). However in this
+  // case, sizeof(StringLiteral) is same as sizeof(StringRef), so this
+  // conversion is safe.
+  static_assert(sizeof(StringRef) == sizeof(StringLiteral));
+  ArrayRef<StringRef> APrefixes(A.Prefixes.data(), A.Prefixes.size());
+  ArrayRef<StringRef> BPrefixes(B.Prefixes.data(), B.Prefixes.size());
+
+  if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes))
+    return Cmp < 0;
 
   // Names are the same, check that classes are in order; exactly one
   // should be joined, and it should succeed the other.
@@ -77,11 +62,10 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
 
 // Support lower_bound between info and an option name.
 static inline bool operator<(const OptTable::Info &I, StringRef Name) {
-  return StrCmpOptionNameIgnoreCase(I.getName(), Name) < 0;
+  // Do not fallback to case sensitive comparison.
+  return StrCmpOptionName(I.getName(), Name, false) < 0;
 }
-
-} // end namespace opt
-} // end namespace llvm
+} // namespace llvm::opt
 
 OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
 
diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp
index 500768588bc9b3..ecb3e84b1da8bd 100644
--- a/llvm/lib/Option/Option.cpp
+++ b/llvm/lib/Option/Option.cpp
@@ -6,19 +6,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Option/Option.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
-#include "llvm/Option/Option.h"
 #include "llvm/Option/OptTable.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
-#include <cstring>
 
 using namespace llvm;
 using namespace llvm::opt;
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index c55c17fae189f5..97188b0672f032 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -212,6 +212,7 @@ add_llvm_component_library(LLVMSupport
   NativeFormatting.cpp
   OptimizedStructLayout.cpp
   Optional.cpp
+  OptionStrCmp.cpp
   PGOOptions.cpp
   Parallel.cpp
   PluginLoader.cpp
diff --git a/llvm/lib/Support/OptionStrCmp.cpp b/llvm/lib/Support/OptionStrCmp.cpp
new file mode 100644
index 00000000000000..1ffb64c7a1584b
--- /dev/null
+++ b/llvm/lib/Support/OptionStrCmp.cpp
@@ -0,0 +1,43 @@
+//===- OptionStrCmp.cpp - Option String Comparison --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/OptionStrCmp.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+// Comparison function for Option strings (option names & prefixes).
+// The ordering is *almost* case-insensitive lexicographic, with an exception.
+// '\0' comes at the end of the alphabet instead of the beginning (thus options
+// precede any other options which prefix them). Additionally, if two options
+// are identical ignoring case, they are ordered according to case sensitive
+// ordering if `FallbackCaseSensitive` is true.
+int llvm::StrCmpOptionName(StringRef A, StringRef B,
+                           bool FallbackCaseSensitive) {
+  size_t MinSize = std::min(A.size(), B.size());
+  if (int Res = A.substr(0, MinSize).compare_insensitive(B.substr(0, MinSize)))
+    return Res;
+
+  // If they are identical ignoring case, use case sensitive ordering.
+  if (A.size() == B.size())
+    return FallbackCaseSensitive ? A.compare(B) : 0;
+
+  return (A.size() == MinSize) ? 1 /* A is a prefix of B. */
+                               : -1 /* B is a prefix of A */;
+}
+
+// Comparison function for Option prefixes.
+int llvm::StrCmpOptionPrefixes(ArrayRef<StringRef> APrefixes,
+                               ArrayRef<StringRef> BPrefixes) {
+  for (const auto &[APre, BPre] : zip(APrefixes, BPrefixes)) {
+    if (int Cmp = StrCmpOptionName(APre, BPre))
+      return Cmp;
+  }
+  // Both prefixes are identical.
+  return 0;
+}
diff --git a/llvm/utils/TableGen/Common/OptEmitter.cpp b/llvm/utils/TableGen/Common/OptEmitter.cpp
index 75e32c36d4f726..f914909ebe5e48 100644
--- a/llvm/utils/TableGen/Common/OptEmitter.cpp
+++ b/llvm/utils/TableGen/Common/OptEmitter.cpp
@@ -8,77 +8,44 @@
 
 #include "OptEmitter.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/OptionStrCmp.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
-#include <cctype>
-#include <cstring>
-
-namespace llvm {
-
-// Ordering on Info. The logic should match with the consumer-side function in
-// llvm/Option/OptTable.h.
-// FIXME: Make this take StringRefs instead of null terminated strings to
-// simplify callers.
-static int StrCmpOptionName(const char *A, const char *B) {
-  const char *X = A, *Y = B;
-  char a = tolower(*A), b = tolower(*B);
-  while (a == b) {
-    if (a == '\0')
-      return strcmp(A, B);
-
-    a = tolower(*++X);
-    b = tolower(*++Y);
-  }
-
-  if (a == '\0') // A is a prefix of B.
-    return 1;
-  if (b == '\0') // B is a prefix of A.
-    return -1;
-
-  // Otherwise lexicographic.
-  return (a < b) ? -1 : 1;
-}
 
 // Returns true if A is ordered before B.
-bool CompareOptionRecords(const Record *A, const Record *B) {
+bool llvm::IsOptionRecordsLess(const Record *A, const Record *B) {
   if (A == B)
     return false;
+
   // Sentinel options precede all others and are only ordered by precedence.
-  bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
-  bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+  const Record *AKind = A->getValueAsDef("Kind");
+  const Record *BKind = B->getValueAsDef("Kind");
+
+  bool ASent = AKind->getValueAsBit("Sentinel");
+  bool BSent = BKind->getValueAsBit("Sentinel");
   if (ASent != BSent)
     return ASent;
 
-  // Compare options by name, unless they are sentinels.
-  if (!ASent)
-    if (int Cmp = StrCmpOptionName(A->getValueAsString("Name").str().c_str(),
-                                   B->getValueAsString("Name").str().c_str()))
-      return Cmp < 0;
+  std::vector<StringRef> APrefixes = A->getValueAsListOfStrings("Prefixes");
+  std::vector<StringRef> BPrefixes = B->getValueAsListOfStrings("Prefixes");
 
+  // Compare options by name, unless they are sentinels.
   if (!ASent) {
-    std::vector<StringRef> APrefixes = A->getValueAsListOfStrings("Prefixes");
-    std::vector<StringRef> BPrefixes = B->getValueAsListOfStrings("Prefixes");
+    if (int Cmp = StrCmpOptionName(A->getValueAsString("Name"),
+                                   B->getValueAsString("Name")))
+      return Cmp < 0;
 
-    for (std::vector<StringRef>::const_iterator APre = APrefixes.begin(),
-                                                AEPre = APrefixes.end(),
-                                                BPre = BPrefixes.begin(),
-                                                BEPre = BPrefixes.end();
-         APre != AEPre && BPre != BEPre; ++APre, ++BPre) {
-      if (int Cmp = StrCmpOptionName(APre->str().c_str(), BPre->str().c_str()))
-        return Cmp < 0;
-    }
+    if (int Cmp = StrCmpOptionPrefixes(APrefixes, BPrefixes))
+      return Cmp < 0;
   }
 
   // Then by the kind precedence;
-  int APrec = A->getValueAsDef("Kind")->getValueAsInt("Precedence");
-  int BPrec = B->getValueAsDef("Kind")->getValueAsInt("Precedence");
-  if (APrec == BPrec && A->getValueAsListOfStrings("Prefixes") ==
-                            B->getValueAsListOfStrings("Prefixes")) {
+  int APrec = AKind->getValueAsInt("Precedence");
+  int BPrec = BKind->getValueAsInt("Precedence");
+  if (APrec == BPrec && APrefixes == BPrefixes) {
     PrintError(A->getLoc(), Twine("Option is equivalent to"));
     PrintError(B->getLoc(), Twine("Other defined here"));
     PrintFatalError("Equivalent Options found.");
   }
   return APrec < BPrec;
 }
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/Common/OptEmitter.h b/llvm/utils/TableGen/Common/OptEmitter.h
index 5eecd619873373..f7ffa0bcc4a975 100644
--- a/llvm/utils/TableGen/Common/OptEmitter.h
+++ b/llvm/utils/TableGen/Common/OptEmitter.h
@@ -11,7 +11,8 @@
 
 namespace llvm {
 class Record;
-bool CompareOptionRecords(const Record *A, const Record *B);
+/// Return true of Option record \p A is ordered before \p B.
+bool IsOptionRecordsLess(const Record *A, const Record *B);
 } // namespace llvm
 
 #endif // LLVM_UTILS_TABLEGEN_COMMON_OPTEMITTER_H
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index a41c684f169e9f..79cbf51514ae5c 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -255,10 +255,10 @@ static void EmitOptParser(const RecordKeeper &Records, raw_ostream &OS) {
   ArrayRef<const Record *> Groups =
       Records.getAllDerivedDefinitions("OptionGroup");
   std::vector<const Record *> Opts = Records.getAllDerivedDefinitions("Option");
+  llvm::sort(Opts, IsOptionRecordsLess);
 
   emitSourceFileHeader("Option Parsing Definitions", OS);
 
-  llvm::sort(Opts, CompareOptionRecords);
   // Generate prefix groups.
   typedef SmallVector<SmallString<2>, 2> PrefixKeyT;
   typedef std::map<PrefixKeyT, std::string> PrefixesT;
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 43b0f78c44d909..16125198a7c387 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -22,7 +22,7 @@ static void EmitOptRST(const RecordKeeper &Records, raw_ostream &OS) {
 
   // Get the options.
   std::vector<const Record *> Opts = Records.getAllDerivedDefinitions("Option");
-  llvm::sort(Opts, CompareOptionRecords);
+  llvm::sort(Opts, IsOptionRecordsLess);
 
   // Get the option groups.
   for (const Record *R : Records.getAllDerivedDefinitions("OptionGroup"))

``````````

</details>


https://github.com/llvm/llvm-project/pull/108219


More information about the llvm-commits mailing list