[PATCH] D126159: [ADT] Add edit_distance_insensitive to StringRef

Nathan James via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon May 23 20:19:29 PDT 2022


njames93 updated this revision to Diff 431570.
njames93 added a comment.

Remove code duplication by adding an extra Map parameter to llvm::ComputeEditDistance.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126159/new/

https://reviews.llvm.org/D126159

Files:
  llvm/include/llvm/ADT/StringRef.h
  llvm/include/llvm/ADT/edit_distance.h
  llvm/lib/Support/StringRef.cpp
  llvm/unittests/ADT/StringRefTest.cpp


Index: llvm/unittests/ADT/StringRefTest.cpp
===================================================================
--- llvm/unittests/ADT/StringRefTest.cpp
+++ llvm/unittests/ADT/StringRefTest.cpp
@@ -584,6 +584,15 @@
                                        "people soiled our green "));
 }
 
+TEST(StringRefTest, EditDistanceInsensitive) {
+  StringRef Hello("HELLO");
+  EXPECT_EQ(2U, Hello.edit_distance_insensitive("hill"));
+  EXPECT_EQ(0U, Hello.edit_distance_insensitive("hello"));
+
+  StringRef Industry("InDuStRy");
+  EXPECT_EQ(6U, Industry.edit_distance_insensitive("iNtErEsT"));
+}
+
 TEST(StringRefTest, Misc) {
   std::string Storage;
   raw_string_ostream OS(Storage);
Index: llvm/lib/Support/StringRef.cpp
===================================================================
--- llvm/lib/Support/StringRef.cpp
+++ llvm/lib/Support/StringRef.cpp
@@ -98,6 +98,13 @@
       AllowReplacements, MaxEditDistance);
 }
 
+unsigned llvm::StringRef::edit_distance_insensitive(
+    StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
+  return llvm::ComputeEditDistance(
+      makeArrayRef(data(), size()), makeArrayRef(Other.data(), Other.size()),
+      AllowReplacements, MaxEditDistance, llvm::toLower);
+}
+
 //===----------------------------------------------------------------------===//
 // String Operations
 //===----------------------------------------------------------------------===//
Index: llvm/include/llvm/ADT/edit_distance.h
===================================================================
--- llvm/include/llvm/ADT/edit_distance.h
+++ llvm/include/llvm/ADT/edit_distance.h
@@ -36,13 +36,17 @@
 /// routine is allowed to compute. If the edit distance will exceed that
 /// maximum, returns \c MaxEditDistance+1.
 ///
+/// \param Map A functor to apply to each item of the sequences before
+/// comparison. If unspecified, each item is mapped to itself (identity).
+///
 /// \returns the minimum number of element insertions, removals, or (if
 /// \p AllowReplacements is \c true) replacements needed to transform one of
 /// the given sequences into the other. If zero, the sequences are identical.
-template<typename T>
-unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
-                             bool AllowReplacements = true,
-                             unsigned MaxEditDistance = 0) {
+template <typename T, typename Functor = const T &(*)(const T &)>
+unsigned ComputeEditDistance(
+    ArrayRef<T> FromArray, ArrayRef<T> ToArray, bool AllowReplacements = true,
+    unsigned MaxEditDistance = 0,
+    Functor Map = +[](const T &X) -> const T & { return X; }) {
   // The algorithm implemented below is the "classic"
   // dynamic-programming algorithm for computing the Levenshtein
   // distance, which is described here:
@@ -79,11 +83,12 @@
       int OldRow = Row[x];
       if (AllowReplacements) {
         Row[x] = std::min(
-            Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
-            std::min(Row[x-1], Row[x])+1);
+            Previous + (Map(FromArray[y - 1]) == Map(ToArray[x - 1]) ? 0u : 1u),
+            std::min(Row[x - 1], Row[x]) + 1);
       }
       else {
-        if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous;
+        if (Map(FromArray[y - 1]) == Map(ToArray[x - 1]))
+          Row[x] = Previous;
         else Row[x] = std::min(Row[x-1], Row[x]) + 1;
       }
       Previous = OldRow;
Index: llvm/include/llvm/ADT/StringRef.h
===================================================================
--- llvm/include/llvm/ADT/StringRef.h
+++ llvm/include/llvm/ADT/StringRef.h
@@ -240,6 +240,10 @@
     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
                            unsigned MaxEditDistance = 0) const;
 
+    LLVM_NODISCARD unsigned
+    edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
+                              unsigned MaxEditDistance = 0) const;
+
     /// str - Get the contents as an std::string.
     LLVM_NODISCARD
     std::string str() const {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D126159.431570.patch
Type: text/x-patch
Size: 4031 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220524/60635802/attachment.bin>


More information about the llvm-commits mailing list