[llvm] a13b61f - [ADT] Add edit_distance_insensitive to StringRef

Nathan James via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 5 04:03:25 PDT 2022


Author: Nathan James
Date: 2022-06-05T12:03:09+01:00
New Revision: a13b61f7f0a2a44546fb39663b0d6f2547841e02

URL: https://github.com/llvm/llvm-project/commit/a13b61f7f0a2a44546fb39663b0d6f2547841e02
DIFF: https://github.com/llvm/llvm-project/commit/a13b61f7f0a2a44546fb39663b0d6f2547841e02.diff

LOG: [ADT] Add edit_distance_insensitive to StringRef

In some instances it's advantageous to calculate edit distances without worrying about casing.
Currently, to achieve this, both strings need to be converted to the same case first; only then can the edit distance be calculated.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D126159

Added: 
    

Modified: 
    llvm/include/llvm/ADT/StringRef.h
    llvm/include/llvm/ADT/edit_distance.h
    llvm/lib/Support/StringRef.cpp
    llvm/unittests/ADT/StringRefTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 118def2f43e12..80ba47dd619cb 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -240,6 +240,10 @@ namespace llvm {
     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
                            unsigned MaxEditDistance = 0) const;
 
+    LLVM_NODISCARD unsigned
+    edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
+                              unsigned MaxEditDistance = 0) const;
+
     /// str - Get the contents as an std::string.
     LLVM_NODISCARD
     std::string str() const {

diff  --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h
index c480c1e7cd78a..6989cecab9b13 100644
--- a/llvm/include/llvm/ADT/edit_distance.h
+++ b/llvm/include/llvm/ADT/edit_distance.h
@@ -28,6 +28,9 @@ namespace llvm {
 ///
 /// \param ToArray the second sequence to compare.
 ///
+/// \param Map A Functor to apply to each item of the sequences before
+/// comparison.
+///
 /// \param AllowReplacements whether to allow element replacements (change one
 /// element into another) as a single operation, rather than as two operations
 /// (an insertion and a removal).
@@ -39,10 +42,10 @@ namespace llvm {
 /// \returns the minimum number of element insertions, removals, or (if
 /// \p AllowReplacements is \c true) replacements needed to transform one of
 /// the given sequences into the other. If zero, the sequences are identical.
-template<typename T>
-unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
-                             bool AllowReplacements = true,
-                             unsigned MaxEditDistance = 0) {
+template <typename T, typename Functor>
+unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
+                                   Functor Map, bool AllowReplacements = true,
+                                   unsigned MaxEditDistance = 0) {
   // The algorithm implemented below is the "classic"
   // dynamic-programming algorithm for computing the Levenshtein
   // distance, which is described here:
@@ -75,15 +78,16 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
     unsigned BestThisRow = Row[0];
 
     unsigned Previous = y - 1;
+    const auto &CurItem = Map(FromArray[y - 1]);
     for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
       int OldRow = Row[x];
       if (AllowReplacements) {
-        Row[x] = std::min(
-            Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
-            std::min(Row[x-1], Row[x])+1);
+        Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u),
+                          std::min(Row[x - 1], Row[x]) + 1);
       }
       else {
-        if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous;
+        if (CurItem == Map(ToArray[x - 1]))
+          Row[x] = Previous;
         else Row[x] = std::min(Row[x-1], Row[x]) + 1;
       }
       Previous = OldRow;
@@ -98,6 +102,15 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
   return Result;
 }
 
+template <typename T>
+unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
+                             bool AllowReplacements = true,
+                             unsigned MaxEditDistance = 0) {
+  return ComputeMappedEditDistance(
+      FromArray, ToArray, [](const T &X) -> const T & { return X; },
+      AllowReplacements, MaxEditDistance);
+}
+
 } // End llvm namespace
 
 #endif

diff  --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index 3ed08ed386617..096b2d2d8c078 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -98,6 +98,13 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
       AllowReplacements, MaxEditDistance);
 }
 
+unsigned llvm::StringRef::edit_distance_insensitive(
+    StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
+  return llvm::ComputeMappedEditDistance(
+      makeArrayRef(data(), size()), makeArrayRef(Other.data(), Other.size()),
+      llvm::toLower, AllowReplacements, MaxEditDistance);
+}
+
 //===----------------------------------------------------------------------===//
 // String Operations
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp
index e80a25a19969c..ac734585b0711 100644
--- a/llvm/unittests/ADT/StringRefTest.cpp
+++ b/llvm/unittests/ADT/StringRefTest.cpp
@@ -584,6 +584,15 @@ TEST(StringRefTest, EditDistance) {
                                        "people soiled our green "));
 }
 
+TEST(StringRefTest, EditDistanceInsensitive) {
+  StringRef Hello("HELLO");
+  EXPECT_EQ(2U, Hello.edit_distance_insensitive("hill"));
+  EXPECT_EQ(0U, Hello.edit_distance_insensitive("hello"));
+
+  StringRef Industry("InDuStRy");
+  EXPECT_EQ(6U, Industry.edit_distance_insensitive("iNtErEsT"));
+}
+
 TEST(StringRefTest, Misc) {
   std::string Storage;
   raw_string_ostream OS(Storage);


        


More information about the llvm-commits mailing list