[PATCH] D126159: [ADT] Add edit_distance_insensitive to StringRef

Nathan James via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sun May 22 01:23:11 PDT 2022


njames93 created this revision.
njames93 added reviewers: dblaikie, chandlerc.
Herald added a subscriber: hiraditya.
Herald added a project: All.
njames93 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

In some instances it's advantageous to calculate edit distances without regard to case.
Currently, to achieve this, both strings must first be converted to the same case before the edit distance can be calculated.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D126159

Files:
  llvm/include/llvm/ADT/StringRef.h
  llvm/lib/Support/StringRef.cpp
  llvm/unittests/ADT/StringRefTest.cpp


Index: llvm/unittests/ADT/StringRefTest.cpp
===================================================================
--- llvm/unittests/ADT/StringRefTest.cpp
+++ llvm/unittests/ADT/StringRefTest.cpp
@@ -584,6 +584,15 @@
                                        "people soiled our green "));
 }
 
+TEST(StringRefTest, EditDistanceInsensitive) {
+  // Upper-case receiver compared against lower-case arguments.
+  StringRef Hello("HELLO");
+  EXPECT_EQ(0U, Hello.edit_distance_insensitive("hello"));
+  EXPECT_EQ(2U, Hello.edit_distance_insensitive("hill"));
+  // Mixed case on both sides of the comparison.
+  EXPECT_EQ(6U, StringRef("InDuStRy").edit_distance_insensitive("iNtErEsT"));
+}
+
 TEST(StringRefTest, Misc) {
   std::string Storage;
   raw_string_ostream OS(Storage);
Index: llvm/lib/Support/StringRef.cpp
===================================================================
--- llvm/lib/Support/StringRef.cpp
+++ llvm/lib/Support/StringRef.cpp
@@ -98,6 +98,52 @@
       AllowReplacements, MaxEditDistance);
 }
 
+/// Case-insensitive edit distance between this string and \p Other. If
+/// \p AllowReplacements is false only insertions and deletions are counted; a
+/// nonzero \p MaxEditDistance allows bailing out with MaxEditDistance + 1.
+unsigned llvm::StringRef::edit_distance_insensitive(
+    StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
+  const size_t Size = size();
+  const size_t OtherSize = Other.size();
+  constexpr unsigned SmallBufferSize = 64;
+  unsigned SmallBuffer[SmallBufferSize];
+  std::unique_ptr<unsigned[]> Allocated;
+  unsigned *Row = SmallBuffer;
+  if (OtherSize + 1 > SmallBufferSize) {
+    Allocated.reset(new unsigned[OtherSize + 1]);
+    Row = Allocated.get();
+  }
+
+  // Start at 0 so Row[0] is defined even when this string is empty; the
+  // result for two empty strings is read from Row[0].
+  for (unsigned i = 0; i <= OtherSize; ++i)
+    Row[i] = i;
+
+  for (size_t y = 1; y <= Size; ++y) {
+    Row[0] = y;
+    unsigned BestThisRow = Row[0];
+    const char Lhs = toLower(data()[y - 1]);
+    unsigned Previous = y - 1; // Diagonal cell from the previous row.
+    for (size_t x = 1; x <= OtherSize; ++x) {
+      const unsigned OldRow = Row[x];
+      const bool Same = Lhs == toLower(Other[x - 1]);
+      const unsigned InsertOrDelete = std::min(Row[x - 1], OldRow) + 1;
+      if (AllowReplacements)
+        Row[x] = std::min(Previous + (Same ? 0u : 1u), InsertOrDelete);
+      else
+        Row[x] = Same ? Previous : InsertOrDelete;
+      Previous = OldRow;
+      BestThisRow = std::min(BestThisRow, Row[x]);
+    }
+
+    // Every cell in this row already exceeds the limit, so stop early.
+    if (MaxEditDistance && BestThisRow > MaxEditDistance)
+      return MaxEditDistance + 1;
+  }
+
+  return Row[OtherSize];
+}
+
 //===----------------------------------------------------------------------===//
 // String Operations
 //===----------------------------------------------------------------------===//
Index: llvm/include/llvm/ADT/StringRef.h
===================================================================
--- llvm/include/llvm/ADT/StringRef.h
+++ llvm/include/llvm/ADT/StringRef.h
@@ -240,6 +240,10 @@
     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
                            unsigned MaxEditDistance = 0) const;
 
+    LLVM_NODISCARD unsigned
+    edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
+                              unsigned MaxEditDistance = 0) const;
+
     /// str - Get the contents as an std::string.
     LLVM_NODISCARD
     std::string str() const {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D126159.431219.patch
Type: text/x-patch
Size: 3121 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220522/6e41964d/attachment.bin>


More information about the llvm-commits mailing list