[PATCH] D126159: [ADT] Add edit_distance_insensitive to StringRef
Nathan James via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun May 22 01:23:11 PDT 2022
njames93 created this revision.
njames93 added reviewers: dblaikie, chandlerc.
Herald added a subscriber: hiraditya.
Herald added a project: All.
njames93 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
In some instances it's advantageous to calculate edit distances without worrying about casing.
Currently, to achieve this, both strings need to be converted to the same case first; only then can the edit distance be calculated.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D126159
Files:
llvm/include/llvm/ADT/StringRef.h
llvm/lib/Support/StringRef.cpp
llvm/unittests/ADT/StringRefTest.cpp
Index: llvm/unittests/ADT/StringRefTest.cpp
===================================================================
--- llvm/unittests/ADT/StringRefTest.cpp
+++ llvm/unittests/ADT/StringRefTest.cpp
@@ -584,6 +584,15 @@
"people soiled our green "));
}
+// Checks that edit_distance_insensitive ignores letter case when comparing.
+TEST(StringRefTest, EditDistanceInsensitive) {
+  // Upper-case subject measured against lower-case candidates.
+  const StringRef Greeting("HELLO");
+  EXPECT_EQ(2U, Greeting.edit_distance_insensitive("hill"));
+  EXPECT_EQ(0U, Greeting.edit_distance_insensitive("hello"));
+
+  // Mixed-case operands on both sides of the comparison.
+  const StringRef MixedCase("InDuStRy");
+  EXPECT_EQ(6U, MixedCase.edit_distance_insensitive("iNtErEsT"));
+}
+
TEST(StringRefTest, Misc) {
std::string Storage;
raw_string_ostream OS(Storage);
Index: llvm/lib/Support/StringRef.cpp
===================================================================
--- llvm/lib/Support/StringRef.cpp
+++ llvm/lib/Support/StringRef.cpp
@@ -98,6 +98,52 @@
AllowReplacements, MaxEditDistance);
}
+/// Compute the edit distance between this string and \p Other, comparing
+/// characters after mapping both through toLower() so that letter case is
+/// ignored.
+///
+/// \param Other the string to compare this string against.
+/// \param AllowReplacements whether a mismatching character may be replaced
+/// at a cost of one edit; when false, a mismatch is only resolved through
+/// the insertion/deletion neighbours.
+/// \param MaxEditDistance if non-zero, the computation stops as soon as the
+/// smallest value in the current row exceeds this bound.
+///
+/// \returns the computed distance, or \p MaxEditDistance + 1 when the bound
+/// was exceeded.
+unsigned llvm::StringRef::edit_distance_insensitive(
+    StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
+  const size_t Size = size();
+  const size_t OtherSize = Other.size();
+
+  // Only a single row of the dynamic-programming table is kept. Short inputs
+  // use the on-stack buffer; longer ones fall back to a heap allocation that
+  // Allocated releases on every return path.
+  constexpr unsigned SmallBufferSize = 64;
+  unsigned SmallBuffer[SmallBufferSize];
+  std::unique_ptr<unsigned[]> Allocated;
+  unsigned *Row = SmallBuffer;
+  if (OtherSize + 1 > SmallBufferSize) {
+    Allocated.reset(new unsigned[OtherSize + 1]);
+    Row = Allocated.get();
+  }
+
+  // Seed the row starting at index 0. Row[0] must be defined even when both
+  // strings are empty, because in that case no loop below writes it and it is
+  // the value returned.
+  for (size_t i = 0; i <= OtherSize; ++i)
+    Row[i] = static_cast<unsigned>(i);
+
+  for (size_t y = 1; y <= Size; ++y) {
+    // The lowered character of this string is invariant across the inner loop.
+    const auto ThisLower = toLower(data()[y - 1]);
+
+    Row[0] = static_cast<unsigned>(y);
+    unsigned BestThisRow = Row[0];
+
+    // Previous holds the diagonal (previous row, previous column) entry.
+    unsigned Previous = static_cast<unsigned>(y - 1);
+    for (size_t x = 1; x <= OtherSize; ++x) {
+      // Remember the previous-row value before it is overwritten; it becomes
+      // the diagonal entry for the next column.
+      const unsigned OldRow = Row[x];
+      const bool Match = ThisLower == toLower(Other[x - 1]);
+      if (AllowReplacements) {
+        Row[x] = std::min(Previous + (Match ? 0u : 1u),
+                          std::min(Row[x - 1], Row[x]) + 1);
+      } else if (Match) {
+        Row[x] = Previous;
+      } else {
+        Row[x] = std::min(Row[x - 1], Row[x]) + 1;
+      }
+      Previous = OldRow;
+      BestThisRow = std::min(BestThisRow, Row[x]);
+    }
+
+    // Stop early once even the best entry of this row is over the bound.
+    if (MaxEditDistance && BestThisRow > MaxEditDistance)
+      return MaxEditDistance + 1;
+  }
+
+  return Row[OtherSize];
+}
+
//===----------------------------------------------------------------------===//
// String Operations
//===----------------------------------------------------------------------===//
Index: llvm/include/llvm/ADT/StringRef.h
===================================================================
--- llvm/include/llvm/ADT/StringRef.h
+++ llvm/include/llvm/ADT/StringRef.h
@@ -240,6 +240,10 @@
unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) const;
+ /// Determine the edit distance between this string and \p Other while
+ /// ignoring letter case: characters are compared after being passed
+ /// through toLower().
+ ///
+ /// \param Other the string to compare this string against.
+ /// \param AllowReplacements whether a mismatching character may be
+ /// replaced at a cost of one edit.
+ /// \param MaxEditDistance if non-zero, the computation stops once the
+ /// smallest value in the current row exceeds this bound.
+ ///
+ /// \returns the computed distance, or \p MaxEditDistance + 1 if the bound
+ /// was exceeded.
+ LLVM_NODISCARD unsigned
+ edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
+ unsigned MaxEditDistance = 0) const;
+
/// str - Get the contents as an std::string.
LLVM_NODISCARD
std::string str() const {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D126159.431219.patch
Type: text/x-patch
Size: 3121 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220522/6e41964d/attachment.bin>
More information about the llvm-commits
mailing list