[llvm] r286724 - [Support] Add StringRef::find_lower and contains_lower.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 13 18:45:46 PST 2016
For example, you can do Boyer-Moore in a case-insensitive manner, can't you?
On Sat, Nov 12, 2016 at 12:20 PM, Zachary Turner <zturner at google.com> wrote:
> Is there a better one that doesn't require copying the string?
>
> On Sat, Nov 12, 2016 at 12:02 PM Rui Ueyama <ruiu at google.com> wrote:
>
>> On Sat, Nov 12, 2016 at 9:17 AM, Zachary Turner via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>> Author: zturner
>> Date: Sat Nov 12 11:17:12 2016
>> New Revision: 286724
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=286724&view=rev
>> Log:
>> [Support] Add StringRef::find_lower and contains_lower.
>>
>> Differential Revision: https://reviews.llvm.org/D25299
>>
>> Modified:
>> llvm/trunk/include/llvm/ADT/StringRef.h
>> llvm/trunk/lib/Support/StringRef.cpp
>> llvm/trunk/unittests/ADT/StringRefTest.cpp
>>
>> Modified: llvm/trunk/include/llvm/ADT/StringRef.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/
>> ADT/StringRef.h?rev=286724&r1=286723&r2=286724&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/include/llvm/ADT/StringRef.h (original)
>> +++ llvm/trunk/include/llvm/ADT/StringRef.h Sat Nov 12 11:17:12 2016
>> @@ -286,6 +286,12 @@ namespace llvm {
>> return npos;
>> }
>>
>> + /// Search for the first character \p C in the string, ignoring case.
>> + ///
>> + /// \returns The index of the first occurrence of \p C, or npos if
>> not
>> + /// found.
>> + size_t find_lower(char C, size_t From = 0) const;
>> +
>> /// Search for the first character satisfying the predicate \p F
>> ///
>> /// \returns The index of the first character satisfying \p F
>> starting from
>> @@ -318,6 +324,12 @@ namespace llvm {
>> /// found.
>> size_t find(StringRef Str, size_t From = 0) const;
>>
>> + /// Search for the first string \p Str in the string, ignoring case.
>> + ///
>> + /// \returns The index of the first occurrence of \p Str, or npos if
>> not
>> + /// found.
>> + size_t find_lower(StringRef Str, size_t From = 0) const;
>> +
>> /// Search for the last character \p C in the string.
>> ///
>> /// \returns The index of the last occurrence of \p C, or npos if not
>> @@ -333,12 +345,24 @@ namespace llvm {
>> return npos;
>> }
>>
>> + /// Search for the last character \p C in the string, ignoring case.
>> + ///
>> + /// \returns The index of the last occurrence of \p C, or npos if not
>> + /// found.
>> + size_t rfind_lower(char C, size_t From = npos) const;
>> +
>> /// Search for the last string \p Str in the string.
>> ///
>> /// \returns The index of the last occurrence of \p Str, or npos if
>> not
>> /// found.
>> size_t rfind(StringRef Str) const;
>>
>> + /// Search for the last string \p Str in the string, ignoring case.
>> + ///
>> + /// \returns The index of the last occurrence of \p Str, or npos if
>> not
>> + /// found.
>> + size_t rfind_lower(StringRef Str) const;
>> +
>> /// Find the first character in the string that is \p C, or npos if
>> not
>> /// found. Same as find.
>> size_t find_first_of(char C, size_t From = 0) const {
>> @@ -393,6 +417,18 @@ namespace llvm {
>> LLVM_ATTRIBUTE_ALWAYS_INLINE
>> bool contains(char C) const { return find_first_of(C) != npos; }
>>
>> + /// Return true if the given string is a substring of *this, and
>> false
>> + /// otherwise.
>> + LLVM_ATTRIBUTE_ALWAYS_INLINE
>> + bool contains_lower(StringRef Other) const {
>> + return find_lower(Other) != npos;
>> + }
>> +
>> + /// Return true if the given character is contained in *this, and
>> false
>> + /// otherwise.
>> + LLVM_ATTRIBUTE_ALWAYS_INLINE
>> + bool contains_lower(char C) const { return find_lower(C) != npos; }
>> +
>> /// @}
>> /// @name Helpful Algorithms
>> /// @{
>>
>> Modified: llvm/trunk/lib/Support/StringRef.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/
>> StringRef.cpp?rev=286724&r1=286723&r2=286724&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Support/StringRef.cpp (original)
>> +++ llvm/trunk/lib/Support/StringRef.cpp Sat Nov 12 11:17:12 2016
>> @@ -69,6 +69,11 @@ bool StringRef::endswith_lower(StringRef
>> ascii_strncasecmp(end() - Suffix.Length, Suffix.Data,
>> Suffix.Length) == 0;
>> }
>>
>> +size_t StringRef::find_lower(char C, size_t From) const {
>> + char L = ascii_tolower(C);
>> + return find_if([L](char D) { return ascii_tolower(D) == L; }, From);
>> +}
>> +
>> /// compare_numeric - Compare strings, handle embedded numbers.
>> int StringRef::compare_numeric(StringRef RHS) const {
>> for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
>> @@ -182,6 +187,28 @@ size_t StringRef::find(StringRef Str, si
>> return npos;
>> }
>>
>> +size_t StringRef::find_lower(StringRef Str, size_t From) const {
>> + StringRef This = substr(From);
>> + while (This.size() >= Str.size()) {
>> + if (This.startswith_lower(Str))
>> + return From;
>> + This = This.drop_front();
>> + ++From;
>> + }
>> + return npos;
>> +}
>>
>>
>> This is a pretty naive algorithm. Isn't this too slow?
>>
>>
>> +
>> +size_t StringRef::rfind_lower(char C, size_t From) const {
>> + From = std::min(From, Length);
>> + size_t i = From;
>> + while (i != 0) {
>> + --i;
>> + if (ascii_tolower(Data[i]) == ascii_tolower(C))
>> + return i;
>> + }
>> + return npos;
>> +}
>> +
>> /// rfind - Search for the last string \arg Str in the string.
>> ///
>> /// \return - The index of the last occurrence of \arg Str, or npos if
>> not
>> @@ -196,6 +223,18 @@ size_t StringRef::rfind(StringRef Str) c
>> return i;
>> }
>> return npos;
>> +}
>> +
>> +size_t StringRef::rfind_lower(StringRef Str) const {
>> + size_t N = Str.size();
>> + if (N > Length)
>> + return npos;
>> + for (size_t i = Length - N + 1, e = 0; i != e;) {
>> + --i;
>> + if (substr(i, N).equals_lower(Str))
>> + return i;
>> + }
>> + return npos;
>> }
>>
>> /// find_first_of - Find the first character in the string that is in
>> \arg
>>
>> Modified: llvm/trunk/unittests/ADT/StringRefTest.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT
>> /StringRefTest.cpp?rev=286724&r1=286723&r2=286724&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/unittests/ADT/StringRefTest.cpp (original)
>> +++ llvm/trunk/unittests/ADT/StringRefTest.cpp Sat Nov 12 11:17:12 2016
>> @@ -410,21 +410,58 @@ TEST(StringRefTest, ConsumeBack) {
>> }
>>
>> TEST(StringRefTest, Find) {
>> - StringRef Str("hello");
>> - EXPECT_EQ(2U, Str.find('l'));
>> - EXPECT_EQ(StringRef::npos, Str.find('z'));
>> - EXPECT_EQ(StringRef::npos, Str.find("helloworld"));
>> - EXPECT_EQ(0U, Str.find("hello"));
>> - EXPECT_EQ(1U, Str.find("ello"));
>> - EXPECT_EQ(StringRef::npos, Str.find("zz"));
>> - EXPECT_EQ(2U, Str.find("ll", 2));
>> - EXPECT_EQ(StringRef::npos, Str.find("ll", 3));
>> - EXPECT_EQ(0U, Str.find(""));
>> - StringRef LongStr("hellx xello hell ello world foo bar hello");
>> - EXPECT_EQ(36U, LongStr.find("hello"));
>> - EXPECT_EQ(28U, LongStr.find("foo"));
>> - EXPECT_EQ(12U, LongStr.find("hell", 2));
>> - EXPECT_EQ(0U, LongStr.find(""));
>> + StringRef Str("helloHELLO");
>> + StringRef LongStr("hellx xello hell ello world foo bar hello HELLO");
>> +
>> + struct {
>> + StringRef Str;
>> + char C;
>> + std::size_t From;
>> + std::size_t Pos;
>> + std::size_t LowerPos;
>> + } CharExpectations[] = {
>> + {Str, 'h', 0U, 0U, 0U},
>> + {Str, 'e', 0U, 1U, 1U},
>> + {Str, 'l', 0U, 2U, 2U},
>> + {Str, 'l', 3U, 3U, 3U},
>> + {Str, 'o', 0U, 4U, 4U},
>> + {Str, 'L', 0U, 7U, 2U},
>> + {Str, 'z', 0U, StringRef::npos, StringRef::npos},
>> + };
>> +
>> + struct {
>> + StringRef Str;
>> + llvm::StringRef S;
>> + std::size_t From;
>> + std::size_t Pos;
>> + std::size_t LowerPos;
>> + } StrExpectations[] = {
>> + {Str, "helloword", 0, StringRef::npos, StringRef::npos},
>> + {Str, "hello", 0, 0U, 0U},
>> + {Str, "ello", 0, 1U, 1U},
>> + {Str, "zz", 0, StringRef::npos, StringRef::npos},
>> + {Str, "ll", 2U, 2U, 2U},
>> + {Str, "ll", 3U, StringRef::npos, 7U},
>> + {Str, "LL", 2U, 7U, 2U},
>> + {Str, "LL", 3U, 7U, 7U},
>> + {Str, "", 0U, 0U, 0U},
>> + {LongStr, "hello", 0U, 36U, 36U},
>> + {LongStr, "foo", 0U, 28U, 28U},
>> + {LongStr, "hell", 2U, 12U, 12U},
>> + {LongStr, "HELL", 2U, 42U, 12U},
>> + {LongStr, "", 0U, 0U, 0U}};
>> +
>> + for (auto &E : CharExpectations) {
>> + EXPECT_EQ(E.Pos, E.Str.find(E.C, E.From));
>> + EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.C, E.From));
>> + EXPECT_EQ(E.LowerPos, E.Str.find_lower(toupper(E.C), E.From));
>> + }
>> +
>> + for (auto &E : StrExpectations) {
>> + EXPECT_EQ(E.Pos, E.Str.find(E.S, E.From));
>> + EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S, E.From));
>> + EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S.upper(), E.From));
>> + }
>>
>> EXPECT_EQ(3U, Str.rfind('l'));
>> EXPECT_EQ(StringRef::npos, Str.rfind('z'));
>> @@ -433,10 +470,19 @@ TEST(StringRefTest, Find) {
>> EXPECT_EQ(1U, Str.rfind("ello"));
>> EXPECT_EQ(StringRef::npos, Str.rfind("zz"));
>>
>> + EXPECT_EQ(8U, Str.rfind_lower('l'));
>> + EXPECT_EQ(8U, Str.rfind_lower('L'));
>> + EXPECT_EQ(StringRef::npos, Str.rfind_lower('z'));
>> + EXPECT_EQ(StringRef::npos, Str.rfind_lower("HELLOWORLD"));
>> + EXPECT_EQ(5U, Str.rfind("HELLO"));
>> + EXPECT_EQ(6U, Str.rfind("ELLO"));
>> + EXPECT_EQ(StringRef::npos, Str.rfind("ZZ"));
>> +
>> EXPECT_EQ(2U, Str.find_first_of('l'));
>> EXPECT_EQ(1U, Str.find_first_of("el"));
>> EXPECT_EQ(StringRef::npos, Str.find_first_of("xyz"));
>>
>> + Str = "hello";
>> EXPECT_EQ(1U, Str.find_first_not_of('h'));
>> EXPECT_EQ(4U, Str.find_first_not_of("hel"));
>> EXPECT_EQ(StringRef::npos, Str.find_first_not_of("hello"));
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161113/3ad7e131/attachment.html>
More information about the llvm-commits
mailing list