[llvm] r286724 - [Support] Add StringRef::find_lower and contains_lower.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 13 18:45:46 PST 2016


For example, you can do Boyer-Moore in a case-insensitive manner, can't you?

On Sat, Nov 12, 2016 at 12:20 PM, Zachary Turner <zturner at google.com> wrote:

> Is there a better one that doesn't require copying the string?
>
> On Sat, Nov 12, 2016 at 12:02 PM Rui Ueyama <ruiu at google.com> wrote:
>
>> On Sat, Nov 12, 2016 at 9:17 AM, Zachary Turner via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>> Author: zturner
>> Date: Sat Nov 12 11:17:12 2016
>> New Revision: 286724
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=286724&view=rev
>> Log:
>> [Support] Add StringRef::find_lower and contains_lower.
>>
>> Differential Revision: https://reviews.llvm.org/D25299
>>
>> Modified:
>>     llvm/trunk/include/llvm/ADT/StringRef.h
>>     llvm/trunk/lib/Support/StringRef.cpp
>>     llvm/trunk/unittests/ADT/StringRefTest.cpp
>>
>> Modified: llvm/trunk/include/llvm/ADT/StringRef.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/
>> ADT/StringRef.h?rev=286724&r1=286723&r2=286724&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/include/llvm/ADT/StringRef.h (original)
>> +++ llvm/trunk/include/llvm/ADT/StringRef.h Sat Nov 12 11:17:12 2016
>> @@ -286,6 +286,12 @@ namespace llvm {
>>        return npos;
>>      }
>>
>> +    /// Search for the first character \p C in the string, ignoring case.
>> +    ///
>> +    /// \returns The index of the first occurrence of \p C, or npos if
>> not
>> +    /// found.
>> +    size_t find_lower(char C, size_t From = 0) const;
>> +
>>      /// Search for the first character satisfying the predicate \p F
>>      ///
>>      /// \returns The index of the first character satisfying \p F
>> starting from
>> @@ -318,6 +324,12 @@ namespace llvm {
>>      /// found.
>>      size_t find(StringRef Str, size_t From = 0) const;
>>
>> +    /// Search for the first string \p Str in the string, ignoring case.
>> +    ///
>> +    /// \returns The index of the first occurrence of \p Str, or npos if
>> not
>> +    /// found.
>> +    size_t find_lower(StringRef Str, size_t From = 0) const;
>> +
>>      /// Search for the last character \p C in the string.
>>      ///
>>      /// \returns The index of the last occurrence of \p C, or npos if not
>> @@ -333,12 +345,24 @@ namespace llvm {
>>        return npos;
>>      }
>>
>> +    /// Search for the last character \p C in the string, ignoring case.
>> +    ///
>> +    /// \returns The index of the last occurrence of \p C, or npos if not
>> +    /// found.
>> +    size_t rfind_lower(char C, size_t From = npos) const;
>> +
>>      /// Search for the last string \p Str in the string.
>>      ///
>>      /// \returns The index of the last occurrence of \p Str, or npos if
>> not
>>      /// found.
>>      size_t rfind(StringRef Str) const;
>>
>> +    /// Search for the last string \p Str in the string, ignoring case.
>> +    ///
>> +    /// \returns The index of the last occurrence of \p Str, or npos if
>> not
>> +    /// found.
>> +    size_t rfind_lower(StringRef Str) const;
>> +
>>      /// Find the first character in the string that is \p C, or npos if
>> not
>>      /// found. Same as find.
>>      size_t find_first_of(char C, size_t From = 0) const {
>> @@ -393,6 +417,18 @@ namespace llvm {
>>      LLVM_ATTRIBUTE_ALWAYS_INLINE
>>      bool contains(char C) const { return find_first_of(C) != npos; }
>>
>> +    /// Return true if the given string is a substring of *this, and
>> false
>> +    /// otherwise.
>> +    LLVM_ATTRIBUTE_ALWAYS_INLINE
>> +    bool contains_lower(StringRef Other) const {
>> +      return find_lower(Other) != npos;
>> +    }
>> +
>> +    /// Return true if the given character is contained in *this, and
>> false
>> +    /// otherwise.
>> +    LLVM_ATTRIBUTE_ALWAYS_INLINE
>> +    bool contains_lower(char C) const { return find_lower(C) != npos; }
>> +
>>      /// @}
>>      /// @name Helpful Algorithms
>>      /// @{
>>
>> Modified: llvm/trunk/lib/Support/StringRef.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/
>> StringRef.cpp?rev=286724&r1=286723&r2=286724&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Support/StringRef.cpp (original)
>> +++ llvm/trunk/lib/Support/StringRef.cpp Sat Nov 12 11:17:12 2016
>> @@ -69,6 +69,11 @@ bool StringRef::endswith_lower(StringRef
>>        ascii_strncasecmp(end() - Suffix.Length, Suffix.Data,
>> Suffix.Length) == 0;
>>  }
>>
>> +size_t StringRef::find_lower(char C, size_t From) const {
>> +  char L = ascii_tolower(C);
>> +  return find_if([L](char D) { return ascii_tolower(D) == L; }, From);
>> +}
>> +
>>  /// compare_numeric - Compare strings, handle embedded numbers.
>>  int StringRef::compare_numeric(StringRef RHS) const {
>>    for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
>> @@ -182,6 +187,28 @@ size_t StringRef::find(StringRef Str, si
>>    return npos;
>>  }
>>
>> +size_t StringRef::find_lower(StringRef Str, size_t From) const {
>> +  StringRef This = substr(From);
>> +  while (This.size() >= Str.size()) {
>> +    if (This.startswith_lower(Str))
>> +      return From;
>> +    This = This.drop_front();
>> +    ++From;
>> +  }
>> +  return npos;
>> +}
>>
>>
>> This is a pretty naive algorithm. Isn't this too slow?
>>
>>
>> +
>> +size_t StringRef::rfind_lower(char C, size_t From) const {
>> +  From = std::min(From, Length);
>> +  size_t i = From;
>> +  while (i != 0) {
>> +    --i;
>> +    if (ascii_tolower(Data[i]) == ascii_tolower(C))
>> +      return i;
>> +  }
>> +  return npos;
>> +}
>> +
>>  /// rfind - Search for the last string \arg Str in the string.
>>  ///
>>  /// \return - The index of the last occurrence of \arg Str, or npos if
>> not
>> @@ -196,6 +223,18 @@ size_t StringRef::rfind(StringRef Str) c
>>        return i;
>>    }
>>    return npos;
>> +}
>> +
>> +size_t StringRef::rfind_lower(StringRef Str) const {
>> +  size_t N = Str.size();
>> +  if (N > Length)
>> +    return npos;
>> +  for (size_t i = Length - N + 1, e = 0; i != e;) {
>> +    --i;
>> +    if (substr(i, N).equals_lower(Str))
>> +      return i;
>> +  }
>> +  return npos;
>>  }
>>
>>  /// find_first_of - Find the first character in the string that is in
>> \arg
>>
>> Modified: llvm/trunk/unittests/ADT/StringRefTest.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT
>> /StringRefTest.cpp?rev=286724&r1=286723&r2=286724&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/unittests/ADT/StringRefTest.cpp (original)
>> +++ llvm/trunk/unittests/ADT/StringRefTest.cpp Sat Nov 12 11:17:12 2016
>> @@ -410,21 +410,58 @@ TEST(StringRefTest, ConsumeBack) {
>>  }
>>
>>  TEST(StringRefTest, Find) {
>> -  StringRef Str("hello");
>> -  EXPECT_EQ(2U, Str.find('l'));
>> -  EXPECT_EQ(StringRef::npos, Str.find('z'));
>> -  EXPECT_EQ(StringRef::npos, Str.find("helloworld"));
>> -  EXPECT_EQ(0U, Str.find("hello"));
>> -  EXPECT_EQ(1U, Str.find("ello"));
>> -  EXPECT_EQ(StringRef::npos, Str.find("zz"));
>> -  EXPECT_EQ(2U, Str.find("ll", 2));
>> -  EXPECT_EQ(StringRef::npos, Str.find("ll", 3));
>> -  EXPECT_EQ(0U, Str.find(""));
>> -  StringRef LongStr("hellx xello hell ello world foo bar hello");
>> -  EXPECT_EQ(36U, LongStr.find("hello"));
>> -  EXPECT_EQ(28U, LongStr.find("foo"));
>> -  EXPECT_EQ(12U, LongStr.find("hell", 2));
>> -  EXPECT_EQ(0U, LongStr.find(""));
>> +  StringRef Str("helloHELLO");
>> +  StringRef LongStr("hellx xello hell ello world foo bar hello HELLO");
>> +
>> +  struct {
>> +    StringRef Str;
>> +    char C;
>> +    std::size_t From;
>> +    std::size_t Pos;
>> +    std::size_t LowerPos;
>> +  } CharExpectations[] = {
>> +      {Str, 'h', 0U, 0U, 0U},
>> +      {Str, 'e', 0U, 1U, 1U},
>> +      {Str, 'l', 0U, 2U, 2U},
>> +      {Str, 'l', 3U, 3U, 3U},
>> +      {Str, 'o', 0U, 4U, 4U},
>> +      {Str, 'L', 0U, 7U, 2U},
>> +      {Str, 'z', 0U, StringRef::npos, StringRef::npos},
>> +  };
>> +
>> +  struct {
>> +    StringRef Str;
>> +    llvm::StringRef S;
>> +    std::size_t From;
>> +    std::size_t Pos;
>> +    std::size_t LowerPos;
>> +  } StrExpectations[] = {
>> +      {Str, "helloword", 0, StringRef::npos, StringRef::npos},
>> +      {Str, "hello", 0, 0U, 0U},
>> +      {Str, "ello", 0, 1U, 1U},
>> +      {Str, "zz", 0, StringRef::npos, StringRef::npos},
>> +      {Str, "ll", 2U, 2U, 2U},
>> +      {Str, "ll", 3U, StringRef::npos, 7U},
>> +      {Str, "LL", 2U, 7U, 2U},
>> +      {Str, "LL", 3U, 7U, 7U},
>> +      {Str, "", 0U, 0U, 0U},
>> +      {LongStr, "hello", 0U, 36U, 36U},
>> +      {LongStr, "foo", 0U, 28U, 28U},
>> +      {LongStr, "hell", 2U, 12U, 12U},
>> +      {LongStr, "HELL", 2U, 42U, 12U},
>> +      {LongStr, "", 0U, 0U, 0U}};
>> +
>> +  for (auto &E : CharExpectations) {
>> +    EXPECT_EQ(E.Pos, E.Str.find(E.C, E.From));
>> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.C, E.From));
>> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(toupper(E.C), E.From));
>> +  }
>> +
>> +  for (auto &E : StrExpectations) {
>> +    EXPECT_EQ(E.Pos, E.Str.find(E.S, E.From));
>> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S, E.From));
>> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S.upper(), E.From));
>> +  }
>>
>>    EXPECT_EQ(3U, Str.rfind('l'));
>>    EXPECT_EQ(StringRef::npos, Str.rfind('z'));
>> @@ -433,10 +470,19 @@ TEST(StringRefTest, Find) {
>>    EXPECT_EQ(1U, Str.rfind("ello"));
>>    EXPECT_EQ(StringRef::npos, Str.rfind("zz"));
>>
>> +  EXPECT_EQ(8U, Str.rfind_lower('l'));
>> +  EXPECT_EQ(8U, Str.rfind_lower('L'));
>> +  EXPECT_EQ(StringRef::npos, Str.rfind_lower('z'));
>> +  EXPECT_EQ(StringRef::npos, Str.rfind_lower("HELLOWORLD"));
>> +  EXPECT_EQ(5U, Str.rfind("HELLO"));
>> +  EXPECT_EQ(6U, Str.rfind("ELLO"));
>> +  EXPECT_EQ(StringRef::npos, Str.rfind("ZZ"));
>> +
>>    EXPECT_EQ(2U, Str.find_first_of('l'));
>>    EXPECT_EQ(1U, Str.find_first_of("el"));
>>    EXPECT_EQ(StringRef::npos, Str.find_first_of("xyz"));
>>
>> +  Str = "hello";
>>    EXPECT_EQ(1U, Str.find_first_not_of('h'));
>>    EXPECT_EQ(4U, Str.find_first_not_of("hel"));
>>    EXPECT_EQ(StringRef::npos, Str.find_first_not_of("hello"));
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161113/3ad7e131/attachment.html>


More information about the llvm-commits mailing list