[llvm] r286724 - [Support] Add StringRef::find_lower and contains_lower.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 12 12:20:52 PST 2016


Is there a better one that doesn't require copying the string?
On Sat, Nov 12, 2016 at 12:02 PM Rui Ueyama <ruiu at google.com> wrote:

> On Sat, Nov 12, 2016 at 9:17 AM, Zachary Turner via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
> Author: zturner
> Date: Sat Nov 12 11:17:12 2016
> New Revision: 286724
>
> URL: http://llvm.org/viewvc/llvm-project?rev=286724&view=rev
> Log:
> [Support] Add StringRef::find_lower and contains_lower.
>
> Differential Revision: https://reviews.llvm.org/D25299
>
> Modified:
>     llvm/trunk/include/llvm/ADT/StringRef.h
>     llvm/trunk/lib/Support/StringRef.cpp
>     llvm/trunk/unittests/ADT/StringRefTest.cpp
>
> Modified: llvm/trunk/include/llvm/ADT/StringRef.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringRef.h?rev=286724&r1=286723&r2=286724&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/ADT/StringRef.h (original)
> +++ llvm/trunk/include/llvm/ADT/StringRef.h Sat Nov 12 11:17:12 2016
> @@ -286,6 +286,12 @@ namespace llvm {
>        return npos;
>      }
>
> +    /// Search for the first character \p C in the string, ignoring case.
> +    ///
> +    /// \returns The index of the first occurrence of \p C, or npos if not
> +    /// found.
> +    size_t find_lower(char C, size_t From = 0) const;
> +
>      /// Search for the first character satisfying the predicate \p F
>      ///
>      /// \returns The index of the first character satisfying \p F starting from
> @@ -318,6 +324,12 @@ namespace llvm {
>      /// found.
>      size_t find(StringRef Str, size_t From = 0) const;
>
> +    /// Search for the first string \p Str in the string, ignoring case.
> +    ///
> +    /// \returns The index of the first occurrence of \p Str, or npos if not
> +    /// found.
> +    size_t find_lower(StringRef Str, size_t From = 0) const;
> +
>      /// Search for the last character \p C in the string.
>      ///
>      /// \returns The index of the last occurrence of \p C, or npos if not
> @@ -333,12 +345,24 @@ namespace llvm {
>        return npos;
>      }
>
> +    /// Search for the last character \p C in the string, ignoring case.
> +    ///
> +    /// \returns The index of the last occurrence of \p C, or npos if not
> +    /// found.
> +    size_t rfind_lower(char C, size_t From = npos) const;
> +
>      /// Search for the last string \p Str in the string.
>      ///
>      /// \returns The index of the last occurrence of \p Str, or npos if not
>      /// found.
>      size_t rfind(StringRef Str) const;
>
> +    /// Search for the last string \p Str in the string, ignoring case.
> +    ///
> +    /// \returns The index of the last occurrence of \p Str, or npos if not
> +    /// found.
> +    size_t rfind_lower(StringRef Str) const;
> +
>      /// Find the first character in the string that is \p C, or npos if not
>      /// found. Same as find.
>      size_t find_first_of(char C, size_t From = 0) const {
> @@ -393,6 +417,18 @@ namespace llvm {
>      LLVM_ATTRIBUTE_ALWAYS_INLINE
>      bool contains(char C) const { return find_first_of(C) != npos; }
>
> +    /// Return true if the given string is a substring of *this, and false
> +    /// otherwise.
> +    LLVM_ATTRIBUTE_ALWAYS_INLINE
> +    bool contains_lower(StringRef Other) const {
> +      return find_lower(Other) != npos;
> +    }
> +
> +    /// Return true if the given character is contained in *this, and false
> +    /// otherwise.
> +    LLVM_ATTRIBUTE_ALWAYS_INLINE
> +    bool contains_lower(char C) const { return find_lower(C) != npos; }
> +
>      /// @}
>      /// @name Helpful Algorithms
>      /// @{
>
> Modified: llvm/trunk/lib/Support/StringRef.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringRef.cpp?rev=286724&r1=286723&r2=286724&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Support/StringRef.cpp (original)
> +++ llvm/trunk/lib/Support/StringRef.cpp Sat Nov 12 11:17:12 2016
> @@ -69,6 +69,11 @@ bool StringRef::endswith_lower(StringRef
>        ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
>  }
>
> +size_t StringRef::find_lower(char C, size_t From) const {
> +  char L = ascii_tolower(C);
> +  return find_if([L](char D) { return ascii_tolower(D) == L; }, From);
> +}
> +
>  /// compare_numeric - Compare strings, handle embedded numbers.
>  int StringRef::compare_numeric(StringRef RHS) const {
>    for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
> @@ -182,6 +187,28 @@ size_t StringRef::find(StringRef Str, si
>    return npos;
>  }
>
> +size_t StringRef::find_lower(StringRef Str, size_t From) const {
> +  StringRef This = substr(From);
> +  while (This.size() >= Str.size()) {
> +    if (This.startswith_lower(Str))
> +      return From;
> +    This = This.drop_front();
> +    ++From;
> +  }
> +  return npos;
> +}
>
>
> This is a pretty naive algorithm. Isn't this too slow?
>
>
> +
> +size_t StringRef::rfind_lower(char C, size_t From) const {
> +  From = std::min(From, Length);
> +  size_t i = From;
> +  while (i != 0) {
> +    --i;
> +    if (ascii_tolower(Data[i]) == ascii_tolower(C))
> +      return i;
> +  }
> +  return npos;
> +}
> +
>  /// rfind - Search for the last string \arg Str in the string.
>  ///
>  /// \return - The index of the last occurrence of \arg Str, or npos if not
> @@ -196,6 +223,18 @@ size_t StringRef::rfind(StringRef Str) c
>        return i;
>    }
>    return npos;
> +}
> +
> +size_t StringRef::rfind_lower(StringRef Str) const {
> +  size_t N = Str.size();
> +  if (N > Length)
> +    return npos;
> +  for (size_t i = Length - N + 1, e = 0; i != e;) {
> +    --i;
> +    if (substr(i, N).equals_lower(Str))
> +      return i;
> +  }
> +  return npos;
>  }
>
>  /// find_first_of - Find the first character in the string that is in \arg
>
> Modified: llvm/trunk/unittests/ADT/StringRefTest.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/StringRefTest.cpp?rev=286724&r1=286723&r2=286724&view=diff
>
> ==============================================================================
> --- llvm/trunk/unittests/ADT/StringRefTest.cpp (original)
> +++ llvm/trunk/unittests/ADT/StringRefTest.cpp Sat Nov 12 11:17:12 2016
> @@ -410,21 +410,58 @@ TEST(StringRefTest, ConsumeBack) {
>  }
>
>  TEST(StringRefTest, Find) {
> -  StringRef Str("hello");
> -  EXPECT_EQ(2U, Str.find('l'));
> -  EXPECT_EQ(StringRef::npos, Str.find('z'));
> -  EXPECT_EQ(StringRef::npos, Str.find("helloworld"));
> -  EXPECT_EQ(0U, Str.find("hello"));
> -  EXPECT_EQ(1U, Str.find("ello"));
> -  EXPECT_EQ(StringRef::npos, Str.find("zz"));
> -  EXPECT_EQ(2U, Str.find("ll", 2));
> -  EXPECT_EQ(StringRef::npos, Str.find("ll", 3));
> -  EXPECT_EQ(0U, Str.find(""));
> -  StringRef LongStr("hellx xello hell ello world foo bar hello");
> -  EXPECT_EQ(36U, LongStr.find("hello"));
> -  EXPECT_EQ(28U, LongStr.find("foo"));
> -  EXPECT_EQ(12U, LongStr.find("hell", 2));
> -  EXPECT_EQ(0U, LongStr.find(""));
> +  StringRef Str("helloHELLO");
> +  StringRef LongStr("hellx xello hell ello world foo bar hello HELLO");
> +
> +  struct {
> +    StringRef Str;
> +    char C;
> +    std::size_t From;
> +    std::size_t Pos;
> +    std::size_t LowerPos;
> +  } CharExpectations[] = {
> +      {Str, 'h', 0U, 0U, 0U},
> +      {Str, 'e', 0U, 1U, 1U},
> +      {Str, 'l', 0U, 2U, 2U},
> +      {Str, 'l', 3U, 3U, 3U},
> +      {Str, 'o', 0U, 4U, 4U},
> +      {Str, 'L', 0U, 7U, 2U},
> +      {Str, 'z', 0U, StringRef::npos, StringRef::npos},
> +  };
> +
> +  struct {
> +    StringRef Str;
> +    llvm::StringRef S;
> +    std::size_t From;
> +    std::size_t Pos;
> +    std::size_t LowerPos;
> +  } StrExpectations[] = {
> +      {Str, "helloword", 0, StringRef::npos, StringRef::npos},
> +      {Str, "hello", 0, 0U, 0U},
> +      {Str, "ello", 0, 1U, 1U},
> +      {Str, "zz", 0, StringRef::npos, StringRef::npos},
> +      {Str, "ll", 2U, 2U, 2U},
> +      {Str, "ll", 3U, StringRef::npos, 7U},
> +      {Str, "LL", 2U, 7U, 2U},
> +      {Str, "LL", 3U, 7U, 7U},
> +      {Str, "", 0U, 0U, 0U},
> +      {LongStr, "hello", 0U, 36U, 36U},
> +      {LongStr, "foo", 0U, 28U, 28U},
> +      {LongStr, "hell", 2U, 12U, 12U},
> +      {LongStr, "HELL", 2U, 42U, 12U},
> +      {LongStr, "", 0U, 0U, 0U}};
> +
> +  for (auto &E : CharExpectations) {
> +    EXPECT_EQ(E.Pos, E.Str.find(E.C, E.From));
> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.C, E.From));
> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(toupper(E.C), E.From));
> +  }
> +
> +  for (auto &E : StrExpectations) {
> +    EXPECT_EQ(E.Pos, E.Str.find(E.S, E.From));
> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S, E.From));
> +    EXPECT_EQ(E.LowerPos, E.Str.find_lower(E.S.upper(), E.From));
> +  }
>
>    EXPECT_EQ(3U, Str.rfind('l'));
>    EXPECT_EQ(StringRef::npos, Str.rfind('z'));
> @@ -433,10 +470,19 @@ TEST(StringRefTest, Find) {
>    EXPECT_EQ(1U, Str.rfind("ello"));
>    EXPECT_EQ(StringRef::npos, Str.rfind("zz"));
>
> +  EXPECT_EQ(8U, Str.rfind_lower('l'));
> +  EXPECT_EQ(8U, Str.rfind_lower('L'));
> +  EXPECT_EQ(StringRef::npos, Str.rfind_lower('z'));
> +  EXPECT_EQ(StringRef::npos, Str.rfind_lower("HELLOWORLD"));
> +  EXPECT_EQ(5U, Str.rfind("HELLO"));
> +  EXPECT_EQ(6U, Str.rfind("ELLO"));
> +  EXPECT_EQ(StringRef::npos, Str.rfind("ZZ"));
> +
>    EXPECT_EQ(2U, Str.find_first_of('l'));
>    EXPECT_EQ(1U, Str.find_first_of("el"));
>    EXPECT_EQ(StringRef::npos, Str.find_first_of("xyz"));
>
> +  Str = "hello";
>    EXPECT_EQ(1U, Str.find_first_not_of('h'));
>    EXPECT_EQ(4U, Str.find_first_not_of("hel"));
>    EXPECT_EQ(StringRef::npos, Str.find_first_not_of("hello"));
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161112/6f3972d7/attachment.html>


More information about the llvm-commits mailing list