[libcxx-commits] [libcxx] 6ba2d7b - [libc++] Fixes backreferences for extended grammar.

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Wed Feb 19 12:59:35 PST 2020


Author: Louis Dionne
Date: 2020-02-19T15:57:16-05:00
New Revision: 6ba2d7b166c2e07dfc8328a8253276710619b1fe

URL: https://github.com/llvm/llvm-project/commit/6ba2d7b166c2e07dfc8328a8253276710619b1fe
DIFF: https://github.com/llvm/llvm-project/commit/6ba2d7b166c2e07dfc8328a8253276710619b1fe.diff

LOG: [libc++] Fixes backreferences for extended grammar.

Regex backreferences were not properly parsed, and therefore not applied,
when the extended grammar was selected. This change parses them. The issue
was found while working on PR34297.

Thanks to Mark de Wever for the patch!

Differential Revision: https://reviews.llvm.org/D62451

Added: 
    

Modified: 
    libcxx/include/regex
    libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp
    libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/include/regex b/libcxx/include/regex
index 5ac9e325e136..e349fa6846ae 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -2837,6 +2837,8 @@ private:
         __parse_awk_escape(_ForwardIterator __first, _ForwardIterator __last,
                           basic_string<_CharT>* __str = nullptr);
 
+    bool __test_back_ref(_CharT c);
+
     _LIBCPP_INLINE_VISIBILITY
     void __push_l_anchor();
     void __push_r_anchor();
@@ -3408,18 +3410,8 @@ basic_regex<_CharT, _Traits>::__parse_BACKREF(_ForwardIterator __first,
     if (__first != __last)
     {
         _ForwardIterator __temp = _VSTD::next(__first);
-        if (__temp != __last)
-        {
-            if (*__first == '\\')
-            {
-                int __val = __traits_.value(*__temp, 10);
-                if (__val >= 1 && __val <= 9)
-                {
-                    __push_back_ref(__val);
-                    __first = ++__temp;
-                }
-            }
-        }
+        if (__temp != __last && *__first == '\\' && __test_back_ref(*__temp))
+            __first = ++__temp;
     }
     return __first;
 }
@@ -3547,6 +3539,8 @@ basic_regex<_CharT, _Traits>::__parse_QUOTED_CHAR_ERE(_ForwardIterator __first,
                 default:
                     if (__get_grammar(__flags_) == awk)
                         __first = __parse_awk_escape(++__first, __last);
+                    else if(__test_back_ref(*__temp))
+                        __first = ++__temp;
                     break;
                 }
             }
@@ -4660,6 +4654,20 @@ basic_regex<_CharT, _Traits>::__parse_egrep(_ForwardIterator __first,
     return __first;
 }
 
+template <class _CharT, class _Traits>
+bool
+basic_regex<_CharT, _Traits>::__test_back_ref(_CharT c)
+{
+    unsigned __val = __traits_.value(c, 10);
+    if (__val >= 1 && __val <= 9)
+    {
+        __push_back_ref(__val);
+        return true;
+    }
+
+    return false;
+}
+
 template <class _CharT, class _Traits>
 void
 basic_regex<_CharT, _Traits>::__push_loop(size_t __min, size_t __max,

diff  --git a/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp
index 9415505faaf8..28e5ea8560b7 100644
--- a/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp
@@ -429,6 +429,24 @@ int main(int, char**)
         assert(!std::regex_match(s, m, std::regex("-(.*),\1-", std::regex_constants::extended)));
         assert(m.size() == 0);
     }
+    {
+        std::cmatch m;
+        const char s[] = "-ab,ab-";
+        assert(std::regex_match(s, m, std::regex("-(.*),\\1-", std::regex_constants::extended)));
+        assert(m.size() == 2);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == m[0].second);
+        assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+        assert(m.length(1) == 2);
+        assert(m.position(1) == 1);
+        assert(m.str(1) == "ab");
+    }
     {
         std::cmatch m;
         const char s[] = "-ab,ab-";
@@ -1095,6 +1113,24 @@ int main(int, char**)
         assert(!std::regex_match(s, m, std::wregex(L"-(.*),\1-", std::regex_constants::extended)));
         assert(m.size() == 0);
     }
+    {
+        std::wcmatch m;
+        const wchar_t s[] = L"-ab,ab-";
+        assert(std::regex_match(s, m, std::wregex(L"-(.*),\\1-", std::regex_constants::extended)));
+        assert(m.size() == 2);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == m[0].second);
+        assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+        assert(m.length(1) == 2);
+        assert(m.position(1) == 1);
+        assert(m.str(1) == L"ab");
+    }
     {
         std::wcmatch m;
         const wchar_t s[] = L"-ab,ab-";

diff  --git a/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp
index 62e4822dbd17..0bc83fc747d3 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp
@@ -492,6 +492,24 @@ int main(int, char**)
         assert(!std::regex_search(s, m, std::regex("-(.*),\1-", std::regex_constants::extended)));
         assert(m.size() == 0);
     }
+    {
+        std::cmatch m;
+        const char s[] = "-ab,ab-";
+        assert(std::regex_search(s, m, std::regex("-(.*),\\1-", std::regex_constants::extended)));
+        assert(m.size() == 2);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == m[0].second);
+        assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+        assert(m.length(1) == 2);
+        assert(m.position(1) == 1);
+        assert(m.str(1) == "ab");
+    }
     {
         std::cmatch m;
         const char s[] = "-ab,ab-";
@@ -1248,6 +1266,24 @@ int main(int, char**)
         assert(!std::regex_search(s, m, std::wregex(L"-(.*),\1-", std::regex_constants::extended)));
         assert(m.size() == 0);
     }
+    {
+        std::wcmatch m;
+        const wchar_t s[] = L"-ab,ab-";
+        assert(std::regex_search(s, m, std::wregex(L"-(.*),\\1-", std::regex_constants::extended)));
+        assert(m.size() == 2);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == m[0].second);
+        assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+        assert(m.length(1) == 2);
+        assert(m.position(1) == 1);
+        assert(m.str(1) == L"ab");
+    }
     {
         std::wcmatch m;
         const wchar_t s[] = L"-ab,ab-";


        


More information about the libcxx-commits mailing list