[cfe-commits] [libcxx] r107889 - in /libcxx/trunk: include/regex test/re/re.alg/re.alg.search/basic.pass.cpp
Howard Hinnant
hhinnant at apple.com
Thu Jul 8 10:43:58 PDT 2010
Author: hhinnant
Date: Thu Jul 8 12:43:58 2010
New Revision: 107889
URL: http://llvm.org/viewvc/llvm-project?rev=107889&view=rev
Log:
Marked subexpressions inside a loop are now working in basic POSIX mode (only lightly tested so far).
Modified:
libcxx/trunk/include/regex
libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp
Modified: libcxx/trunk/include/regex
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/regex?rev=107889&r1=107888&r2=107889&view=diff
==============================================================================
--- libcxx/trunk/include/regex (original)
+++ libcxx/trunk/include/regex Thu Jul 8 12:43:58 2010
@@ -717,6 +717,9 @@
} // std
*/
+#include <sstream>
+#include <cassert>
+
#include <__config>
#include <stdexcept>
#include <__locale>
@@ -1224,10 +1227,9 @@
{
__end_state = -1000,
__consume_input, // -999
-// __try_state, // -998
__begin_marked_expr, // -998
__end_marked_expr, // -997
- __go_back, // -996
+ __pop_state, // -996
__accept_and_consume, // -995
__accept_but_not_consume, // -994
__reject, // -993
@@ -1239,7 +1241,6 @@
typedef __state<_CharT> __state;
int __do_;
- int __data_;
const __state* first;
const __state* second;
@@ -1252,6 +1253,18 @@
: __do_(0), first(__s1), second(__s2) {}
};
+template <class _CharT>
+ostream&
+operator<<(ostream& os, const __command<_CharT>& c)
+{
+ os << c.__do_;
+ if (c.first)
+ os << ", " << c.first->speak();
+ if (c.second)
+ os << ", " << c.second->speak();
+ return os;
+}
+
template <class _BidirectionalIterator> class sub_match;
// __state
@@ -1272,6 +1285,8 @@
vector<size_t>& __lc,
sub_match<const _CharT*>* __m,
regex_constants::match_flag_type __flags) const = 0;
+
+ virtual string speak() const = 0;
};
// __end_state
@@ -1290,6 +1305,8 @@
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const {return "end state";}
};
template <class _CharT>
@@ -1359,6 +1376,8 @@
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const {return "empty state";}
};
template <class _CharT>
@@ -1390,6 +1409,8 @@
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const {return "empty non-owning state";}
};
template <class _CharT>
@@ -1457,6 +1478,16 @@
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type __flags) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "loop {" << __min_ << ',' << __max_ << "}";
+ if (!__greedy_)
+ os << " not";
+ os << " greedy";
+ return os.str();
+ }
};
template <class _CharT>
@@ -1503,6 +1534,13 @@
vector<size_t>& __lc,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "zero loop " << __loop_id_;
+ return os.str();
+ }
};
template <class _CharT>
@@ -1537,6 +1575,13 @@
vector<size_t>& __lc,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "increment loop " << __loop_id_;
+ return os.str();
+ }
};
template <class _CharT>
@@ -1572,6 +1617,13 @@
vector<size_t>&,
sub_match<const _CharT*>* __sm,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "zero marked exprs [" << __begin_ << ',' << __end_ << ')';
+ return os.str();
+ }
};
template <class _CharT>
@@ -1599,29 +1651,36 @@
{
typedef __owns_one_state<_CharT> base;
- __begin_marked_subexpression(const __begin_marked_subexpression&);
- __begin_marked_subexpression& operator=(const __begin_marked_subexpression&);
+ unsigned __mexp_;
public:
typedef __command<_CharT> __command;
- explicit __begin_marked_subexpression(__state<_CharT>* __s)
- : base(__s) {}
+ explicit __begin_marked_subexpression(unsigned __mexp, __state<_CharT>* __s)
+ : base(__s), __mexp_(__mexp) {}
virtual __command __test(const _CharT*, const _CharT*,
const _CharT*,
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "begin marked expr " << __mexp_;
+ return os.str();
+ }
};
template <class _CharT>
__command<_CharT>
__begin_marked_subexpression<_CharT>::__test(const _CharT*, const _CharT* __c, const _CharT*,
vector<size_t>&,
- sub_match<const _CharT*>*,
+ sub_match<const _CharT*>* __s,
regex_constants::match_flag_type) const
{
- return __command(__command::__begin_marked_expr, this->first());
+ __s[__mexp_].first = __c;
+ return __command(__command::__accept_but_not_consume, this->first());
}
// __end_marked_subexpression
@@ -1632,29 +1691,37 @@
{
typedef __owns_one_state<_CharT> base;
- __end_marked_subexpression(const __end_marked_subexpression&);
- __end_marked_subexpression& operator=(const __end_marked_subexpression&);
+ unsigned __mexp_;
public:
typedef __command<_CharT> __command;
- explicit __end_marked_subexpression(__state<_CharT>* __s)
- : base(__s) {}
+ explicit __end_marked_subexpression(unsigned __mexp, __state<_CharT>* __s)
+ : base(__s), __mexp_(__mexp) {}
virtual __command __test(const _CharT*, const _CharT*,
const _CharT*,
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "end marked expr " << __mexp_;
+ return os.str();
+ }
};
template <class _CharT>
__command<_CharT>
__end_marked_subexpression<_CharT>::__test(const _CharT*, const _CharT* __c, const _CharT*,
vector<size_t>&,
- sub_match<const _CharT*>*,
+ sub_match<const _CharT*>* __s,
regex_constants::match_flag_type) const
{
- return __command(__command::__end_marked_expr, this->first());
+ __s[__mexp_].second = __c;
+ __s[__mexp_].matched = true;
+ return __command(__command::__accept_but_not_consume, this->first());
}
// __state_arg
@@ -1680,6 +1747,13 @@
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "state arg " << __arg_;
+ return os.str();
+ }
};
template <class _CharT>
@@ -1715,6 +1789,13 @@
vector<size_t>&,
sub_match<const _CharT*>*,
regex_constants::match_flag_type) const;
+
+ virtual string speak() const
+ {
+ ostringstream os;
+ os << "match char " << __c_;
+ return os.str();
+ }
};
template <class _CharT>
@@ -1876,7 +1957,8 @@
template <class _ForwardIterator>
_ForwardIterator
__parse_RE_dupl_symbol(_ForwardIterator __first, _ForwardIterator __last,
- __owns_one_state<_CharT>* __s);
+ __owns_one_state<_CharT>* __s,
+ unsigned __mexp_begin, unsigned __mexp_end);
template <class _ForwardIterator>
_ForwardIterator
__parse_ERE_dupl_symbol(_ForwardIterator __first, _ForwardIterator __last);
@@ -1923,8 +2005,10 @@
void __push_l_anchor() {}
void __push_r_anchor() {}
void __push_match_any() {}
- void __push_greedy_inf_repeat(size_t __min, __owns_one_state<_CharT>* __s)
- {__push_loop(__min, numeric_limits<size_t>::max(), __s);}
+ void __push_greedy_inf_repeat(size_t __min, __owns_one_state<_CharT>* __s,
+ unsigned __mexp_begin = 0, unsigned __mexp_end = 0)
+ {__push_loop(__min, numeric_limits<size_t>::max(), __s,
+ __mexp_begin, __mexp_end);}
void __push_exact_repeat(int __count) {}
void __push_loop(size_t __min, size_t __max, __owns_one_state<_CharT>* __s,
size_t __mexp_begin = 0, size_t __mexp_end = 0,
@@ -1969,6 +2053,7 @@
bool
__match_at_start_posix_subs(_BidirectionalIterator __first, _BidirectionalIterator __last,
match_results<_BidirectionalIterator, _Allocator>& __m,
+ vector<size_t>& __lc,
regex_constants::match_flag_type __flags) const;
template <class _B, class _A, class _C, class _T>
@@ -2151,9 +2236,11 @@
if (__first != __last)
{
__owns_one_state<_CharT>* __e = __end_;
+ unsigned __mexp_begin = __marked_count_;
_ForwardIterator __temp = __parse_nondupl_RE(__first, __last);
if (__temp != __first)
- __first = __parse_RE_dupl_symbol(__temp, __last, __e);
+ __first = __parse_RE_dupl_symbol(__temp, __last, __e,
+ __mexp_begin+1, __marked_count_+1);
}
return __first;
}
@@ -2462,13 +2549,15 @@
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_RE_dupl_symbol(_ForwardIterator __first,
_ForwardIterator __last,
- __owns_one_state<_CharT>* __s)
+ __owns_one_state<_CharT>* __s,
+ unsigned __mexp_begin,
+ unsigned __mexp_end)
{
if (__first != __last)
{
if (*__first == '*')
{
- __push_greedy_inf_repeat(0, __s);
+ __push_greedy_inf_repeat(0, __s, __mexp_begin, __mexp_end);
++__first;
}
else
@@ -2501,7 +2590,7 @@
if (__temp == __first)
throw regex_error(regex_constants::error_brace);
if (__max == -1)
- __push_greedy_inf_repeat(__min, __s);
+ __push_greedy_inf_repeat(__min, __s, __mexp_end, __mexp_end);
else
{
if (__max < __min)
@@ -2834,37 +2923,26 @@
void
basic_regex<_CharT, _Traits>::__push_char(value_type __c)
{
- __match_char<_CharT>* __s = new __match_char<_CharT>(__c, __end_->first());
- __end_->first() = __s;
- __end_ = __s;
+ __end_->first() = new __match_char<_CharT>(__c, __end_->first());
+ __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
template <class _CharT, class _Traits>
void
basic_regex<_CharT, _Traits>::__push_begin_marked_subexpression()
{
- __begin_marked_subexpression<_CharT>* __s =
- new __begin_marked_subexpression<_CharT>(__end_->first());
- __end_->first() = __s;
- __end_ = __s;
- __state_arg<_CharT>* __a = new __state_arg<_CharT>(++__marked_count_,
+ __end_->first() = new __begin_marked_subexpression<_CharT>(++__marked_count_,
__end_->first());
- __end_->first() = __a;
- __end_ = __a;
+ __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
template <class _CharT, class _Traits>
void
basic_regex<_CharT, _Traits>::__push_end_marked_subexpression(unsigned __sub)
{
- __end_marked_subexpression<_CharT>* __s =
- new __end_marked_subexpression<_CharT>(__end_->first());
- __end_->first() = __s;
- __end_ = __s;
- __state_arg<_CharT>* __a = new __state_arg<_CharT>(++__marked_count_,
- __end_->first());
- __end_->first() = __a;
- __end_ = __a;
+ __end_->first() = new __end_marked_subexpression<_CharT>(__sub,
+ __end_->first());
+ __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
typedef basic_regex<char> regex;
@@ -3402,9 +3480,7 @@
__prefix_.first = __f;
__prefix_.second = __f;
__prefix_.matched = false;
- __suffix_.first = __l;
- __suffix_.second = __l;
- __suffix_.matched = false;
+ __suffix_ = __unmatched_;
}
typedef match_results<const char*> cmatch;
@@ -3449,16 +3525,6 @@
vector<size_t>& __lc,
regex_constants::match_flag_type __flags) const
{
-/*
- How do you set __m.__matches[i].first and second?
- With const _CharT* [__first, __last), we need a reference
- _BidirectionalIterator to bounce off of. Something like:
- __m.__matches_[0].second = next(__m.__matches_[0].first, __current - __first_);
-
- Pre: __m.__matches_[0].first <-> __first ? or
- __m.__prefix_.first <-> first and
- __m.__suffix_.second <-> last ?
-*/
typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type;
__split_buffer<__command> __commands;
difference_type __j = 0;
@@ -3491,8 +3557,6 @@
break;
case __command::__accept_and_consume:
__commands.push_front(__command(__cmd.first));
- if (__cmd.second != nullptr)
- __commands.push_front(__command(__cmd.second));
break;
case __command::__accept_but_not_consume:
__commands.push_back(__command(__cmd.first));
@@ -3523,8 +3587,90 @@
basic_regex<_CharT, _Traits>::__match_at_start_posix_subs(
_BidirectionalIterator __first, _BidirectionalIterator __last,
match_results<_BidirectionalIterator, _Allocator>& __m,
+ vector<size_t>& __lc,
regex_constants::match_flag_type __flags) const
{
+ typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type;
+ vector<__command> __commands;
+ vector<_BidirectionalIterator> __current_stack;
+ vector<sub_match<_BidirectionalIterator> > __saved_matches;
+ vector<sub_match<_BidirectionalIterator> > __best_matches;
+ difference_type __j = 0;
+ difference_type __highest_j = 0;
+ difference_type _N = _STD::distance(__first, __last);
+ __state* __st = __start_.get();
+ if (__st)
+ {
+ __commands.push_back(__command(__st));
+ _BidirectionalIterator __current = __first;
+ do
+ {
+ __command __cmd = __commands.back();
+ __commands.pop_back();
+ if (__cmd.first != nullptr)
+ __cmd = __cmd.first->__test(__first, __current, __last, __lc,
+ __m.__matches_.data(), __flags);
+ switch (__cmd.__do_)
+ {
+ case __command::__end_state:
+ if (__highest_j < __j)
+ {
+ __highest_j = __j;
+ for (unsigned __i = 1; __i < __m.__matches_.size(); ++__i)
+ __best_matches.push_back(__m.__matches_[__i]);
+ }
+ break;
+ case __command::__pop_state:
+ for (unsigned __i = __m.__matches_.size(); __i > 1;)
+ {
+ assert(!__saved_matches.empty());
+ __m.__matches_[--__i] = __saved_matches.back();
+ __saved_matches.pop_back();
+ }
+ assert(!__current_stack.empty());
+ __current = __current_stack.back();
+ __current_stack.pop_back();
+ break;
+ case __command::__accept_and_consume:
+ __commands.push_back(__command(__cmd.first));
+ if (__current != __last)
+ {
+ ++__current;
+ ++__j;
+ }
+ break;
+ case __command::__accept_but_not_consume:
+ if (__cmd.second != nullptr)
+ {
+ __commands.push_back(__command(__cmd.second));
+ __commands.push_back(__command(__command::__pop_state));
+ __current_stack.push_back(__current);
+ for (unsigned __i = 1; __i < __m.__matches_.size(); ++__i)
+ __saved_matches.push_back(__m.__matches_[__i]);
+ }
+ __commands.push_back(__command(__cmd.first));
+ break;
+ case __command::__reject:
+ break;
+ default:
+ throw regex_error(regex_constants::error_temp);
+ break;
+ }
+ } while (!__commands.empty());
+ if (__highest_j != 0)
+ {
+ __m.__matches_[0].first = __first;
+ __m.__matches_[0].second = _STD::next(__first, __highest_j);
+ __m.__matches_[0].matched = true;
+ for (unsigned __i = __m.__matches_.size(); __i > 1;)
+ {
+ assert(!__best_matches.empty());
+ __m.__matches_[--__i] = __best_matches.back();
+ __best_matches.pop_back();
+ }
+ return true;
+ }
+ }
return false;
}
@@ -3541,7 +3687,7 @@
return __match_at_start_ecma(__first, __last, __m, __flags);
if (mark_count() == 0)
return __match_at_start_posix_nosubs(__first, __last, __m, __lc, __flags);
- return __match_at_start_posix_subs(__first, __last, __m, __flags);
+ return __match_at_start_posix_subs(__first, __last, __m, __lc, __flags);
}
template <class _CharT, class _Traits>
Modified: libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp?rev=107889&r1=107888&r2=107889&view=diff
==============================================================================
--- libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp (original)
+++ libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp Thu Jul 8 12:43:58 2010
@@ -117,26 +117,44 @@
assert(m.position(0) == 0);
assert(m.str(0) == s);
}
-// {
-// std::cmatch m;
-// const char s[] = "abcdefghijk";
-// assert(std::regex_search(s, m, std::regex("cd\\(\\(e\\)fg\\)hi",
-// std::regex_constants::basic)));
-// assert(m.size() == 3);
-// assert(m.prefix().matched);
-// assert(m.prefix().first == s);
-// assert(m.prefix().second == m[0].first);
-// assert(m.suffix().matched);
-// assert(m.suffix().first == m[0].second);
-// assert(m.suffix().second == s+std::regex_traits<char>::length(s));
-// assert(m.length(0) == 7);
-// assert(m.position(0) == 2);
-// assert(m.str(0) == "cdefghi");
-// assert(m.length(1) == 3);
-// assert(m.position(1) == 4);
-// assert(m.str(1) == "efg");
-// assert(m.length(2) == 1);
-// assert(m.position(2) == 4);
-// assert(m.str(2) == "e");
-// }
+ {
+ std::cmatch m;
+ const char s[] = "ababc";
+ assert(std::regex_search(s, m, std::regex("\\(ab\\)*c", std::regex_constants::basic)));
+ assert(m.size() == 2);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s+5);
+ assert(m.length(0) == 5);
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
+ assert(m.length(1) == 2);
+ assert(m.position(1) == 2);
+ assert(m.str(1) == "ab");
+ }
+ {
+ std::cmatch m;
+ const char s[] = "abcdefghijk";
+ assert(std::regex_search(s, m, std::regex("cd\\(\\(e\\)fg\\)hi",
+ std::regex_constants::basic)));
+ assert(m.size() == 3);
+ assert(m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s+std::regex_traits<char>::length(s));
+ assert(m.length(0) == 7);
+ assert(m.position(0) == 2);
+ assert(m.str(0) == "cdefghi");
+ assert(m.length(1) == 3);
+ assert(m.position(1) == 4);
+ assert(m.str(1) == "efg");
+ assert(m.length(2) == 1);
+ assert(m.position(2) == 4);
+ assert(m.str(2) == "e");
+ }
}
More information about the cfe-commits mailing list