[cfe-commits] [libcxx] r108280 - in /libcxx/trunk: include/regex test/re/re.alg/re.alg.search/basic.pass.cpp
Howard Hinnant
hhinnant at apple.com
Tue Jul 13 14:48:07 PDT 2010
Author: hhinnant
Date: Tue Jul 13 16:48:06 2010
New Revision: 108280
URL: http://llvm.org/viewvc/llvm-project?rev=108280&view=rev
Log:
Bracket expressions are working (lightly tested).
Modified:
libcxx/trunk/include/regex
libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp
Modified: libcxx/trunk/include/regex
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/regex?rev=108280&r1=108279&r2=108280&view=diff
==============================================================================
--- libcxx/trunk/include/regex (original)
+++ libcxx/trunk/include/regex Tue Jul 13 16:48:06 2010
@@ -2022,6 +2022,235 @@
}
}
+// __bracket_expression: state node that tests the current input position against a bracket expression ([...]) assembled through the __add_* members below.
+// __exec() accepts (consuming one character, or two for a digraph) when the match result differs from __negate_, and rejects otherwise.
+template <class _CharT, class _Traits>
+class __bracket_expression
+ : public __owns_one_state<_CharT>
+{
+ typedef __owns_one_state<_CharT> base;
+ typedef typename _Traits::string_type string_type;
+
+ _Traits __traits_; // copy of the traits passed to the constructor; used for translate*/transform*/isctype/lookup_collatename
+ vector<_CharT> __chars_; // single characters added by __add_char (already translated per the flags)
+ vector<pair<string_type, string_type> > __ranges_; // inclusive [first, second] range endpoints added by __add_range
+ vector<pair<_CharT, _CharT> > __digraphs_; // two-character elements added by __add_digraph
+ vector<string_type> __equivalences_; // strings added by __add_equivalence; __exec compares them with transform_primary() results
+ ctype_base::mask __mask_; // union of all masks passed to __add_class
+ bool __negate_; // when true the match result is inverted in __exec
+ bool __icase_; // when true characters go through translate_nocase()
+ bool __collate_; // when true (and !__icase_) characters go through translate(); ranges store transform() keys
+
+ __bracket_expression(const __bracket_expression&); // declared private, no definition visible here -- presumably to forbid copying (confirm)
+ __bracket_expression& operator=(const __bracket_expression&); // same as above for assignment
+public:
+ typedef _STD::__state<_CharT> __state;
+
+ __bracket_expression(const _Traits& __traits, __node<_CharT>* __s, // __s is forwarded to the base class
+ bool __negate, bool __icase, bool __collate)
+ : base(__s), __traits_(__traits), __mask_(), __negate_(__negate), // __mask_ is value-initialized; the vectors start empty
+ __icase_(__icase), __collate_(__collate) {}
+
+ virtual void __exec(__state&) const; // defined out of line after the class
+
+ void __add_char(_CharT __c) // adds one character, translated with the same precedence __exec uses: icase, then collate, else raw
+ {
+ if (__icase_)
+ __chars_.push_back(__traits_.translate_nocase(__c));
+ else if (__collate_)
+ __chars_.push_back(__traits_.translate(__c));
+ else
+ __chars_.push_back(__c);
+ }
+ void __add_range(string_type __b, string_type __e) // adds the range [__b, __e]; taken by value because both are modified in place
+ {
+ if (__collate_)
+ { // collating: translate every character of both endpoints, then store their transform() keys
+ if (__icase_)
+ {
+ for (size_t __i = 0; __i < __b.size(); ++__i)
+ __b[__i] = __traits_.translate_nocase(__b[__i]);
+ for (size_t __i = 0; __i < __e.size(); ++__i)
+ __e[__i] = __traits_.translate_nocase(__e[__i]);
+ }
+ else
+ {
+ for (size_t __i = 0; __i < __b.size(); ++__i)
+ __b[__i] = __traits_.translate(__b[__i]);
+ for (size_t __i = 0; __i < __e.size(); ++__i)
+ __e[__i] = __traits_.translate(__e[__i]);
+ }
+ __ranges_.push_back(make_pair(
+ __traits_.transform(__b.begin(), __b.end()),
+ __traits_.transform(__e.begin(), __e.end())));
+ }
+ else
+ { // not collating: only single-character endpoints are accepted
+ if (__b.size() != 1 || __e.size() != 1)
+ throw regex_error(regex_constants::error_collate); // multi-character endpoint without collate
+ if (__icase_)
+ {
+ __b[0] = __traits_.translate_nocase(__b[0]);
+ __e[0] = __traits_.translate_nocase(__e[0]);
+ }
+ __ranges_.push_back(make_pair(_STD::move(__b), _STD::move(__e))); // stored as one-character strings, compared directly in __exec
+ }
+ }
+ void __add_digraph(_CharT __c1, _CharT __c2) // adds a two-character element, translating both characters like __add_char does
+ {
+ if (__icase_)
+ __digraphs_.push_back(make_pair(__traits_.translate_nocase(__c1),
+ __traits_.translate_nocase(__c2)));
+ else if (__collate_)
+ __digraphs_.push_back(make_pair(__traits_.translate(__c1),
+ __traits_.translate(__c2)));
+ else
+ __digraphs_.push_back(make_pair(__c1, __c2));
+ }
+ void __add_equivalence(const string_type& __s) // stores __s unchanged (no translation is applied here)
+ {__equivalences_.push_back(__s);}
+ void __add_class(ctype_base::mask __mask) // ORs __mask into the accumulated class mask
+ {__mask_ |= __mask;}
+
+ virtual string speak() const // returns the fixed text "__bracket_expression "; looks like a debugging aid (confirm)
+ {
+ ostringstream os;
+ os << "__bracket_expression ";
+ return os.str();
+ }
+};
+// __exec: tests the input at __s.__current_ against the stored digraphs, characters, ranges, equivalences and class mask, then sets __s.__do_/__s.__node_ (and advances __s.__current_ on accept).
+template <class _CharT, class _Traits>
+void
+__bracket_expression<_CharT, _Traits>::__exec(__state& __s) const
+{
+ bool __found = false; // true once the current character(s) match any stored element (before __negate_ is applied)
+ unsigned __consumed = 0; // number of characters __s.__current_ advances by on accept
+ if (__s.__current_ != __s.__last_)
+ {
+ ++__consumed;
+ const _CharT* __next = next(__s.__current_);
+ if (__next != __s.__last_)
+ { // at least two characters remain: first try to treat them as a digraph
+ pair<_CharT, _CharT> __ch2(*__s.__current_, *__next);
+ if (__icase_)
+ {
+ __ch2.first = __traits_.translate_nocase(__ch2.first);
+ __ch2.second = __traits_.translate_nocase(__ch2.second);
+ }
+ else if (__collate_)
+ {
+ __ch2.first = __traits_.translate(__ch2.first);
+ __ch2.second = __traits_.translate(__ch2.second);
+ }
+ if (!__traits_.lookup_collatename(&__ch2.first, &__ch2.first+2).empty()) // NOTE(review): [&first, &first+2) assumes pair's first and second are adjacent in memory -- confirm
+ {
+ // __ch2 is a digraph in this locale, so a successful match consumes both characters
+ ++__consumed;
+ for (size_t __i = 0; __i < __digraphs_.size(); ++__i)
+ {
+ if (__ch2 == __digraphs_[__i])
+ {
+ __found = true;
+ goto __exit;
+ }
+ }
+ if (__collate_ && !__ranges_.empty()) // digraphs are tested against ranges only when collating
+ {
+ string_type __s2 = __traits_.transform(&__ch2.first,
+ &__ch2.first + 2);
+ for (size_t __i = 0; __i < __ranges_.size(); ++__i)
+ {
+ if (__ranges_[__i].first <= __s2 &&
+ __s2 <= __ranges_[__i].second)
+ {
+ __found = true;
+ goto __exit;
+ }
+ }
+ }
+ if (!__equivalences_.empty())
+ {
+ string_type __s2 = __traits_.transform_primary(&__ch2.first,
+ &__ch2.first + 2);
+ for (size_t __i = 0; __i < __equivalences_.size(); ++__i)
+ {
+ if (__s2 == __equivalences_[__i])
+ {
+ __found = true;
+ goto __exit;
+ }
+ }
+ }
+ if (__traits_.isctype(__ch2.first, __mask_) && // class mask must hold for both characters of the digraph
+ __traits_.isctype(__ch2.second, __mask_))
+ {
+ __found = true;
+ goto __exit;
+ }
+ goto __exit; // digraph matched nothing: __found stays false and the single-character tests below are skipped
+ }
+ }
+ // test *__s.__current_ as a single character (not a digraph); __consumed is 1 on this path
+ _CharT __ch = *__s.__current_;
+ if (__icase_)
+ __ch = __traits_.translate_nocase(__ch);
+ else if (__collate_)
+ __ch = __traits_.translate(__ch);
+ for (size_t __i = 0; __i < __chars_.size(); ++__i)
+ {
+ if (__ch == __chars_[__i])
+ {
+ __found = true;
+ goto __exit;
+ }
+ }
+ if (!__ranges_.empty())
+ {
+ string_type __s2 = __collate_ ? // comparison key: transform() result when collating, else the one-character string
+ __traits_.transform(&__ch, &__ch + 1) :
+ string_type(1, __ch);
+ for (size_t __i = 0; __i < __ranges_.size(); ++__i)
+ {
+ if (__ranges_[__i].first <= __s2 && __s2 <= __ranges_[__i].second)
+ {
+ __found = true;
+ goto __exit;
+ }
+ }
+ }
+ if (!__equivalences_.empty())
+ {
+ string_type __s2 = __traits_.transform_primary(&__ch, &__ch + 1);
+ for (size_t __i = 0; __i < __equivalences_.size(); ++__i)
+ {
+ if (__s2 == __equivalences_[__i])
+ {
+ __found = true;
+ goto __exit;
+ }
+ }
+ }
+ if (__traits_.isctype(__ch, __mask_))
+ __found = true;
+ }
+ else
+ __found = __negate_; // force reject: at end of input, makes (__found != __negate_) false even for a negated list
+__exit:
+ if (__found != __negate_) // accept when the match result, inverted if __negate_, is true
+ {
+ _CharT __ch = *__s.__current_; // NOTE(review): __ch is not used in this branch
+ __s.__do_ = __state::__accept_and_consume;
+ __s.__current_ += __consumed;
+ __s.__node_ = this->first();
+ }
+ else
+ {
+ __s.__do_ = __state::__reject;
+ __s.__node_ = nullptr;
+ }
+}
+
template <class, class> class match_results;
template <class _CharT, class _Traits = regex_traits<_CharT> >
@@ -2186,19 +2415,24 @@
__parse_bracket_expression(_ForwardIterator __first, _ForwardIterator __last);
template <class _ForwardIterator>
_ForwardIterator
- __parse_follow_list(_ForwardIterator __first, _ForwardIterator __last);
+ __parse_follow_list(_ForwardIterator __first, _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml);
template <class _ForwardIterator>
_ForwardIterator
- __parse_expression_term(_ForwardIterator __first, _ForwardIterator __last);
+ __parse_expression_term(_ForwardIterator __first, _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml);
template <class _ForwardIterator>
_ForwardIterator
- __parse_equivalence_class(_ForwardIterator __first, _ForwardIterator __last);
+ __parse_equivalence_class(_ForwardIterator __first, _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml);
template <class _ForwardIterator>
_ForwardIterator
- __parse_character_class(_ForwardIterator __first, _ForwardIterator __last);
+ __parse_character_class(_ForwardIterator __first, _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml);
template <class _ForwardIterator>
_ForwardIterator
- __parse_collating_symbol(_ForwardIterator __first, _ForwardIterator __last);
+ __parse_collating_symbol(_ForwardIterator __first, _ForwardIterator __last,
+ basic_string<_CharT>& __col_sym);
template <class _ForwardIterator>
_ForwardIterator
__parse_DUP_COUNT(_ForwardIterator __first, _ForwardIterator __last, int& __c);
@@ -2232,14 +2466,8 @@
void __push_loop(size_t __min, size_t __max, __owns_one_state<_CharT>* __s,
size_t __mexp_begin = 0, size_t __mexp_end = 0,
bool __greedy = true);
- void __start_nonmatching_list() {}
- void __start_matching_list() {}
- void __end_nonmatching_list() {}
- void __end_matching_list() {}
+ __bracket_expression<_CharT, _Traits>* __start_matching_list(bool __negate);
void __push_char(value_type __c);
- void __push_char(const typename _Traits::string_type& __c) {}
- void __push_range() {}
- void __push_class_type(typename _Traits::char_class_type) {}
void __push_back_ref(int __i);
void __push_alternation() {}
void __push_begin_marked_subexpression();
@@ -2905,36 +3133,31 @@
{
if (++__first == __last)
throw regex_error(regex_constants::error_brack);
- bool __non_matching = false;
+ bool __negate = false;
if (*__first == '^')
{
++__first;
- __non_matching = true;
- __start_nonmatching_list();
+ __negate = true;
}
- else
- __start_matching_list();
+ __bracket_expression<_CharT, _Traits>* __ml = __start_matching_list(__negate);
+ // __ml owned by *this
if (__first == __last)
throw regex_error(regex_constants::error_brack);
if (*__first == ']')
{
- __push_char(']');
+ __ml->__add_char(']');
++__first;
}
- __first = __parse_follow_list(__first, __last);
+ __first = __parse_follow_list(__first, __last, __ml);
if (__first == __last)
throw regex_error(regex_constants::error_brack);
if (*__first == '-')
{
- __push_char('-');
+ __ml->__add_char('-');
++__first;
}
if (__first == __last || *__first != ']')
throw regex_error(regex_constants::error_brack);
- if (__non_matching)
- __end_nonmatching_list();
- else
- __end_matching_list();
++__first;
}
return __first;
@@ -2944,13 +3167,15 @@
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_follow_list(_ForwardIterator __first,
- _ForwardIterator __last)
+ _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml)
{
if (__first != __last)
{
while (true)
{
- _ForwardIterator __temp = __parse_expression_term(__first, __last);
+ _ForwardIterator __temp = __parse_expression_term(__first, __last,
+ __ml);
if (__temp == __first)
break;
__first = __temp;
@@ -2963,27 +3188,29 @@
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_expression_term(_ForwardIterator __first,
- _ForwardIterator __last)
+ _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml)
{
if (__first != __last && *__first != ']')
{
bool __parsed_one = false;
_ForwardIterator __temp = next(__first);
+ basic_string<_CharT> __start_range;
if (__temp != __last && *__first == '[')
{
if (*__temp == '=')
- return __parse_equivalence_class(++__temp, __last);
+ return __parse_equivalence_class(++__temp, __last, __ml);
else if (*__temp == ':')
- return __parse_character_class(++__temp, __last);
+ return __parse_character_class(++__temp, __last, __ml);
else if (*__temp == '.')
{
- __first = __parse_collating_symbol(++__temp, __last);
+ __first = __parse_collating_symbol(++__temp, __last, __start_range);
__parsed_one = true;
}
}
if (!__parsed_one)
{
- __push_char(*__first);
+ __start_range = *__first;
++__first;
}
if (__first != __last && *__first != ']')
@@ -2992,18 +3219,33 @@
if (__temp != __last && *__first == '-' && *__temp != ']')
{
// parse a range
+ basic_string<_CharT> __end_range;
__first = __temp;
++__temp;
if (__temp != __last && *__first == '[' && *__temp == '.')
- __first = __parse_collating_symbol(++__temp, __last);
+ __first = __parse_collating_symbol(++__temp, __last, __end_range);
else
{
- __push_char(*__first);
+ __end_range = *__first;
++__first;
}
- __push_range();
+ __ml->__add_range(_STD::move(__start_range), _STD::move(__end_range));
+ }
+ else
+ {
+ if (__start_range.size() == 1)
+ __ml->__add_char(__start_range[0]);
+ else
+ __ml->__add_digraph(__start_range[0], __start_range[1]);
}
}
+ else
+ {
+ if (__start_range.size() == 1)
+ __ml->__add_char(__start_range[0]);
+ else
+ __ml->__add_digraph(__start_range[0], __start_range[1]);
+ }
}
return __first;
}
@@ -3012,7 +3254,8 @@
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_equivalence_class(_ForwardIterator __first,
- _ForwardIterator __last)
+ _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml)
{
// Found [=
// This means =] must exist
@@ -3026,14 +3269,26 @@
string_type __collate_name =
__traits_.lookup_collatename(__first, __temp);
if (__collate_name.empty())
- throw regex_error(regex_constants::error_brack);
+ throw regex_error(regex_constants::error_collate);
string_type __equiv_name =
__traits_.transform_primary(__collate_name.begin(),
__collate_name.end());
if (!__equiv_name.empty())
- __push_char(__equiv_name);
+ __ml->__add_equivalence(__equiv_name);
else
- __push_char(__collate_name);
+ {
+ switch (__collate_name.size())
+ {
+ case 1:
+ __ml->__add_char(__collate_name[0]);
+ break;
+ case 2:
+ __ml->__add_digraph(__collate_name[0], __collate_name[1]);
+ break;
+ default:
+ throw regex_error(regex_constants::error_collate);
+ }
+ }
__first = next(__temp, 2);
return __first;
}
@@ -3042,7 +3297,8 @@
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_character_class(_ForwardIterator __first,
- _ForwardIterator __last)
+ _ForwardIterator __last,
+ __bracket_expression<_CharT, _Traits>* __ml)
{
// Found [:
// This means :] must exist
@@ -3057,7 +3313,7 @@
__traits_.lookup_classname(__first, __temp, __flags_ & icase);
if (__class_type == 0)
throw regex_error(regex_constants::error_brack);
- __push_class_type(__class_type);
+ __ml->__add_class(__class_type);
__first = next(__temp, 2);
return __first;
}
@@ -3066,7 +3322,8 @@
template <class _ForwardIterator>
_ForwardIterator
basic_regex<_CharT, _Traits>::__parse_collating_symbol(_ForwardIterator __first,
- _ForwardIterator __last)
+ _ForwardIterator __last,
+ basic_string<_CharT>& __col_sym)
{
// Found [.
// This means .] must exist
@@ -3077,11 +3334,15 @@
throw regex_error(regex_constants::error_brack);
// [__first, __temp) contains all text in [. ... .]
typedef typename _Traits::string_type string_type;
- string_type __collate_name =
- __traits_.lookup_collatename(__first, __temp);
- if (__collate_name.empty())
- throw regex_error(regex_constants::error_brack);
- __push_char(__collate_name);
+ __col_sym = __traits_.lookup_collatename(__first, __temp);
+ switch (__col_sym.size())
+ {
+ case 1:
+ case 2:
+ break;
+ default:
+ throw regex_error(regex_constants::error_collate);
+ }
__first = next(__temp, 2);
return __first;
}
@@ -3129,10 +3390,10 @@
void
basic_regex<_CharT, _Traits>::__push_char(value_type __c)
{
- if (flags() & regex_constants::icase)
+ if (flags() & icase)
__end_->first() = new __match_char_icase<_CharT, _Traits>
(__traits_, __c, __end_->first());
- else if (flags() & regex_constants::collate)
+ else if (flags() & collate)
__end_->first() = new __match_char_collate<_CharT, _Traits>
(__traits_, __c, __end_->first());
else
@@ -3178,10 +3439,10 @@
void
basic_regex<_CharT, _Traits>::__push_back_ref(int __i)
{
- if (flags() & regex_constants::icase)
+ if (flags() & icase)
__end_->first() = new __back_ref_icase<_CharT, _Traits>
(__traits_, __i, __end_->first());
- else if (flags() & regex_constants::collate)
+ else if (flags() & collate)
__end_->first() = new __back_ref_collate<_CharT, _Traits>
(__traits_, __i, __end_->first());
else
@@ -3189,6 +3450,19 @@
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
+template <class _CharT, class _Traits>
+__bracket_expression<_CharT, _Traits>*
+basic_regex<_CharT, _Traits>::__start_matching_list(bool __negate) // appends a new, empty bracket-expression node after __end_ and returns it
+{
+ __bracket_expression<_CharT, _Traits>* __r =
+ new __bracket_expression<_CharT, _Traits>(__traits_, __end_->first(), // the new node receives the current successor of __end_
+ __negate, __flags_ & icase, // flag bits convert to bool: nonzero means set
+ __flags_ & collate);
+ __end_->first() = __r; // link the new node in as the successor of __end_
+ __end_ = __r; // the new node becomes the end of the chain
+ return __r; // raw pointer handed back so the parser can call the __add_* members on it
+}
+
typedef basic_regex<char> regex;
typedef basic_regex<wchar_t> wregex;
Modified: libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp?rev=108280&r1=108279&r2=108280&view=diff
==============================================================================
--- libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp (original)
+++ libcxx/trunk/test/re/re.alg/re.alg.search/basic.pass.cpp Tue Jul 13 16:48:06 2010
@@ -498,4 +498,105 @@
std::regex_constants::basic)));
assert(m.size() == 0);
}
+ {
+ std::cmatch m;
+ const char s[] = "a";
+ assert(std::regex_search(s, m, std::regex("^[a]$",
+ std::regex_constants::basic)));
+ assert(m.size() == 1);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == m[0].second);
+ assert(m.length(0) == 1);
+ assert(m.position(0) == 0);
+ assert(m.str(0) == "a");
+ }
+ {
+ std::cmatch m;
+ const char s[] = "a";
+ assert(std::regex_search(s, m, std::regex("^[ab]$",
+ std::regex_constants::basic)));
+ assert(m.size() == 1);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == m[0].second);
+ assert(m.length(0) == 1);
+ assert(m.position(0) == 0);
+ assert(m.str(0) == "a");
+ }
+ {
+ std::cmatch m;
+ const char s[] = "c";
+ assert(std::regex_search(s, m, std::regex("^[a-f]$",
+ std::regex_constants::basic)));
+ assert(m.size() == 1);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == m[0].second);
+ assert(m.length(0) == 1);
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
+ }
+ {
+ std::cmatch m;
+ const char s[] = "g";
+ assert(!std::regex_search(s, m, std::regex("^[a-f]$",
+ std::regex_constants::basic)));
+ assert(m.size() == 0);
+ }
+ {
+ std::cmatch m;
+ const char s[] = "Iraqi";
+ assert(std::regex_search(s, m, std::regex("q[^u]",
+ std::regex_constants::basic)));
+ assert(m.size() == 1);
+ assert(m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == m[0].second);
+ assert(m.length(0) == 2);
+ assert(m.position(0) == 3);
+ assert(m.str(0) == "qi");
+ }
+ {
+ std::cmatch m;
+ const char s[] = "Iraq";
+ assert(!std::regex_search(s, m, std::regex("q[^u]",
+ std::regex_constants::basic)));
+ assert(m.size() == 0);
+ }
+ {
+ std::cmatch m;
+ const char s[] = "AmB";
+ assert(std::regex_search(s, m, std::regex("A[[:lower:]]B",
+ std::regex_constants::basic)));
+ assert(m.size() == 1);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == m[0].second);
+ assert(m.length(0) == std::char_traits<char>::length(s));
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
+ }
+ {
+ std::cmatch m;
+ const char s[] = "AMB";
+ assert(!std::regex_search(s, m, std::regex("A[[:lower:]]B",
+ std::regex_constants::basic)));
+ assert(m.size() == 0);
+ }
}
More information about the cfe-commits
mailing list