libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2016 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex.tcc 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 namespace __detail 00034 { 00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00036 00037 // Result of merging regex_match and regex_search. 00038 // 00039 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 00040 // the other one if possible, for test purpose). 00041 // 00042 // That __match_mode is true means regex_match, else regex_search. 00043 template<typename _BiIter, typename _Alloc, 00044 typename _CharT, typename _TraitsT, 00045 _RegexExecutorPolicy __policy, 00046 bool __match_mode> 00047 bool 00048 __regex_algo_impl(_BiIter __s, 00049 _BiIter __e, 00050 match_results<_BiIter, _Alloc>& __m, 00051 const basic_regex<_CharT, _TraitsT>& __re, 00052 regex_constants::match_flag_type __flags) 00053 { 00054 if (__re._M_automaton == nullptr) 00055 return false; 00056 00057 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 00058 __m._M_begin = __s; 00059 __m._M_resize(__re._M_automaton->_M_sub_count()); 00060 for (auto& __it : __res) 00061 __it.matched = false; 00062 00063 bool __ret; 00064 if ((__re.flags() & regex_constants::__polynomial) 00065 || (__policy == _RegexExecutorPolicy::_S_alternate 00066 && !__re._M_automaton->_M_has_backref)) 00067 { 00068 _Executor<_BiIter, _Alloc, _TraitsT, false> 00069 __executor(__s, __e, __m, __re, __flags); 00070 if (__match_mode) 00071 __ret = __executor._M_match(); 00072 else 00073 __ret = __executor._M_search(); 00074 } 00075 else 00076 { 00077 _Executor<_BiIter, _Alloc, _TraitsT, true> 00078 __executor(__s, __e, __m, __re, __flags); 00079 if (__match_mode) 00080 __ret = __executor._M_match(); 00081 else 00082 __ret = __executor._M_search(); 00083 } 00084 if (__ret) 00085 { 00086 for (auto& __it : __res) 00087 if (!__it.matched) 00088 __it.first = __it.second = __e; 00089 auto& __pre = __m._M_prefix(); 00090 auto& __suf = __m._M_suffix(); 00091 if (__match_mode) 00092 { 00093 __pre.matched = false; 00094 __pre.first = __s; 00095 __pre.second = __s; 00096 __suf.matched = false; 00097 __suf.first = __e; 00098 __suf.second = __e; 00099 } 00100 else 00101 { 00102 __pre.first = __s; 00103 __pre.second = __res[0].first; 00104 __pre.matched = (__pre.first != __pre.second); 00105 __suf.first = __res[0].second; 00106 __suf.second = __e; 00107 __suf.matched = (__suf.first != __suf.second); 00108 } 00109 } 00110 else 00111 { 00112 __m._M_resize(0); 00113 for (auto& __it : __res) 00114 { 00115 __it.matched = false; 00116 __it.first = __it.second = __e; 00117 } 00118 } 00119 return __ret; 00120 } 00121 00122 _GLIBCXX_END_NAMESPACE_VERSION 00123 } 00124 00125 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00126 00127 template<typename _Ch_type> 00128 template<typename _Fwd_iter> 00129 typename regex_traits<_Ch_type>::string_type 00130 regex_traits<_Ch_type>:: 00131 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 00132 { 00133 typedef std::ctype<char_type> __ctype_type; 00134 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00135 00136 static const char* __collatenames[] = 00137 { 00138 "NUL", 00139 "SOH", 00140 "STX", 00141 "ETX", 00142 "EOT", 00143 "ENQ", 00144 "ACK", 00145 "alert", 00146 "backspace", 00147 "tab", 00148 "newline", 00149 "vertical-tab", 00150 "form-feed", 00151 "carriage-return", 00152 "SO", 00153 "SI", 00154 "DLE", 00155 "DC1", 00156 "DC2", 00157 "DC3", 00158 "DC4", 00159 "NAK", 00160 "SYN", 00161 "ETB", 00162 "CAN", 00163 "EM", 00164 "SUB", 00165 "ESC", 00166 "IS4", 00167 "IS3", 00168 "IS2", 00169 "IS1", 00170 "space", 00171 "exclamation-mark", 00172 "quotation-mark", 00173 "number-sign", 00174 "dollar-sign", 00175 "percent-sign", 00176 "ampersand", 00177 "apostrophe", 00178 "left-parenthesis", 00179 "right-parenthesis", 00180 "asterisk", 00181 "plus-sign", 00182 "comma", 00183 "hyphen", 00184 "period", 00185 "slash", 00186 "zero", 00187 "one", 00188 "two", 00189 "three", 00190 "four", 00191 "five", 00192 "six", 00193 "seven", 00194 "eight", 00195 "nine", 00196 "colon", 00197 "semicolon", 00198 "less-than-sign", 00199 "equals-sign", 00200 "greater-than-sign", 00201 "question-mark", 00202 "commercial-at", 00203 "A", 00204 "B", 00205 "C", 00206 "D", 00207 "E", 00208 "F", 00209 "G", 00210 "H", 00211 "I", 00212 "J", 00213 "K", 00214 "L", 00215 "M", 00216 "N", 00217 "O", 00218 "P", 00219 "Q", 00220 "R", 00221 "S", 00222 "T", 00223 "U", 00224 "V", 00225 "W", 00226 "X", 00227 "Y", 00228 "Z", 00229 "left-square-bracket", 00230 "backslash", 00231 "right-square-bracket", 00232 "circumflex", 00233 "underscore", 00234 "grave-accent", 00235 "a", 00236 "b", 00237 "c", 00238 "d", 00239 "e", 00240 "f", 00241 "g", 00242 "h", 00243 "i", 00244 "j", 00245 "k", 00246 "l", 00247 "m", 00248 "n", 00249 "o", 00250 "p", 00251 "q", 00252 "r", 00253 "s", 00254 "t", 00255 "u", 00256 "v", 00257 "w", 00258 "x", 00259 "y", 00260 "z", 00261 "left-curly-bracket", 00262 "vertical-line", 00263 "right-curly-bracket", 00264 "tilde", 00265 "DEL", 00266 }; 00267 00268 string __s; 00269 for (; __first != __last; ++__first) 00270 __s += __fctyp.narrow(*__first, 0); 00271 00272 for (const auto& __it : __collatenames) 00273 if (__s == __it) 00274 return string_type(1, __fctyp.widen( 00275 static_cast<char>(&__it - __collatenames))); 00276 00277 // TODO Add digraph support: 00278 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 00279 00280 return string_type(); 00281 } 00282 00283 template<typename _Ch_type> 00284 template<typename _Fwd_iter> 00285 typename regex_traits<_Ch_type>::char_class_type 00286 regex_traits<_Ch_type>:: 00287 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 00288 { 00289 typedef std::ctype<char_type> __ctype_type; 00290 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00291 00292 // Mappings from class name to class mask. 00293 static const pair<const char*, char_class_type> __classnames[] = 00294 { 00295 {"d", ctype_base::digit}, 00296 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 00297 {"s", ctype_base::space}, 00298 {"alnum", ctype_base::alnum}, 00299 {"alpha", ctype_base::alpha}, 00300 {"blank", ctype_base::blank}, 00301 {"cntrl", ctype_base::cntrl}, 00302 {"digit", ctype_base::digit}, 00303 {"graph", ctype_base::graph}, 00304 {"lower", ctype_base::lower}, 00305 {"print", ctype_base::print}, 00306 {"punct", ctype_base::punct}, 00307 {"space", ctype_base::space}, 00308 {"upper", ctype_base::upper}, 00309 {"xdigit", ctype_base::xdigit}, 00310 }; 00311 00312 string __s; 00313 for (; __first != __last; ++__first) 00314 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 00315 00316 for (const auto& __it : __classnames) 00317 if (__s == __it.first) 00318 { 00319 if (__icase 00320 && ((__it.second 00321 & (ctype_base::lower | ctype_base::upper)) != 0)) 00322 return ctype_base::alpha; 00323 return __it.second; 00324 } 00325 return 0; 00326 } 00327 00328 template<typename _Ch_type> 00329 bool 00330 regex_traits<_Ch_type>:: 00331 isctype(_Ch_type __c, char_class_type __f) const 00332 { 00333 typedef std::ctype<char_type> __ctype_type; 00334 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00335 00336 return __fctyp.is(__f._M_base, __c) 00337 // [[:w:]] 00338 || ((__f._M_extended & _RegexMask::_S_under) 00339 && __c == __fctyp.widen('_')); 00340 } 00341 00342 template<typename _Ch_type> 00343 int 00344 regex_traits<_Ch_type>:: 00345 value(_Ch_type __ch, int __radix) const 00346 { 00347 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 00348 long __v; 00349 if (__radix == 8) 00350 __is >> std::oct; 00351 else if (__radix == 16) 00352 __is >> std::hex; 00353 __is >> __v; 00354 return __is.fail() ? -1 : __v; 00355 } 00356 00357 template<typename _Bi_iter, typename _Alloc> 00358 template<typename _Out_iter> 00359 _Out_iter match_results<_Bi_iter, _Alloc>:: 00360 format(_Out_iter __out, 00361 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 00362 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 00363 match_flag_type __flags) const 00364 { 00365 __glibcxx_assert( ready() ); 00366 regex_traits<char_type> __traits; 00367 typedef std::ctype<char_type> __ctype_type; 00368 const __ctype_type& 00369 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 00370 00371 auto __output = [&](size_t __idx) 00372 { 00373 auto& __sub = (*this)[__idx]; 00374 if (__sub.matched) 00375 __out = std::copy(__sub.first, __sub.second, __out); 00376 }; 00377 00378 if (__flags & regex_constants::format_sed) 00379 { 00380 for (; __fmt_first != __fmt_last;) 00381 if (*__fmt_first == '&') 00382 { 00383 __output(0); 00384 ++__fmt_first; 00385 } 00386 else if (*__fmt_first == '\\') 00387 { 00388 if (++__fmt_first != __fmt_last 00389 && __fctyp.is(__ctype_type::digit, *__fmt_first)) 00390 __output(__traits.value(*__fmt_first++, 10)); 00391 else 00392 *__out++ = '\\'; 00393 } 00394 else 00395 *__out++ = *__fmt_first++; 00396 } 00397 else 00398 { 00399 while (1) 00400 { 00401 auto __next = std::find(__fmt_first, __fmt_last, '$'); 00402 if (__next == __fmt_last) 00403 break; 00404 00405 __out = std::copy(__fmt_first, __next, __out); 00406 00407 auto __eat = [&](char __ch) -> bool 00408 { 00409 if (*__next == __ch) 00410 { 00411 ++__next; 00412 return true; 00413 } 00414 return false; 00415 }; 00416 00417 if (++__next == __fmt_last) 00418 *__out++ = '$'; 00419 else if (__eat('$')) 00420 *__out++ = '$'; 00421 else if (__eat('&')) 00422 __output(0); 00423 else if (__eat('`')) 00424 { 00425 auto& __sub = _M_prefix(); 00426 if (__sub.matched) 00427 __out = std::copy(__sub.first, __sub.second, __out); 00428 } 00429 else if (__eat('\'')) 00430 { 00431 auto& __sub = _M_suffix(); 00432 if (__sub.matched) 00433 __out = std::copy(__sub.first, __sub.second, __out); 00434 } 00435 else if (__fctyp.is(__ctype_type::digit, *__next)) 00436 { 00437 long __num = __traits.value(*__next, 10); 00438 if (++__next != __fmt_last 00439 && __fctyp.is(__ctype_type::digit, *__next)) 00440 { 00441 __num *= 10; 00442 __num += __traits.value(*__next++, 10); 00443 } 00444 if (0 <= __num && __num < this->size()) 00445 __output(__num); 00446 } 00447 else 00448 *__out++ = '$'; 00449 __fmt_first = __next; 00450 } 00451 __out = std::copy(__fmt_first, __fmt_last, __out); 00452 } 00453 return __out; 00454 } 00455 00456 template<typename _Out_iter, typename _Bi_iter, 00457 typename _Rx_traits, typename _Ch_type> 00458 _Out_iter 00459 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 00460 const basic_regex<_Ch_type, _Rx_traits>& __e, 00461 const _Ch_type* __fmt, 00462 regex_constants::match_flag_type __flags) 00463 { 00464 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 00465 _IterT __i(__first, __last, __e, __flags); 00466 _IterT __end; 00467 if (__i == __end) 00468 { 00469 if (!(__flags & regex_constants::format_no_copy)) 00470 __out = std::copy(__first, __last, __out); 00471 } 00472 else 00473 { 00474 sub_match<_Bi_iter> __last; 00475 auto __len = char_traits<_Ch_type>::length(__fmt); 00476 for (; __i != __end; ++__i) 00477 { 00478 if (!(__flags & regex_constants::format_no_copy)) 00479 __out = std::copy(__i->prefix().first, __i->prefix().second, 00480 __out); 00481 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 00482 __last = __i->suffix(); 00483 if (__flags & regex_constants::format_first_only) 00484 break; 00485 } 00486 if (!(__flags & regex_constants::format_no_copy)) 00487 __out = std::copy(__last.first, __last.second, __out); 00488 } 00489 return __out; 00490 } 00491 00492 template<typename _Bi_iter, 00493 typename _Ch_type, 00494 typename _Rx_traits> 00495 bool 00496 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00497 operator==(const regex_iterator& __rhs) const 00498 { 00499 return (_M_match.empty() && __rhs._M_match.empty()) 00500 || (_M_begin == __rhs._M_begin 00501 && _M_end == __rhs._M_end 00502 && _M_pregex == __rhs._M_pregex 00503 && _M_flags == __rhs._M_flags 00504 && _M_match[0] == __rhs._M_match[0]); 00505 } 00506 00507 template<typename _Bi_iter, 00508 typename _Ch_type, 00509 typename _Rx_traits> 00510 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00511 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00512 operator++() 00513 { 00514 // In all cases in which the call to regex_search returns true, 00515 // match.prefix().first shall be equal to the previous value of 00516 // match[0].second, and for each index i in the half-open range 00517 // [0, match.size()) for which match[i].matched is true, 00518 // match[i].position() shall return distance(begin, match[i].first). 00519 // [28.12.1.4.5] 00520 if (_M_match[0].matched) 00521 { 00522 auto __start = _M_match[0].second; 00523 auto __prefix_first = _M_match[0].second; 00524 if (_M_match[0].first == _M_match[0].second) 00525 { 00526 if (__start == _M_end) 00527 { 00528 _M_match = value_type(); 00529 return *this; 00530 } 00531 else 00532 { 00533 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 00534 _M_flags 00535 | regex_constants::match_not_null 00536 | regex_constants::match_continuous)) 00537 { 00538 __glibcxx_assert(_M_match[0].matched); 00539 auto& __prefix = _M_match._M_prefix(); 00540 __prefix.first = __prefix_first; 00541 __prefix.matched = __prefix.first != __prefix.second; 00542 // [28.12.1.4.5] 00543 _M_match._M_begin = _M_begin; 00544 return *this; 00545 } 00546 else 00547 ++__start; 00548 } 00549 } 00550 _M_flags |= regex_constants::match_prev_avail; 00551 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 00552 { 00553 __glibcxx_assert(_M_match[0].matched); 00554 auto& __prefix = _M_match._M_prefix(); 00555 __prefix.first = __prefix_first; 00556 __prefix.matched = __prefix.first != __prefix.second; 00557 // [28.12.1.4.5] 00558 _M_match._M_begin = _M_begin; 00559 } 00560 else 00561 _M_match = value_type(); 00562 } 00563 return *this; 00564 } 00565 00566 template<typename _Bi_iter, 00567 typename _Ch_type, 00568 typename _Rx_traits> 00569 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00570 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00571 operator=(const regex_token_iterator& __rhs) 00572 { 00573 _M_position = __rhs._M_position; 00574 _M_subs = __rhs._M_subs; 00575 _M_n = __rhs._M_n; 00576 _M_suffix = __rhs._M_suffix; 00577 _M_has_m1 = __rhs._M_has_m1; 00578 _M_normalize_result(); 00579 return *this; 00580 } 00581 00582 template<typename _Bi_iter, 00583 typename _Ch_type, 00584 typename _Rx_traits> 00585 bool 00586 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00587 operator==(const regex_token_iterator& __rhs) const 00588 { 00589 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 00590 return true; 00591 if (_M_suffix.matched && __rhs._M_suffix.matched 00592 && _M_suffix == __rhs._M_suffix) 00593 return true; 00594 if (_M_end_of_seq() || _M_suffix.matched 00595 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 00596 return false; 00597 return _M_position == __rhs._M_position 00598 && _M_n == __rhs._M_n 00599 && _M_subs == __rhs._M_subs; 00600 } 00601 00602 template<typename _Bi_iter, 00603 typename _Ch_type, 00604 typename _Rx_traits> 00605 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00606 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00607 operator++() 00608 { 00609 _Position __prev = _M_position; 00610 if (_M_suffix.matched) 00611 *this = regex_token_iterator(); 00612 else if (_M_n + 1 < _M_subs.size()) 00613 { 00614 _M_n++; 00615 _M_result = &_M_current_match(); 00616 } 00617 else 00618 { 00619 _M_n = 0; 00620 ++_M_position; 00621 if (_M_position != _Position()) 00622 _M_result = &_M_current_match(); 00623 else if (_M_has_m1 && __prev->suffix().length() != 0) 00624 { 00625 _M_suffix.matched = true; 00626 _M_suffix.first = __prev->suffix().first; 00627 _M_suffix.second = __prev->suffix().second; 00628 _M_result = &_M_suffix; 00629 } 00630 else 00631 *this = regex_token_iterator(); 00632 } 00633 return *this; 00634 } 00635 00636 template<typename _Bi_iter, 00637 typename _Ch_type, 00638 typename _Rx_traits> 00639 void 00640 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00641 _M_init(_Bi_iter __a, _Bi_iter __b) 00642 { 00643 _M_has_m1 = false; 00644 for (auto __it : _M_subs) 00645 if (__it == -1) 00646 { 00647 _M_has_m1 = true; 00648 break; 00649 } 00650 if (_M_position != _Position()) 00651 _M_result = &_M_current_match(); 00652 else if (_M_has_m1) 00653 { 00654 _M_suffix.matched = true; 00655 _M_suffix.first = __a; 00656 _M_suffix.second = __b; 00657 _M_result = &_M_suffix; 00658 } 00659 else 00660 _M_result = nullptr; 00661 } 00662 00663 _GLIBCXX_END_NAMESPACE_VERSION 00664 } // namespace 00665