libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2010-2019 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_compiler.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00034 _GLIBCXX_BEGIN_NAMESPACE_CXX11 00035 00036 template<typename> 00037 class regex_traits; 00038 00039 _GLIBCXX_END_NAMESPACE_CXX11 00040 00041 namespace __detail 00042 { 00043 /** 00044 * @addtogroup regex-detail 00045 * @{ 00046 */ 00047 00048 template<typename, bool, bool> 00049 struct _BracketMatcher; 00050 00051 /** 00052 * @brief Builds an NFA from an input iterator range. 00053 * 00054 * The %_TraitsT type should fulfill requirements [28.3]. 00055 */ 00056 template<typename _TraitsT> 00057 class _Compiler 00058 { 00059 public: 00060 typedef typename _TraitsT::char_type _CharT; 00061 typedef const _CharT* _IterT; 00062 typedef _NFA<_TraitsT> _RegexT; 00063 typedef regex_constants::syntax_option_type _FlagT; 00064 00065 _Compiler(_IterT __b, _IterT __e, 00066 const typename _TraitsT::locale_type& __traits, _FlagT __flags); 00067 00068 shared_ptr<const _RegexT> 00069 _M_get_nfa() 00070 { return std::move(_M_nfa); } 00071 00072 private: 00073 typedef _Scanner<_CharT> _ScannerT; 00074 typedef typename _TraitsT::string_type _StringT; 00075 typedef typename _ScannerT::_TokenT _TokenT; 00076 typedef _StateSeq<_TraitsT> _StateSeqT; 00077 typedef std::stack<_StateSeqT> _StackT; 00078 typedef std::ctype<_CharT> _CtypeT; 00079 00080 // accepts a specific token or returns false. 00081 bool 00082 _M_match_token(_TokenT __token); 00083 00084 void 00085 _M_disjunction(); 00086 00087 void 00088 _M_alternative(); 00089 00090 bool 00091 _M_term(); 00092 00093 bool 00094 _M_assertion(); 00095 00096 bool 00097 _M_quantifier(); 00098 00099 bool 00100 _M_atom(); 00101 00102 bool 00103 _M_bracket_expression(); 00104 00105 template<bool __icase, bool __collate> 00106 void 00107 _M_insert_any_matcher_ecma(); 00108 00109 template<bool __icase, bool __collate> 00110 void 00111 _M_insert_any_matcher_posix(); 00112 00113 template<bool __icase, bool __collate> 00114 void 00115 _M_insert_char_matcher(); 00116 00117 template<bool __icase, bool __collate> 00118 void 00119 _M_insert_character_class_matcher(); 00120 00121 template<bool __icase, bool __collate> 00122 void 00123 _M_insert_bracket_matcher(bool __neg); 00124 00125 // Returns true if successfully matched one term and should continue. 00126 // Returns false if the compiler should move on. 00127 template<bool __icase, bool __collate> 00128 bool 00129 _M_expression_term(pair<bool, _CharT>& __last_char, 00130 _BracketMatcher<_TraitsT, __icase, __collate>& 00131 __matcher); 00132 00133 int 00134 _M_cur_int_value(int __radix); 00135 00136 bool 00137 _M_try_char(); 00138 00139 _StateSeqT 00140 _M_pop() 00141 { 00142 auto ret = _M_stack.top(); 00143 _M_stack.pop(); 00144 return ret; 00145 } 00146 00147 _FlagT _M_flags; 00148 _ScannerT _M_scanner; 00149 shared_ptr<_RegexT> _M_nfa; 00150 _StringT _M_value; 00151 _StackT _M_stack; 00152 const _TraitsT& _M_traits; 00153 const _CtypeT& _M_ctype; 00154 }; 00155 00156 template<typename _Tp> 00157 struct __is_contiguous_iter : is_pointer<_Tp>::type { }; 00158 00159 template<typename _Tp, typename _Cont> 00160 struct 00161 __is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>> 00162 : true_type { }; 00163 00164 template<typename _Iter, typename _TraitsT> 00165 using __enable_if_contiguous_iter 00166 = __enable_if_t< __is_contiguous_iter<_Iter>::value, 00167 std::shared_ptr<const _NFA<_TraitsT>> >; 00168 00169 template<typename _Iter, typename _TraitsT> 00170 using __disable_if_contiguous_iter 00171 = __enable_if_t< !__is_contiguous_iter<_Iter>::value, 00172 std::shared_ptr<const _NFA<_TraitsT>> >; 00173 00174 template<typename _TraitsT, typename _FwdIter> 00175 inline __enable_if_contiguous_iter<_FwdIter, _TraitsT> 00176 __compile_nfa(_FwdIter __first, _FwdIter __last, 00177 const typename _TraitsT::locale_type& __loc, 00178 regex_constants::syntax_option_type __flags) 00179 { 00180 size_t __len = __last - __first; 00181 const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr; 00182 using _Cmplr = _Compiler<_TraitsT>; 00183 return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa(); 00184 } 00185 00186 template<typename _TraitsT, typename _FwdIter> 00187 inline __disable_if_contiguous_iter<_FwdIter, _TraitsT> 00188 __compile_nfa(_FwdIter __first, _FwdIter __last, 00189 const typename _TraitsT::locale_type& __loc, 00190 regex_constants::syntax_option_type __flags) 00191 { 00192 const basic_string<typename _TraitsT::char_type> __str(__first, __last); 00193 return __compile_nfa<_TraitsT>(__str.data(), __str.data() + __str.size(), 00194 __loc, __flags); 00195 } 00196 00197 // [28.13.14] 00198 template<typename _TraitsT, bool __icase, bool __collate> 00199 class _RegexTranslatorBase 00200 { 00201 public: 00202 typedef typename _TraitsT::char_type _CharT; 00203 typedef typename _TraitsT::string_type _StringT; 00204 typedef _StringT _StrTransT; 00205 00206 explicit 00207 _RegexTranslatorBase(const _TraitsT& __traits) 00208 : _M_traits(__traits) 00209 { } 00210 00211 _CharT 00212 _M_translate(_CharT __ch) const 00213 { 00214 if (__icase) 00215 return _M_traits.translate_nocase(__ch); 00216 else if (__collate) 00217 return _M_traits.translate(__ch); 00218 else 00219 return __ch; 00220 } 00221 00222 _StrTransT 00223 _M_transform(_CharT __ch) const 00224 { 00225 _StrTransT __str(1, __ch); 00226 return _M_traits.transform(__str.begin(), __str.end()); 00227 } 00228 00229 // See LWG 523. It's not efficiently implementable when _TraitsT is not 00230 // std::regex_traits<>, and __collate is true. See specializations for 00231 // implementations of other cases. 00232 bool 00233 _M_match_range(const _StrTransT& __first, const _StrTransT& __last, 00234 const _StrTransT& __s) const 00235 { return __first <= __s && __s <= __last; } 00236 00237 protected: 00238 bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const 00239 { 00240 typedef std::ctype<_CharT> __ctype_type; 00241 const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc()); 00242 auto __lower = __fctyp.tolower(__ch); 00243 auto __upper = __fctyp.toupper(__ch); 00244 return (__first <= __lower && __lower <= __last) 00245 || (__first <= __upper && __upper <= __last); 00246 } 00247 00248 const _TraitsT& _M_traits; 00249 }; 00250 00251 template<typename _TraitsT, bool __icase, bool __collate> 00252 class _RegexTranslator 00253 : public _RegexTranslatorBase<_TraitsT, __icase, __collate> 00254 { 00255 public: 00256 typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base; 00257 using _Base::_Base; 00258 }; 00259 00260 template<typename _TraitsT, bool __icase> 00261 class _RegexTranslator<_TraitsT, __icase, false> 00262 : public _RegexTranslatorBase<_TraitsT, __icase, false> 00263 { 00264 public: 00265 typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base; 00266 typedef typename _Base::_CharT _CharT; 00267 typedef _CharT _StrTransT; 00268 00269 using _Base::_Base; 00270 00271 _StrTransT 00272 _M_transform(_CharT __ch) const 00273 { return __ch; } 00274 00275 bool 00276 _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const 00277 { 00278 if (!__icase) 00279 return __first <= __ch && __ch <= __last; 00280 return this->_M_in_range_icase(__first, __last, __ch); 00281 } 00282 }; 00283 00284 template<typename _CharType> 00285 class _RegexTranslator<std::regex_traits<_CharType>, true, true> 00286 : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true> 00287 { 00288 public: 00289 typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true> 00290 _Base; 00291 typedef typename _Base::_CharT _CharT; 00292 typedef typename _Base::_StrTransT _StrTransT; 00293 00294 using _Base::_Base; 00295 00296 bool 00297 _M_match_range(const _StrTransT& __first, const _StrTransT& __last, 00298 const _StrTransT& __str) const 00299 { 00300 __glibcxx_assert(__first.size() == 1); 00301 __glibcxx_assert(__last.size() == 1); 00302 __glibcxx_assert(__str.size() == 1); 00303 return this->_M_in_range_icase(__first[0], __last[0], __str[0]); 00304 } 00305 }; 00306 00307 template<typename _TraitsT> 00308 class _RegexTranslator<_TraitsT, false, false> 00309 { 00310 public: 00311 typedef typename _TraitsT::char_type _CharT; 00312 typedef _CharT _StrTransT; 00313 00314 explicit 00315 _RegexTranslator(const _TraitsT&) 00316 { } 00317 00318 _CharT 00319 _M_translate(_CharT __ch) const 00320 { return __ch; } 00321 00322 _StrTransT 00323 _M_transform(_CharT __ch) const 00324 { return __ch; } 00325 00326 bool 00327 _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const 00328 { return __first <= __ch && __ch <= __last; } 00329 }; 00330 00331 template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate> 00332 struct _AnyMatcher; 00333 00334 template<typename _TraitsT, bool __icase, bool __collate> 00335 struct _AnyMatcher<_TraitsT, false, __icase, __collate> 00336 { 00337 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00338 typedef typename _TransT::_CharT _CharT; 00339 00340 explicit 00341 _AnyMatcher(const _TraitsT& __traits) 00342 : _M_translator(__traits) 00343 { } 00344 00345 bool 00346 operator()(_CharT __ch) const 00347 { 00348 static auto __nul = _M_translator._M_translate('\0'); 00349 return _M_translator._M_translate(__ch) != __nul; 00350 } 00351 00352 _TransT _M_translator; 00353 }; 00354 00355 template<typename _TraitsT, bool __icase, bool __collate> 00356 struct _AnyMatcher<_TraitsT, true, __icase, __collate> 00357 { 00358 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00359 typedef typename _TransT::_CharT _CharT; 00360 00361 explicit 00362 _AnyMatcher(const _TraitsT& __traits) 00363 : _M_translator(__traits) 00364 { } 00365 00366 bool 00367 operator()(_CharT __ch) const 00368 { return _M_apply(__ch, typename is_same<_CharT, char>::type()); } 00369 00370 bool 00371 _M_apply(_CharT __ch, true_type) const 00372 { 00373 auto __c = _M_translator._M_translate(__ch); 00374 auto __n = _M_translator._M_translate('\n'); 00375 auto __r = _M_translator._M_translate('\r'); 00376 return __c != __n && __c != __r; 00377 } 00378 00379 bool 00380 _M_apply(_CharT __ch, false_type) const 00381 { 00382 auto __c = _M_translator._M_translate(__ch); 00383 auto __n = _M_translator._M_translate('\n'); 00384 auto __r = _M_translator._M_translate('\r'); 00385 auto __u2028 = _M_translator._M_translate(u'\u2028'); 00386 auto __u2029 = _M_translator._M_translate(u'\u2029'); 00387 return __c != __n && __c != __r && __c != __u2028 && __c != __u2029; 00388 } 00389 00390 _TransT _M_translator; 00391 }; 00392 00393 template<typename _TraitsT, bool __icase, bool __collate> 00394 struct _CharMatcher 00395 { 00396 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00397 typedef typename _TransT::_CharT _CharT; 00398 00399 _CharMatcher(_CharT __ch, const _TraitsT& __traits) 00400 : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch)) 00401 { } 00402 00403 bool 00404 operator()(_CharT __ch) const 00405 { return _M_ch == _M_translator._M_translate(__ch); } 00406 00407 _TransT _M_translator; 00408 _CharT _M_ch; 00409 }; 00410 00411 /// Matches a character range (bracket expression) 00412 template<typename _TraitsT, bool __icase, bool __collate> 00413 struct _BracketMatcher 00414 { 00415 public: 00416 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00417 typedef typename _TransT::_CharT _CharT; 00418 typedef typename _TransT::_StrTransT _StrTransT; 00419 typedef typename _TraitsT::string_type _StringT; 00420 typedef typename _TraitsT::char_class_type _CharClassT; 00421 00422 public: 00423 _BracketMatcher(bool __is_non_matching, 00424 const _TraitsT& __traits) 00425 : _M_class_set(0), _M_translator(__traits), _M_traits(__traits), 00426 _M_is_non_matching(__is_non_matching) 00427 { } 00428 00429 bool 00430 operator()(_CharT __ch) const 00431 { 00432 _GLIBCXX_DEBUG_ASSERT(_M_is_ready); 00433 return _M_apply(__ch, _UseCache()); 00434 } 00435 00436 void 00437 _M_add_char(_CharT __c) 00438 { 00439 _M_char_set.push_back(_M_translator._M_translate(__c)); 00440 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00441 } 00442 00443 _StringT 00444 _M_add_collate_element(const _StringT& __s) 00445 { 00446 auto __st = _M_traits.lookup_collatename(__s.data(), 00447 __s.data() + __s.size()); 00448 if (__st.empty()) 00449 __throw_regex_error(regex_constants::error_collate, 00450 "Invalid collate element."); 00451 _M_char_set.push_back(_M_translator._M_translate(__st[0])); 00452 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00453 return __st; 00454 } 00455 00456 void 00457 _M_add_equivalence_class(const _StringT& __s) 00458 { 00459 auto __st = _M_traits.lookup_collatename(__s.data(), 00460 __s.data() + __s.size()); 00461 if (__st.empty()) 00462 __throw_regex_error(regex_constants::error_collate, 00463 "Invalid equivalence class."); 00464 __st = _M_traits.transform_primary(__st.data(), 00465 __st.data() + __st.size()); 00466 _M_equiv_set.push_back(__st); 00467 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00468 } 00469 00470 // __neg should be true for \D, \S and \W only. 00471 void 00472 _M_add_character_class(const _StringT& __s, bool __neg) 00473 { 00474 auto __mask = _M_traits.lookup_classname(__s.data(), 00475 __s.data() + __s.size(), 00476 __icase); 00477 if (__mask == 0) 00478 __throw_regex_error(regex_constants::error_collate, 00479 "Invalid character class."); 00480 if (!__neg) 00481 _M_class_set |= __mask; 00482 else 00483 _M_neg_class_set.push_back(__mask); 00484 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00485 } 00486 00487 void 00488 _M_make_range(_CharT __l, _CharT __r) 00489 { 00490 if (__l > __r) 00491 __throw_regex_error(regex_constants::error_range, 00492 "Invalid range in bracket expression."); 00493 _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), 00494 _M_translator._M_transform(__r))); 00495 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00496 } 00497 00498 void 00499 _M_ready() 00500 { 00501 std::sort(_M_char_set.begin(), _M_char_set.end()); 00502 auto __end = std::unique(_M_char_set.begin(), _M_char_set.end()); 00503 _M_char_set.erase(__end, _M_char_set.end()); 00504 _M_make_cache(_UseCache()); 00505 _GLIBCXX_DEBUG_ONLY(_M_is_ready = true); 00506 } 00507 00508 private: 00509 // Currently we only use the cache for char 00510 typedef typename std::is_same<_CharT, char>::type _UseCache; 00511 00512 static constexpr size_t 00513 _S_cache_size = 00514 1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value)); 00515 00516 struct _Dummy { }; 00517 typedef typename std::conditional<_UseCache::value, 00518 std::bitset<_S_cache_size>, 00519 _Dummy>::type _CacheT; 00520 typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT; 00521 00522 bool 00523 _M_apply(_CharT __ch, false_type) const; 00524 00525 bool 00526 _M_apply(_CharT __ch, true_type) const 00527 { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; } 00528 00529 void 00530 _M_make_cache(true_type) 00531 { 00532 for (unsigned __i = 0; __i < _M_cache.size(); __i++) 00533 _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type()); 00534 } 00535 00536 void 00537 _M_make_cache(false_type) 00538 { } 00539 00540 private: 00541 std::vector<_CharT> _M_char_set; 00542 std::vector<_StringT> _M_equiv_set; 00543 std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; 00544 std::vector<_CharClassT> _M_neg_class_set; 00545 _CharClassT _M_class_set; 00546 _TransT _M_translator; 00547 const _TraitsT& _M_traits; 00548 bool _M_is_non_matching; 00549 _CacheT _M_cache; 00550 #ifdef _GLIBCXX_DEBUG 00551 bool _M_is_ready = false; 00552 #endif 00553 }; 00554 00555 //@} regex-detail 00556 } // namespace __detail 00557 _GLIBCXX_END_NAMESPACE_VERSION 00558 } // namespace std 00559 00560 #include <bits/regex_compiler.tcc>