libstdc++
|
// wstring_convert implementation -*- C++ -*-

// Copyright (C) 2015-2019 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/locale_conv.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly.
@headername{locale} 00028 */ 00029 00030 #ifndef _LOCALE_CONV_H 00031 #define _LOCALE_CONV_H 1 00032 00033 #if __cplusplus < 201103L 00034 # include <bits/c++0x_warning.h> 00035 #else 00036 00037 #include <streambuf> 00038 #include <bits/stringfwd.h> 00039 #include <bits/allocator.h> 00040 #include <bits/codecvt.h> 00041 #include <bits/unique_ptr.h> 00042 00043 namespace std _GLIBCXX_VISIBILITY(default) 00044 { 00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00046 00047 /** 00048 * @addtogroup locales 00049 * @{ 00050 */ 00051 00052 template<typename _OutStr, typename _InChar, typename _Codecvt, 00053 typename _State, typename _Fn> 00054 bool 00055 __do_str_codecvt(const _InChar* __first, const _InChar* __last, 00056 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state, 00057 size_t& __count, _Fn __fn) 00058 { 00059 if (__first == __last) 00060 { 00061 __outstr.clear(); 00062 __count = 0; 00063 return true; 00064 } 00065 00066 size_t __outchars = 0; 00067 auto __next = __first; 00068 const auto __maxlen = __cvt.max_length() + 1; 00069 00070 codecvt_base::result __result; 00071 do 00072 { 00073 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen); 00074 auto __outnext = &__outstr.front() + __outchars; 00075 auto const __outlast = &__outstr.back() + 1; 00076 __result = (__cvt.*__fn)(__state, __next, __last, __next, 00077 __outnext, __outlast, __outnext); 00078 __outchars = __outnext - &__outstr.front(); 00079 } 00080 while (__result == codecvt_base::partial && __next != __last 00081 && (__outstr.size() - __outchars) < __maxlen); 00082 00083 if (__result == codecvt_base::error) 00084 { 00085 __count = __next - __first; 00086 return false; 00087 } 00088 00089 if (__result == codecvt_base::noconv) 00090 { 00091 __outstr.assign(__first, __last); 00092 __count = __last - __first; 00093 } 00094 else 00095 { 00096 __outstr.resize(__outchars); 00097 __count = __next - __first; 00098 } 00099 00100 return true; 00101 } 00102 00103 // Convert narrow character string to 
wide. 00104 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00105 inline bool 00106 __str_codecvt_in(const char* __first, const char* __last, 00107 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00108 const codecvt<_CharT, char, _State>& __cvt, 00109 _State& __state, size_t& __count) 00110 { 00111 using _Codecvt = codecvt<_CharT, char, _State>; 00112 using _ConvFn 00113 = codecvt_base::result 00114 (_Codecvt::*)(_State&, const char*, const char*, const char*&, 00115 _CharT*, _CharT*, _CharT*&) const; 00116 _ConvFn __fn = &codecvt<_CharT, char, _State>::in; 00117 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00118 __count, __fn); 00119 } 00120 00121 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00122 inline bool 00123 __str_codecvt_in(const char* __first, const char* __last, 00124 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00125 const codecvt<_CharT, char, _State>& __cvt) 00126 { 00127 _State __state = {}; 00128 size_t __n; 00129 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); 00130 } 00131 00132 // Convert wide character string to narrow. 
00133 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00134 inline bool 00135 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00136 basic_string<char, _Traits, _Alloc>& __outstr, 00137 const codecvt<_CharT, char, _State>& __cvt, 00138 _State& __state, size_t& __count) 00139 { 00140 using _Codecvt = codecvt<_CharT, char, _State>; 00141 using _ConvFn 00142 = codecvt_base::result 00143 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 00144 char*, char*, char*&) const; 00145 _ConvFn __fn = &codecvt<_CharT, char, _State>::out; 00146 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00147 __count, __fn); 00148 } 00149 00150 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00151 inline bool 00152 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00153 basic_string<char, _Traits, _Alloc>& __outstr, 00154 const codecvt<_CharT, char, _State>& __cvt) 00155 { 00156 _State __state = {}; 00157 size_t __n; 00158 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 00159 } 00160 00161 #ifdef _GLIBCXX_USE_CHAR8_T 00162 00163 // Convert wide character string to narrow. 
00164 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00165 inline bool 00166 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00167 basic_string<char8_t, _Traits, _Alloc>& __outstr, 00168 const codecvt<_CharT, char8_t, _State>& __cvt, 00169 _State& __state, size_t& __count) 00170 { 00171 using _Codecvt = codecvt<_CharT, char8_t, _State>; 00172 using _ConvFn 00173 = codecvt_base::result 00174 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 00175 char8_t*, char8_t*, char8_t*&) const; 00176 _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out; 00177 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00178 __count, __fn); 00179 } 00180 00181 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00182 inline bool 00183 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00184 basic_string<char8_t, _Traits, _Alloc>& __outstr, 00185 const codecvt<_CharT, char8_t, _State>& __cvt) 00186 { 00187 _State __state = {}; 00188 size_t __n; 00189 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 00190 } 00191 00192 #endif // _GLIBCXX_USE_CHAR8_T 00193 00194 #ifdef _GLIBCXX_USE_WCHAR_T 00195 00196 _GLIBCXX_BEGIN_NAMESPACE_CXX11 00197 00198 /// String conversions 00199 template<typename _Codecvt, typename _Elem = wchar_t, 00200 typename _Wide_alloc = allocator<_Elem>, 00201 typename _Byte_alloc = allocator<char>> 00202 class wstring_convert 00203 { 00204 public: 00205 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; 00206 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; 00207 typedef typename _Codecvt::state_type state_type; 00208 typedef typename wide_string::traits_type::int_type int_type; 00209 00210 /// Default constructor. 00211 wstring_convert() : _M_cvt(new _Codecvt()) { } 00212 00213 /** Constructor. 00214 * 00215 * @param __pcvt The facet to use for conversions. 
00216 * 00217 * Takes ownership of @p __pcvt and will delete it in the destructor. 00218 */ 00219 explicit 00220 wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt) 00221 { 00222 if (!_M_cvt) 00223 __throw_logic_error("wstring_convert"); 00224 } 00225 00226 /** Construct with an initial converstion state. 00227 * 00228 * @param __pcvt The facet to use for conversions. 00229 * @param __state Initial conversion state. 00230 * 00231 * Takes ownership of @p __pcvt and will delete it in the destructor. 00232 * The object's conversion state will persist between conversions. 00233 */ 00234 wstring_convert(_Codecvt* __pcvt, state_type __state) 00235 : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) 00236 { 00237 if (!_M_cvt) 00238 __throw_logic_error("wstring_convert"); 00239 } 00240 00241 /** Construct with error strings. 00242 * 00243 * @param __byte_err A string to return on failed conversions. 00244 * @param __wide_err A wide string to return on failed conversions. 00245 */ 00246 explicit 00247 wstring_convert(const byte_string& __byte_err, 00248 const wide_string& __wide_err = wide_string()) 00249 : _M_cvt(new _Codecvt), 00250 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), 00251 _M_with_strings(true) 00252 { 00253 if (!_M_cvt) 00254 __throw_logic_error("wstring_convert"); 00255 } 00256 00257 ~wstring_convert() = default; 00258 00259 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00260 // 2176. Special members for wstring_convert and wbuffer_convert 00261 wstring_convert(const wstring_convert&) = delete; 00262 wstring_convert& operator=(const wstring_convert&) = delete; 00263 00264 /// @{ Convert from bytes. 
00265 wide_string 00266 from_bytes(char __byte) 00267 { 00268 char __bytes[2] = { __byte }; 00269 return from_bytes(__bytes, __bytes+1); 00270 } 00271 00272 wide_string 00273 from_bytes(const char* __ptr) 00274 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } 00275 00276 wide_string 00277 from_bytes(const byte_string& __str) 00278 { 00279 auto __ptr = __str.data(); 00280 return from_bytes(__ptr, __ptr + __str.size()); 00281 } 00282 00283 wide_string 00284 from_bytes(const char* __first, const char* __last) 00285 { 00286 if (!_M_with_cvtstate) 00287 _M_state = state_type(); 00288 wide_string __out{ _M_wide_err_string.get_allocator() }; 00289 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, 00290 _M_count)) 00291 return __out; 00292 if (_M_with_strings) 00293 return _M_wide_err_string; 00294 __throw_range_error("wstring_convert::from_bytes"); 00295 } 00296 /// @} 00297 00298 /// @{ Convert to bytes. 00299 byte_string 00300 to_bytes(_Elem __wchar) 00301 { 00302 _Elem __wchars[2] = { __wchar }; 00303 return to_bytes(__wchars, __wchars+1); 00304 } 00305 00306 byte_string 00307 to_bytes(const _Elem* __ptr) 00308 { 00309 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); 00310 } 00311 00312 byte_string 00313 to_bytes(const wide_string& __wstr) 00314 { 00315 auto __ptr = __wstr.data(); 00316 return to_bytes(__ptr, __ptr + __wstr.size()); 00317 } 00318 00319 byte_string 00320 to_bytes(const _Elem* __first, const _Elem* __last) 00321 { 00322 if (!_M_with_cvtstate) 00323 _M_state = state_type(); 00324 byte_string __out{ _M_byte_err_string.get_allocator() }; 00325 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, 00326 _M_count)) 00327 return __out; 00328 if (_M_with_strings) 00329 return _M_byte_err_string; 00330 __throw_range_error("wstring_convert::to_bytes"); 00331 } 00332 /// @} 00333 00334 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00335 // 2174. 
wstring_convert::converted() should be noexcept 00336 /// The number of elements successfully converted in the last conversion. 00337 size_t converted() const noexcept { return _M_count; } 00338 00339 /// The final conversion state of the last conversion. 00340 state_type state() const { return _M_state; } 00341 00342 private: 00343 unique_ptr<_Codecvt> _M_cvt; 00344 byte_string _M_byte_err_string; 00345 wide_string _M_wide_err_string; 00346 state_type _M_state = state_type(); 00347 size_t _M_count = 0; 00348 bool _M_with_cvtstate = false; 00349 bool _M_with_strings = false; 00350 }; 00351 00352 _GLIBCXX_END_NAMESPACE_CXX11 00353 00354 /// Buffer conversions 00355 template<typename _Codecvt, typename _Elem = wchar_t, 00356 typename _Tr = char_traits<_Elem>> 00357 class wbuffer_convert : public basic_streambuf<_Elem, _Tr> 00358 { 00359 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf; 00360 00361 public: 00362 typedef typename _Codecvt::state_type state_type; 00363 00364 /// Default constructor. 00365 wbuffer_convert() : wbuffer_convert(nullptr) { } 00366 00367 /** Constructor. 00368 * 00369 * @param __bytebuf The underlying byte stream buffer. 00370 * @param __pcvt The facet to use for conversions. 00371 * @param __state Initial conversion state. 00372 * 00373 * Takes ownership of @p __pcvt and will delete it in the destructor. 
00374 */ 00375 explicit 00376 wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt, 00377 state_type __state = state_type()) 00378 : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state) 00379 { 00380 if (!_M_cvt) 00381 __throw_logic_error("wbuffer_convert"); 00382 00383 _M_always_noconv = _M_cvt->always_noconv(); 00384 00385 if (_M_buf) 00386 { 00387 this->setp(_M_put_area, _M_put_area + _S_buffer_length); 00388 this->setg(_M_get_area + _S_putback_length, 00389 _M_get_area + _S_putback_length, 00390 _M_get_area + _S_putback_length); 00391 } 00392 } 00393 00394 ~wbuffer_convert() = default; 00395 00396 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00397 // 2176. Special members for wstring_convert and wbuffer_convert 00398 wbuffer_convert(const wbuffer_convert&) = delete; 00399 wbuffer_convert& operator=(const wbuffer_convert&) = delete; 00400 00401 streambuf* rdbuf() const noexcept { return _M_buf; } 00402 00403 streambuf* 00404 rdbuf(streambuf *__bytebuf) noexcept 00405 { 00406 auto __prev = _M_buf; 00407 _M_buf = __bytebuf; 00408 return __prev; 00409 } 00410 00411 /// The conversion state following the last conversion. 00412 state_type state() const noexcept { return _M_state; } 00413 00414 protected: 00415 int 00416 sync() 00417 { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 
0 : -1; } 00418 00419 typename _Wide_streambuf::int_type 00420 overflow(typename _Wide_streambuf::int_type __out) 00421 { 00422 if (!_M_buf || !_M_conv_put()) 00423 return _Tr::eof(); 00424 else if (!_Tr::eq_int_type(__out, _Tr::eof())) 00425 return this->sputc(__out); 00426 return _Tr::not_eof(__out); 00427 } 00428 00429 typename _Wide_streambuf::int_type 00430 underflow() 00431 { 00432 if (!_M_buf) 00433 return _Tr::eof(); 00434 00435 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get())) 00436 return _Tr::to_int_type(*this->gptr()); 00437 else 00438 return _Tr::eof(); 00439 } 00440 00441 streamsize 00442 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n) 00443 { 00444 if (!_M_buf || __n == 0) 00445 return 0; 00446 streamsize __done = 0; 00447 do 00448 { 00449 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(), 00450 __n - __done); 00451 _Tr::copy(this->pptr(), __s + __done, __nn); 00452 this->pbump(__nn); 00453 __done += __nn; 00454 } while (__done < __n && _M_conv_put()); 00455 return __done; 00456 } 00457 00458 private: 00459 // fill the get area from converted contents of the byte stream buffer 00460 bool 00461 _M_conv_get() 00462 { 00463 const streamsize __pb1 = this->gptr() - this->eback(); 00464 const streamsize __pb2 = _S_putback_length; 00465 const streamsize __npb = std::min(__pb1, __pb2); 00466 00467 _Tr::move(_M_get_area + _S_putback_length - __npb, 00468 this->gptr() - __npb, __npb); 00469 00470 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv; 00471 __nbytes = std::min(__nbytes, _M_buf->in_avail()); 00472 if (__nbytes < 1) 00473 __nbytes = 1; 00474 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes); 00475 if (__nbytes < 1) 00476 return false; 00477 __nbytes += _M_unconv; 00478 00479 // convert _M_get_buf into _M_get_area 00480 00481 _Elem* __outbuf = _M_get_area + _S_putback_length; 00482 _Elem* __outnext = __outbuf; 00483 const char* __bnext = _M_get_buf; 00484 00485 codecvt_base::result __result; 
00486 if (_M_always_noconv) 00487 __result = codecvt_base::noconv; 00488 else 00489 { 00490 _Elem* __outend = _M_get_area + _S_buffer_length; 00491 00492 __result = _M_cvt->in(_M_state, 00493 __bnext, __bnext + __nbytes, __bnext, 00494 __outbuf, __outend, __outnext); 00495 } 00496 00497 if (__result == codecvt_base::noconv) 00498 { 00499 // cast is safe because noconv means _Elem is same type as char 00500 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf); 00501 _Tr::copy(__outbuf, __get_buf, __nbytes); 00502 _M_unconv = 0; 00503 return true; 00504 } 00505 00506 if ((_M_unconv = _M_get_buf + __nbytes - __bnext)) 00507 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv); 00508 00509 this->setg(__outbuf, __outbuf, __outnext); 00510 00511 return __result != codecvt_base::error; 00512 } 00513 00514 // unused 00515 bool 00516 _M_put(...) 00517 { return false; } 00518 00519 bool 00520 _M_put(const char* __p, streamsize __n) 00521 { 00522 if (_M_buf->sputn(__p, __n) < __n) 00523 return false; 00524 return true; 00525 } 00526 00527 // convert the put area and write to the byte stream buffer 00528 bool 00529 _M_conv_put() 00530 { 00531 _Elem* const __first = this->pbase(); 00532 const _Elem* const __last = this->pptr(); 00533 const streamsize __pending = __last - __first; 00534 00535 if (_M_always_noconv) 00536 return _M_put(__first, __pending); 00537 00538 char __outbuf[2 * _S_buffer_length]; 00539 00540 const _Elem* __next = __first; 00541 const _Elem* __start; 00542 do 00543 { 00544 __start = __next; 00545 char* __outnext = __outbuf; 00546 char* const __outlast = __outbuf + sizeof(__outbuf); 00547 auto __result = _M_cvt->out(_M_state, __next, __last, __next, 00548 __outnext, __outlast, __outnext); 00549 if (__result == codecvt_base::error) 00550 return false; 00551 else if (__result == codecvt_base::noconv) 00552 return _M_put(__next, __pending); 00553 00554 if (!_M_put(__outbuf, __outnext - __outbuf)) 00555 return false; 00556 } 00557 while (__next != 
__last && __next != __start); 00558 00559 if (__next != __last) 00560 _Tr::move(__first, __next, __last - __next); 00561 00562 this->pbump(__first - __next); 00563 return __next != __first; 00564 } 00565 00566 streambuf* _M_buf; 00567 unique_ptr<_Codecvt> _M_cvt; 00568 state_type _M_state; 00569 00570 static const streamsize _S_buffer_length = 32; 00571 static const streamsize _S_putback_length = 3; 00572 _Elem _M_put_area[_S_buffer_length]; 00573 _Elem _M_get_area[_S_buffer_length]; 00574 streamsize _M_unconv = 0; 00575 char _M_get_buf[_S_buffer_length-_S_putback_length]; 00576 bool _M_always_noconv; 00577 }; 00578 00579 #endif // _GLIBCXX_USE_WCHAR_T 00580 00581 /// @} group locales 00582 00583 _GLIBCXX_END_NAMESPACE_VERSION 00584 } // namespace 00585 00586 #endif // __cplusplus 00587 00588 #endif /* _LOCALE_CONV_H */