libstdc++
locale_conv.h
Go to the documentation of this file.
00001 // wstring_convert implementation -*- C++ -*-
00002 
00003 // Copyright (C) 2015-2019 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/locale_conv.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{locale}
00028  */
00029 
00030 #ifndef _LOCALE_CONV_H
00031 #define _LOCALE_CONV_H 1
00032 
00033 #if __cplusplus < 201103L
00034 # include <bits/c++0x_warning.h>
00035 #else
00036 
00037 #include <streambuf>
00038 #include <bits/stringfwd.h>
00039 #include <bits/allocator.h>
00040 #include <bits/codecvt.h>
00041 #include <bits/unique_ptr.h>
00042 
00043 namespace std _GLIBCXX_VISIBILITY(default)
00044 {
00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00046 
00047   /**
00048    * @addtogroup locales
00049    * @{
00050    */
00051 
/**
 * @brief  Convert the range [__first, __last) into @p __outstr.
 *
 * @param  __first   Start of the input range.
 * @param  __last    End of the input range.
 * @param  __outstr  String that receives the result; its previous
 *                   contents are discarded on success.
 * @param  __cvt     The facet performing the conversion.
 * @param  __state   Conversion state, updated by the facet.
 * @param  __count   Set to the number of input elements consumed.
 * @param  __fn      Pointer to the member of @p __cvt to call
 *                   (its in() or out() function).
 * @return false if the facet reported codecvt_base::error, true otherwise.
 *
 * A true result does not imply that all input was consumed: the loop
 * stops on a partial result that still has output room available, so
 * compare @p __count with the input length if that matters.
 */
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    if (__first == __last)
      {
        __outstr.clear();
        __count = 0;
        return true;
      }

    size_t __outchars = 0;
    auto __next = __first;
    // Output elements reserved per remaining input element.
    const auto __maxlen = __cvt.max_length() + 1;

    std::codecvt_base::result __result;
    do
      {
        // Grow the buffer in proportion to the input still to convert and
        // continue writing after the elements already produced.
        __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
        auto __outnext = &__outstr.front() + __outchars;
        auto const __outlast = &__outstr.back() + 1;
        __result = (__cvt.*__fn)(__state, __next, __last, __next,
                                 __outnext, __outlast, __outnext);
        __outchars = __outnext - &__outstr.front();
      }
    // Retry only when the facet stopped for lack of output room.  The
    // free space is converted to a signed type so that it is not compared
    // against the signed __maxlen as an unsigned quantity.
    while (__result == std::codecvt_base::partial && __next != __last
           && static_cast<std::ptrdiff_t>(__outstr.size() - __outchars)
                < __maxlen);

    if (__result == std::codecvt_base::error)
      {
        __count = __next - __first;
        return false;
      }

    if (__result == std::codecvt_base::noconv)
      {
        // Nothing was written by the facet; the input is the output.
        __outstr.assign(__first, __last);
        __count = __last - __first;
      }
    else
      {
        // Drop the unused tail of the over-allocated buffer.
        __outstr.resize(__outchars);
        __count = __next - __first;
      }

    return true;
  }
00102 
00103   // Convert narrow character string to wide.
00104   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00105     inline bool
00106     __str_codecvt_in(const char* __first, const char* __last,
00107                      basic_string<_CharT, _Traits, _Alloc>& __outstr,
00108                      const codecvt<_CharT, char, _State>& __cvt,
00109                      _State& __state, size_t& __count)
00110     {
00111       using _Codecvt = codecvt<_CharT, char, _State>;
00112       using _ConvFn
00113         = codecvt_base::result
00114           (_Codecvt::*)(_State&, const char*, const char*, const char*&,
00115                         _CharT*, _CharT*, _CharT*&) const;
00116       _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
00117       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00118                               __count, __fn);
00119     }
00120 
00121   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00122     inline bool
00123     __str_codecvt_in(const char* __first, const char* __last,
00124                      basic_string<_CharT, _Traits, _Alloc>& __outstr,
00125                      const codecvt<_CharT, char, _State>& __cvt)
00126     {
00127       _State __state = {};
00128       size_t __n;
00129       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
00130     }
00131 
00132   // Convert wide character string to narrow.
00133   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00134     inline bool
00135     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00136                       basic_string<char, _Traits, _Alloc>& __outstr,
00137                       const codecvt<_CharT, char, _State>& __cvt,
00138                       _State& __state, size_t& __count)
00139     {
00140       using _Codecvt = codecvt<_CharT, char, _State>;
00141       using _ConvFn
00142         = codecvt_base::result
00143           (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
00144                         char*, char*, char*&) const;
00145       _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
00146       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00147                               __count, __fn);
00148     }
00149 
00150   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00151     inline bool
00152     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00153                       basic_string<char, _Traits, _Alloc>& __outstr,
00154                       const codecvt<_CharT, char, _State>& __cvt)
00155     {
00156       _State __state = {};
00157       size_t __n;
00158       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
00159     }
00160 
00161 #ifdef _GLIBCXX_USE_CHAR8_T
00162 
00163   // Convert wide character string to narrow.
00164   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00165     inline bool
00166     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00167                       basic_string<char8_t, _Traits, _Alloc>& __outstr,
00168                       const codecvt<_CharT, char8_t, _State>& __cvt,
00169                       _State& __state, size_t& __count)
00170     {
00171       using _Codecvt = codecvt<_CharT, char8_t, _State>;
00172       using _ConvFn
00173         = codecvt_base::result
00174           (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
00175                         char8_t*, char8_t*, char8_t*&) const;
00176       _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
00177       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00178                               __count, __fn);
00179     }
00180 
00181   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00182     inline bool
00183     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00184                       basic_string<char8_t, _Traits, _Alloc>& __outstr,
00185                       const codecvt<_CharT, char8_t, _State>& __cvt)
00186     {
00187       _State __state = {};
00188       size_t __n;
00189       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
00190     }
00191 
00192 #endif  // _GLIBCXX_USE_CHAR8_T
00193 
00194 #ifdef _GLIBCXX_USE_WCHAR_T
00195 
00196 _GLIBCXX_BEGIN_NAMESPACE_CXX11
00197 
00198   /// String conversions
00199   template<typename _Codecvt, typename _Elem = wchar_t,
00200            typename _Wide_alloc = allocator<_Elem>,
00201            typename _Byte_alloc = allocator<char>>
00202     class wstring_convert
00203     {
00204     public:
00205       typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
00206       typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
00207       typedef typename _Codecvt::state_type                        state_type;
00208       typedef typename wide_string::traits_type::int_type          int_type;
00209 
00210       /// Default constructor.
00211       wstring_convert() : _M_cvt(new _Codecvt()) { }
00212 
00213       /** Constructor.
00214        *
00215        * @param  __pcvt The facet to use for conversions.
00216        *
00217        * Takes ownership of @p __pcvt and will delete it in the destructor.
00218        */
00219       explicit
00220       wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
00221       {
00222         if (!_M_cvt)
00223           __throw_logic_error("wstring_convert");
00224       }
00225 
00226       /** Construct with an initial converstion state.
00227        *
00228        * @param  __pcvt The facet to use for conversions.
00229        * @param  __state Initial conversion state.
00230        *
00231        * Takes ownership of @p __pcvt and will delete it in the destructor.
00232        * The object's conversion state will persist between conversions.
00233        */
00234       wstring_convert(_Codecvt* __pcvt, state_type __state)
00235       : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
00236       {
00237         if (!_M_cvt)
00238           __throw_logic_error("wstring_convert");
00239       }
00240 
00241       /** Construct with error strings.
00242        *
00243        * @param  __byte_err A string to return on failed conversions.
00244        * @param  __wide_err A wide string to return on failed conversions.
00245        */
00246       explicit
00247       wstring_convert(const byte_string& __byte_err,
00248                       const wide_string& __wide_err = wide_string())
00249       : _M_cvt(new _Codecvt),
00250         _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
00251         _M_with_strings(true)
00252       {
00253         if (!_M_cvt)
00254           __throw_logic_error("wstring_convert");
00255       }
00256 
00257       ~wstring_convert() = default;
00258 
00259       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00260       // 2176. Special members for wstring_convert and wbuffer_convert
00261       wstring_convert(const wstring_convert&) = delete;
00262       wstring_convert& operator=(const wstring_convert&) = delete;
00263 
00264       /// @{ Convert from bytes.
00265       wide_string
00266       from_bytes(char __byte)
00267       {
00268         char __bytes[2] = { __byte };
00269         return from_bytes(__bytes, __bytes+1);
00270       }
00271 
00272       wide_string
00273       from_bytes(const char* __ptr)
00274       { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
00275 
00276       wide_string
00277       from_bytes(const byte_string& __str)
00278       {
00279         auto __ptr = __str.data();
00280         return from_bytes(__ptr, __ptr + __str.size());
00281       }
00282 
00283       wide_string
00284       from_bytes(const char* __first, const char* __last)
00285       {
00286         if (!_M_with_cvtstate)
00287           _M_state = state_type();
00288         wide_string __out{ _M_wide_err_string.get_allocator() };
00289         if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
00290                              _M_count))
00291           return __out;
00292         if (_M_with_strings)
00293           return _M_wide_err_string;
00294         __throw_range_error("wstring_convert::from_bytes");
00295       }
00296       /// @}
00297 
00298       /// @{ Convert to bytes.
00299       byte_string
00300       to_bytes(_Elem __wchar)
00301       {
00302         _Elem __wchars[2] = { __wchar };
00303         return to_bytes(__wchars, __wchars+1);
00304       }
00305 
00306       byte_string
00307       to_bytes(const _Elem* __ptr)
00308       {
00309         return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
00310       }
00311 
00312       byte_string
00313       to_bytes(const wide_string& __wstr)
00314       {
00315         auto __ptr = __wstr.data();
00316         return to_bytes(__ptr, __ptr + __wstr.size());
00317       }
00318 
00319       byte_string
00320       to_bytes(const _Elem* __first, const _Elem* __last)
00321       {
00322         if (!_M_with_cvtstate)
00323           _M_state = state_type();
00324         byte_string __out{ _M_byte_err_string.get_allocator() };
00325         if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
00326                               _M_count))
00327           return __out;
00328         if (_M_with_strings)
00329           return _M_byte_err_string;
00330         __throw_range_error("wstring_convert::to_bytes");
00331       }
00332       /// @}
00333 
00334       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00335       // 2174. wstring_convert::converted() should be noexcept
00336       /// The number of elements successfully converted in the last conversion.
00337       size_t converted() const noexcept { return _M_count; }
00338 
00339       /// The final conversion state of the last conversion.
00340       state_type state() const { return _M_state; }
00341 
00342     private:
00343       unique_ptr<_Codecvt>      _M_cvt;
00344       byte_string               _M_byte_err_string;
00345       wide_string               _M_wide_err_string;
00346       state_type                _M_state = state_type();
00347       size_t                    _M_count = 0;
00348       bool                      _M_with_cvtstate = false;
00349       bool                      _M_with_strings = false;
00350     };
00351 
00352 _GLIBCXX_END_NAMESPACE_CXX11
00353 
/// Buffer conversions
///
/// A stream buffer of @c _Elem that reads from and writes to an underlying
/// byte stream buffer, converting with an owned facet of type @c _Codecvt.
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = std::char_traits<_Elem>>
  class wbuffer_convert : public std::basic_streambuf<_Elem, _Tr>
  {
    typedef std::basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /// Default constructor.
    wbuffer_convert() : wbuffer_convert(nullptr) { }

    /** Constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer.
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws std::logic_error if @p __pcvt is null.
     */
    explicit
    wbuffer_convert(std::streambuf* __bytebuf,
                    _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        std::__throw_logic_error("wbuffer_convert");

      _M_always_noconv = _M_cvt->always_noconv();

      if (_M_buf)
        {
          this->setp(_M_put_area, _M_put_area + _S_buffer_length);
          // Empty get area positioned after the space kept for putback.
          this->setg(_M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length);
        }
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer.
    std::streambuf* rdbuf() const noexcept { return _M_buf; }

    /// Replace the underlying byte stream buffer, returning the old one.
    std::streambuf*
    rdbuf(std::streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    /// Flush pending output to the byte buffer and synchronize it.
    int
    sync()
    {
      if (!_M_buf)
        return -1;
      // _M_conv_put() reports "no progress" as failure, which is also what
      // an empty put area looks like, so only call it when there is
      // something to write.
      if (this->pptr() != this->pbase() && !_M_conv_put())
        return -1;
      return _M_buf->pubsync() ? -1 : 0;
    }

    /// Flush the put area, then store @p __out unless it is eof.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    /// Refill the get area and return the next element without consuming it.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      // A successful refill may produce no elements at all (for example
      // when only part of a multibyte sequence has been read so far), so
      // keep refilling until an element is available or the refill fails.
      while (this->gptr() >= this->egptr())
        if (!_M_conv_get())
          return _Tr::eof();

      return _Tr::to_int_type(*this->gptr());
    }

    /// Write @p __n elements, flushing the put area whenever it fills up.
    std::streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s,
           std::streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      std::streamsize __done = 0;
      do
      {
        auto __nn = std::min<std::streamsize>(this->epptr() - this->pptr(),
                                              __n - __done);
        _Tr::copy(this->pptr(), __s + __done, __nn);
        this->pbump(__nn);
        __done += __nn;
      } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // fill the get area from converted contents of the byte stream buffer
    bool
    _M_conv_get()
    {
      // Preserve up to _S_putback_length already-read elements in front of
      // the new data.
      const std::streamsize __pb1 = this->gptr() - this->eback();
      const std::streamsize __pb2 = _S_putback_length;
      const std::streamsize __npb = std::min(__pb1, __pb2);

      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      // Space left in the byte buffer after the bytes carried over from
      // the previous call.  With none left the facet cannot make progress
      // and reading another byte would overrun _M_get_buf.
      const std::streamsize __room
        = static_cast<std::streamsize>(sizeof(_M_get_buf)) - _M_unconv;
      if (__room < 1)
        return false;

      std::streamsize __nbytes = std::min(__room, _M_buf->in_avail());
      if (__nbytes < 1)
        __nbytes = 1;   // always ask for at least one byte
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      std::codecvt_base::result __result;
      if (_M_always_noconv)
        __result = std::codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == std::codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          // Expose the copied elements; without this the get area would
          // still appear empty to the base class.
          this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
          return true;
        }

      // Keep any bytes the facet did not consume for the next call.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        std::char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != std::codecvt_base::error;
    }

    // Fallback selected by overload resolution when the arguments are not
    // convertible to (const char*, streamsize), i.e. when _Elem is not
    // char; elements cannot be written unconverted, so report failure.
    bool
    _M_put(...)
    { return false; }

    // Write __n bytes to the byte stream buffer; false on a short write.
    bool
    _M_put(const char* __p, std::streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // convert the put area and write to the byte stream buffer
    //
    // Returns false on a conversion or write error, and also when no
    // element of the put area was consumed.
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const std::streamsize __pending = __last - __first;

      if (_M_always_noconv)
        {
          if (!_M_put(__first, __pending))
            return false;
          // Everything was written, so empty the put area; otherwise the
          // caller would find it still full.
          this->pbump(__first - __last);
          return true;
        }

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == std::codecvt_base::error)
            return false;
          else if (__result == std::codecvt_base::noconv)
            {
              // Write the elements not yet handled as they are, then
              // empty the put area.
              if (!_M_put(__next, __last - __next))
                return false;
              this->pbump(__first - __last);
              return true;
            }

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      // Repeat while input remains and the last pass made progress.
      while (__next != __last && __next != __start);

      // Slide any unconverted elements to the front of the put area.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(__first - __next);
      return __next != __first;
    }

    std::streambuf*           _M_buf;   // underlying byte buffer (not owned)
    std::unique_ptr<_Codecvt> _M_cvt;   // owned facet
    state_type                _M_state;

    static const std::streamsize _S_buffer_length = 32;
    static const std::streamsize _S_putback_length = 3;
    _Elem                     _M_put_area[_S_buffer_length];
    _Elem                     _M_get_area[_S_buffer_length];
    // Number of bytes at the start of _M_get_buf left over from the
    // previous conversion.
    std::streamsize           _M_unconv = 0;
    char                      _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                      _M_always_noconv;
  };
00578 
00579 #endif  // _GLIBCXX_USE_WCHAR_T
00580 
00581   /// @} group locales
00582 
00583 _GLIBCXX_END_NAMESPACE_VERSION
00584 } // namespace
00585 
00586 #endif // __cplusplus
00587 
00588 #endif /* _LOCALE_CONV_H */