Go to the documentation of this file.
37 return mIter == mString->mData.begin();
42 return mIter == mString->mData.end();
47 return mIter - mString->mData.begin();
52 mIter = mString->mData.begin() + index;
58 return mString->getChar( current_index );
64 int change = mString->setChar( current_index, uc );
65 _jump_to( current_index );
72 if ( _test_end() )
return;
78 lead_half = mIter[-1];
88 if ( _test_begin() )
return;
93 lead_half = mIter[-1];
206 return _getCharacter();
211 return _setCharacter( uc );
325 return _getCharacter();
543 #if MYGUI_IS_NATIVE_WCHAR_T
609 return mData.max_size();
614 mData.reserve(
size );
619 mData.resize( num, val );
624 mData.swap( from.mData );
629 return mData.empty();
634 return mData.c_str();
644 return mData.capacity();
657 tmp.mData.swap(
data );
669 #if MYGUI_IS_NATIVE_WCHAR_T
673 mData.push_back(
static_cast<code_point>( val ) );
679 mData.push_back( val );
684 mData.push_back(
static_cast<code_point>( val ) );
700 return *m_buffer.mStrBuffer;
706 return m_buffer.mStrBuffer->c_str();
711 _load_buffer_UTF32();
712 return *m_buffer.mUTF32StrBuffer;
717 _load_buffer_UTF32();
718 return m_buffer.mUTF32StrBuffer->c_str();
724 return *m_buffer.mWStrBuffer;
730 return m_buffer.mWStrBuffer->c_str();
735 return mData.at( loc );
740 return mData.at( loc );
753 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
770 if ( newSize > existingSize ) {
772 insert( loc + 1, 1, cp[1] );
775 if ( newSize < existingSize ) {
783 if ( l == 2 )
at( loc + 1 ) = cp[1];
790 i.
mIter = mData.begin();
806 i.
mIter = mData.end();
822 i.
mIter = mData.end();
838 i.
mIter = mData.begin();
859 mData.assign( str.mData );
871 mData.assign( str, num );
877 mData.assign( str.mData, index,
len );
883 mData.assign( num, ch );
890 mData.reserve( wstr.length() );
891 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
893 std::wstring::const_iterator i, ie = wstr.end();
894 for ( i = wstr.begin(); i != ie; i++ ) {
896 mData.push_back( tmp );
898 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
901 std::wstring::const_iterator i, ie = wstr.end();
902 for ( i = wstr.begin(); i != ie; i++ ) {
905 if ( l > 0 ) mData.push_back( cp[0] );
906 if ( l > 1 ) mData.push_back( cp[1] );
912 #if MYGUI_IS_NATIVE_WCHAR_T
938 unsigned char utf8buf[7];
945 std::string::const_iterator i, ie = str.end();
946 for ( i = str.begin(); i != ie; i++ ) {
948 for (
size_t j = 0; j < utf8len; j++ ) {
949 utf8buf[j] = (
static_cast<unsigned char>( *( i + j ) ) );
951 utf8buf[utf8len] = 0;
956 append( utf16buff, utf16len );
963 std::string tmp(
c_str );
976 mData.append( str.mData );
988 mData.append( str.mData, index,
len );
994 mData.append( str, num );
1000 mData.append( num, ch );
1010 #if MYGUI_IS_NATIVE_WCHAR_T
1013 std::wstring tmp( w_str, num );
1061 mData.insert( index, str.mData );
1067 mData.insert( index1, str.mData, index2, num );
1078 mData.insert( index, str, num );
1082 #if MYGUI_IS_NATIVE_WCHAR_T
1100 mData.insert( index, num, ch );
1104 #if MYGUI_IS_NATIVE_WCHAR_T
1123 return insert( index, num, cp[0] );
1127 insert( index, 1, cp[1] );
1128 insert( index, 1, cp[0] );
1135 mData.insert( i.
mIter, num, ch );
1137 #if MYGUI_IS_NATIVE_WCHAR_T
1183 mData.erase( index );
1185 mData.erase( index, num );
1191 mData.replace( index1, num1, str.mData, 0,
npos );
1197 mData.replace( index1, num1, str.mData, 0, num2 );
1203 mData.replace( index1, num1, str.mData, index2, num2 );
1213 return replace( index1, num1, str, 0, num );
1218 mData.replace( index, num1, num2, ch );
1228 return replace( index1, num1, num, ch );
1233 return mData.compare( str.mData );
1238 return mData.compare( str );
1243 return mData.compare( index,
length, str.mData );
1248 return mData.compare( index,
length, str.mData, index2, length2 );
1253 return mData.compare( index,
length, str, length2 );
1256 #if MYGUI_IS_NATIVE_WCHAR_T
1259 UString tmp( w_str, length2 );
1272 return mData.find( str.
c_str(), index );
1287 #if MYGUI_IS_NATIVE_WCHAR_T
1291 return mData.find( tmp.c_str(), index,
length );
1302 return mData.find( ch, index );
1305 #if MYGUI_IS_NATIVE_WCHAR_T
1321 return mData.rfind( str.
c_str(), index );
1327 return mData.rfind( tmp.
c_str(), index, num );
1333 return mData.rfind( tmp.
c_str(), index, num );
1336 #if MYGUI_IS_NATIVE_WCHAR_T
1340 return mData.rfind( tmp.c_str(), index, num );
1351 return mData.rfind( ch, index );
1354 #if MYGUI_IS_NATIVE_WCHAR_T
1372 while ( i < num && ( index + i ) <
len ) {
1393 #if MYGUI_IS_NATIVE_WCHAR_T
1411 while ( i < num && ( index + i ) <
len ) {
1432 #if MYGUI_IS_NATIVE_WCHAR_T
1450 if ( index >
len ) index =
len - 1;
1452 while ( i < num && ( index - i ) !=
npos ) {
1474 #if MYGUI_IS_NATIVE_WCHAR_T
1492 if ( index >
len ) index =
len - 1;
1494 while ( i < num && ( index - i ) !=
npos ) {
1521 #if MYGUI_IS_NATIVE_WCHAR_T
1562 #if MYGUI_IS_NATIVE_WCHAR_T
1606 UString::operator std::string()
const
1608 return std::string(
asUTF8() );
1612 UString::operator std::wstring()
const
1614 return std::wstring(
asWStr() );
1620 if ( 0xD800 <= cp && cp <= 0xDFFF )
1627 if ( 0xD800 <= cp && cp <= 0xDBFF )
1634 if ( 0xDC00 <= cp && cp <= 0xDFFF )
1641 if ( 0xD800 <= cp && cp <= 0xDBFF )
1657 bool wordPair =
false;
1660 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1662 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1671 unsigned short cU = cp1, cL = cp2;
1675 out_uc = ( cU & 0x03FF ) << 10;
1676 out_uc |= ( cL & 0x03FF );
1684 if ( in_uc <= 0xFFFF ) {
1693 tmp =
static_cast<unsigned short>(( uc >> 10 ) & 0x03FF);
1698 tmp =
static_cast<unsigned short>(uc & 0x03FF);
1707 return ( cp & ~_cont_mask ) != _cont;
1712 if ( !( cp & 0x80 ) )
return 1;
1713 if (( cp & ~_lead1_mask ) == _lead1 )
return 2;
1714 if (( cp & ~_lead2_mask ) == _lead2 )
return 3;
1715 if (( cp & ~_lead3_mask ) == _lead3 )
return 4;
1716 if (( cp & ~_lead4_mask ) == _lead4 )
return 5;
1717 if (( cp & ~_lead5_mask ) == _lead5 )
return 6;
1733 if ( !( uc & ~0x0000007F ) )
return 1;
1734 if ( !( uc & ~0x000007FF ) )
return 2;
1735 if ( !( uc & ~0x0000FFFF ) )
return 3;
1736 if ( !( uc & ~0x001FFFFF ) )
return 4;
1737 if ( !( uc & ~0x03FFFFFF ) )
return 5;
1738 if ( !( uc & ~0x7FFFFFFF ) )
return 6;
1756 c = in_cp[i] & _lead5_mask;
1759 c = in_cp[i] & _lead4_mask;
1762 c = in_cp[i] & _lead3_mask;
1765 c = in_cp[i] & _lead2_mask;
1768 c = in_cp[i] & _lead1_mask;
1773 for ( ++i; i <
len; i++ )
1775 if (( in_cp[i] & ~_cont_mask ) != _cont )
1782 c |= ( in_cp[i] & _cont_mask );
1795 for (
size_t i =
len - 1; i > 0; i-- ) {
1796 out_cp[i] =
static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1803 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1806 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1809 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1812 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1815 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1819 out_cp[0] =
static_cast<unsigned char>(( c ) & 0x7F);
1829 std::string tmp(
reinterpret_cast<const char*
>(
c_str ) );
1835 std::string::const_iterator i, ie = str.end();
1841 if (( *i ) & 0x80 ) {
1842 unsigned char c = ( *i );
1843 size_t contBytes = 0;
1846 if (( c & ~_lead1_mask ) == _lead1 ) {
1854 }
else if (( c & ~_lead2_mask ) == _lead2 ) {
1856 if ( c == _lead2 ) {
1858 if (( c & _lead2 ) == _cont )
1865 }
else if (( c & ~_lead3_mask ) == _lead3 ) {
1867 if ( c == _lead3 ) {
1869 if (( c & _lead3 ) == _cont )
1876 }
else if (( c & ~_lead4_mask ) == _lead4 ) {
1878 if ( c == _lead4 ) {
1880 if (( c & _lead4 ) == _cont )
1887 }
else if (( c & ~_lead5_mask ) == _lead5 ) {
1889 if ( c == _lead5 ) {
1891 if (( c & _lead5 ) == _cont )
1900 while ( contBytes-- ) {
1902 if (( c & ~_cont_mask ) != _cont )
1915 void UString::_init()
1917 m_buffer.mVoidBuffer =
nullptr;
1918 m_bufferType = bt_none;
1922 void UString::_cleanBuffer()
const
1924 if ( m_buffer.mVoidBuffer !=
nullptr ) {
1925 switch ( m_bufferType ) {
1927 delete m_buffer.mStrBuffer;
1930 delete m_buffer.mWStrBuffer;
1932 case bt_utf32string:
1933 delete m_buffer.mUTF32StrBuffer;
1938 assert(
"This should never happen - mVoidBuffer should never contain something if we "
1939 "don't know the type");
1942 m_buffer.mVoidBuffer =
nullptr;
1944 m_bufferType = bt_none;
1948 void UString::_getBufferStr()
const
1950 if ( m_bufferType != bt_string ) {
1952 m_buffer.mStrBuffer =
new std::string();
1953 m_bufferType = bt_string;
1955 m_buffer.mStrBuffer->clear();
1958 void UString::_getBufferWStr()
const
1960 if ( m_bufferType != bt_wstring ) {
1962 m_buffer.mWStrBuffer =
new std::wstring();
1963 m_bufferType = bt_wstring;
1965 m_buffer.mWStrBuffer->clear();
1968 void UString::_getBufferUTF32Str()
const
1970 if ( m_bufferType != bt_utf32string ) {
1973 m_bufferType = bt_utf32string;
1975 m_buffer.mUTF32StrBuffer->clear();
1978 void UString::_load_buffer_UTF8()
const
1981 std::string& buffer = ( *m_buffer.mStrBuffer );
1982 buffer.reserve(
length() );
1984 unsigned char utf8buf[6];
1985 char* charbuf = (
char* )utf8buf;
1991 c = i.getCharacter();
1995 buffer.push_back( charbuf[j++] );
1999 void UString::_load_buffer_WStr()
const
2002 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2003 buffer.reserve(
length() );
2004 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
2006 for ( i =
begin(); i != ie; ++i ) {
2007 buffer.push_back((
wchar_t )( *i ) );
2009 #else // wchar_t fits UTF-32
2013 c = i.getCharacter();
2014 buffer.push_back((
wchar_t )c );
2019 void UString::_load_buffer_UTF32()
const
2021 _getBufferUTF32Str();
2022 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2023 buffer.reserve(
length() );
2029 c = i.getCharacter();
2030 buffer.push_back( c );
bool operator==(const UString &right) const
equality operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
_rev_iterator & operator--()
pre-decrement
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
size_type length_Characters() const
Returns the number of Unicode characters in the string.
_rev_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
void _jump_to(size_type index)
const code_point * data() const
returns a pointer to the first character in the current string
const value_type & operator[](difference_type n) const
dereference at offset operator
void push_back(unicode_char val)
appends val to the end of the string
_fwd_iterator & operator++()
pre-increment
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-8 encoding, returns the number ...
void _become(const _base_iterator &i)
_const_fwd_iterator operator+(difference_type n)
addition operator
uint32 unicode_char
a single 32-bit Unicode character
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
float len(float x, float y)
const value_type & operator[](difference_type n) const
dereference at offset operator
_fwd_iterator iterator
iterator
code_point & operator[](size_type index)
code point dereference operator
size_t size_type
size type used to indicate string size and character positions within the string
void resize(size_type num, const code_point &val=0)
changes the size of the string to num, filling in any new area with val
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
unicode_char _getCharacter() const
forward iterator for UString
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
bool operator!=(const UString &right) const
inequality operator
_fwd_iterator & operator=(const _fwd_iterator &i)
bool operator<(const UString &right) const
less than operator
UString()
default constructor, creates an empty string
_const_rev_iterator & operator--()
pre-decrement
const code_point * c_str() const
returns a pointer to the first character in the current string
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
base iterator class for UString
code_point value_type
value type typedef for use in iterators
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
uint16 code_point
a single UTF-16 code point
iterator begin()
returns an iterator to the first element of the string
UString & append(const UString &str)
appends str on to the end of the current string
size_type _get_index() const
value_type & operator*() const
dereference operator
_fwd_iterator operator-(difference_type n)
subtraction operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
_fwd_iterator operator+(difference_type n)
addition operator
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
const forward iterator for UString
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
std::basic_string< code_point > dstring
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
_rev_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator operator-(difference_type n)
subtraction operator
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
forward iterator for UString
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
static const size_type npos
the usual constant representing: not found, no limit, etc
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
void clear()
deletes all of the elements in the string
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-16 encoding,...
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
size_type length() const
Returns the number of code points in the current string.
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
bool operator>(const UString &right) const
greater than operator
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
_fwd_iterator & operator--()
pre-decrement
const value_type & operator*() const
dereference operator
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
void swap(UString &from)
exchanges the elements of the current string with those of from
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator const_iterator
const iterator
_const_fwd_iterator & operator++()
pre-increment
const value_type & operator*() const
dereference operator
value_type & operator*() const
dereference operator
size_type size() const
Returns the number of code points in the current string.
void reserve(size_type size)
sets the capacity of the string to at least size code points
value_type & operator[](difference_type n) const
dereference at offset operator
void _seekFwd(size_type c)
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
_const_rev_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_rev_iterator & operator++()
pre-increment
int compare(const UString &str) const
compare str to the current string
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
_const_rev_iterator operator+(difference_type n)
addition operator
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
ptrdiff_t difference_type
bool operator>=(const UString &right) const
greater than or equal operator
const reverse iterator for UString
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
bool empty() const
returns true if the string has no elements, false otherwise
int _setCharacter(unicode_char uc)
void _seekRev(size_type c)
iterator end()
returns an iterator just past the end of the string
_const_fwd_iterator & operator=(const _const_fwd_iterator &i)
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
bool operator<=(const UString &right) const
less than or equal operator
_const_fwd_iterator & operator--()
pre-decrement
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that matches any character in str,...
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
_rev_iterator & operator+=(difference_type n)
addition assignment operator
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
_const_rev_iterator & operator++()
pre-increment
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
value_type & operator[](difference_type n) const
dereference at offset operator