1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 4 ********************************************************************** 5 * Copyright (C) 1998-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 11 * Modification History: 13 * Date Name Description 14 * 09/25/98 stephen Creation. 15 * 11/11/98 stephen Changed per 11/9 code review. 16 * 04/20/99 stephen Overhauled per 4/16 code review. 17 * 11/18/99 aliu Made to inherit from Replaceable. Added method 18 * handleReplaceBetween(); other methods unchanged. 19 * 06/25/01 grhoten Remove dependency on iostream. 20 ****************************************************************************** 33 #if U_SHOW_CPLUSPLUS_API 36 #include <string_view> 56 #if !UCONFIG_NO_BREAK_ITERATION 57 classBreakIterator;
// unicode/brkiter.h 63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. 75 char16_t *dest, int32_t destCapacity,
76 const char16_t *src, int32_t srcLength,
82 classLocale;
// unicode/locid.h 83 classStringCharacterIterator;
84 classUnicodeStringAppendable;
// unicode/appendable.h 86 /* The <iostream> include has been moved to unicode/ustream.h */ 98 #define US_INV icu::UnicodeString::kInvariant 120 #if !U_CHAR16_IS_TYPEDEF 121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length) 123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length) 135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 144 #ifndef UNISTR_FROM_CHAR_EXPLICIT 145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 146 // Auto-"explicit" in ICU library code. 147 # define UNISTR_FROM_CHAR_EXPLICIT explicit 149 // Empty by default for source code compatibility. 150 # define UNISTR_FROM_CHAR_EXPLICIT 164 #ifndef UNISTR_FROM_STRING_EXPLICIT 165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 166 // Auto-"explicit" in ICU library code. 167 # define UNISTR_FROM_STRING_EXPLICIT explicit 169 // Empty by default for source code compatibility. 170 # define UNISTR_FROM_STRING_EXPLICIT 207 #ifndef UNISTR_OBJECT_SIZE 208 # define UNISTR_OBJECT_SIZE 64 324 //======================================== 325 // Read-only operations 326 //======================================== 328 /* Comparison - bitwise only - for international comparison use collation */ 354 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
356 std::u16string_view sv(internal::toU16StringView(text));
357 uint32_t len;
// unsigned to avoid a compiler warning 358 return !isBogus() && (len =
length()) == sv.length() && doEquals(sv.data(), len);
387 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
456 inline int8_t compare(int32_t start,
477 inline int8_t compare(int32_t start,
481 int32_t srcLength)
const;
496 int32_t srcLength)
const;
512 inline int8_t compare(int32_t start,
514 const char16_t *srcChars)
const;
533 inline int8_t compare(int32_t start,
535 const char16_t *srcChars,
537 int32_t srcLength)
const;
556 inline int8_t compareBetween(int32_t start,
560 int32_t srcLimit)
const;
579 inline int8_t compareCodePointOrder(
constUnicodeString& text)
const;
600 inline int8_t compareCodePointOrder(int32_t start,
625 inline int8_t compareCodePointOrder(int32_t start,
629 int32_t srcLength)
const;
650 int32_t srcLength)
const;
671 inline int8_t compareCodePointOrder(int32_t start,
673 const char16_t *srcChars)
const;
696 inline int8_t compareCodePointOrder(int32_t start,
698 const char16_t *srcChars,
700 int32_t srcLength)
const;
723 inline int8_t compareCodePointOrderBetween(int32_t start,
727 int32_t srcLimit)
const;
747 inline int8_t caseCompare(
constUnicodeString& text, uint32_t options)
const;
769 inline int8_t caseCompare(int32_t start,
772 uint32_t options)
const;
796 inline int8_t caseCompare(int32_t start,
801 uint32_t options)
const;
824 uint32_t options)
const;
846 inline int8_t caseCompare(int32_t start,
848 const char16_t *srcChars,
849 uint32_t options)
const;
873 inline int8_t caseCompare(int32_t start,
875 const char16_t *srcChars,
878 uint32_t options)
const;
902 inline int8_t caseCompareBetween(int32_t start,
907 uint32_t options)
const;
930 int32_t srcLength)
const;
941 int32_t srcLength)
const;
952 inlineUBool startsWith(
const char16_t *srcChars,
954 int32_t srcLength)
const;
977 int32_t srcLength)
const;
988 int32_t srcLength)
const;
1000 inlineUBool endsWith(
const char16_t *srcChars,
1002 int32_t srcLength)
const;
1005 /* Searching - bitwise only */ 1027 int32_t start)
const;
1042 int32_t length)
const;
1064 int32_t length)
const;
1077 inline int32_t indexOf(
const char16_t *srcChars,
1079 int32_t start)
const;
1096 int32_t length)
const;
1118 int32_t length)
const;
1127 inline int32_t indexOf(char16_t c)
const;
1137 inline int32_t indexOf(
UChar32 c)
const;
1147 inline int32_t indexOf(char16_t c,
1148 int32_t start)
const;
1159 inline int32_t indexOf(
UChar32 c,
1160 int32_t start)
const;
1172 inline int32_t indexOf(char16_t c,
1174 int32_t length)
const;
1187 inline int32_t indexOf(
UChar32 c,
1189 int32_t length)
const;
1211 int32_t start)
const;
1226 int32_t length)
const;
1248 int32_t length)
const;
1260 inline int32_t lastIndexOf(
const char16_t *srcChars,
1262 int32_t start)
const;
1279 int32_t length)
const;
1301 int32_t length)
const;
1310 inline int32_t lastIndexOf(char16_t c)
const;
1320 inline int32_t lastIndexOf(
UChar32 c)
const;
1330 inline int32_t lastIndexOf(char16_t c,
1331 int32_t start)
const;
1342 inline int32_t lastIndexOf(
UChar32 c,
1343 int32_t start)
const;
1355 inline int32_t lastIndexOf(char16_t c,
1357 int32_t length)
const;
1370 inline int32_t lastIndexOf(
UChar32 c,
1372 int32_t length)
const;
1375 /* Character access */ 1385 inline char16_t
charAt(int32_t offset)
const;
1394 inline char16_t operator[] (int32_t offset)
const;
1496 /* Substring extraction */ 1513 inlinevoid extract(int32_t start,
1516 int32_t dstStart = 0)
const;
1552 inlinevoid extract(int32_t start,
1570 int32_t dstStart = 0)
const;
1606 int32_t startLength,
1608 int32_t targetCapacity,
1611 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1633 int32_t startLength,
1635 uint32_t targetLength)
const;
1639 #if !UCONFIG_NO_CONVERSION 1666 inline int32_t extract(int32_t start,
1667 int32_t startLength,
1669 constchar* codepage =
nullptr)
const;
1701 int32_t startLength,
1703 uint32_t targetLength,
1704 constchar *codepage)
const;
1782 template<
typename StringClass>
1789 #ifndef U_HIDE_DRAFT_API 1801 template<
typename StringClass>
1808 #endif// U_HIDE_DRAFT_API 1827 /* Length operations */ 1837 inline int32_t
length()
const;
1886 inlineUBool isEmpty()
const;
1897 inline int32_t getCapacity()
const;
1899 /* Other operations */ 1906 inline int32_t hashCode()
const;
1920 inlineUBool isBogus()
const;
1922 #ifndef U_HIDE_DRAFT_API 1924 // These type aliases are private; there is no guarantee that they will remain 1925 // aliases to the same types in subsequent versions of ICU. 1926 // Note that whether `std::u16string_view::const_iterator` is a pointer or a 1927 // class that models contiguous_iterator is platform-dependent. 1928 using unspecified_iterator = std::u16string_view::const_iterator;
1929 using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator;
// Returns an iterator to the first code unit of this string.
// Implemented by converting *this to a temporary std::u16string_view
// and returning that view's begin().
1937 unspecified_iterator
begin()
const{
return std::u16string_view(*this).begin(); }
// Returns the past-the-end iterator for this string.
// Implemented by converting *this to a temporary std::u16string_view
// and returning that view's end().
1943 unspecified_iterator
end()
const{
return std::u16string_view(*this).end(); }
// Returns a reverse iterator that starts at the last code unit of this string.
// Implemented by converting *this to a temporary std::u16string_view
// and returning that view's rbegin().
1949 unspecified_reverse_iterator
rbegin()
const{
return std::u16string_view(*this).rbegin(); }
// Returns the reverse past-the-end iterator (one before the first code unit).
// Implemented by converting *this to a temporary std::u16string_view
// and returning that view's rend().
1955 unspecified_reverse_iterator
rend()
const{
return std::u16string_view(*this).rend(); }
1956 #endif// U_HIDE_DRAFT_API 1958 //======================================== 1960 //======================================== 1962 /* Assignment operations */ 2021 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2024 return doReplace(0,
length(), internal::toU16StringView(src));
2050 friendinlinevoid U_EXPORT2
2170 int32_t textLength);
2193 int32_t buffCapacity);
2247 /* Append operations */ 2286 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2288 return doAppend(internal::toU16StringView(src));
2357 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2359 return doAppend(internal::toU16StringView(src));
2378 #ifndef U_HIDE_DRAFT_API 2387 #endif// U_HIDE_DRAFT_API 2389 /* Insert operations */ 2434 const char16_t *srcChars,
2474 /* Replace operations */ 2534 const char16_t *srcChars,
2647 virtualvoidcopy(int32_t start, int32_t limit, int32_t dest)
override;
2649 /* Search and replace operations */ 2705 /* Remove operations */ 2726 int32_t length =
static_cast<int32_t
>(
INT32_MAX));
2737 int32_t limit =
static_cast<int32_t
>(
INT32_MAX));
2750 /* Length operations */ 2764 char16_t padChar = 0x0020);
2778 char16_t padChar = 0x0020);
2786 inlineUBool truncate(int32_t targetLength);
2795 /* Miscellaneous operations */ 2849 #if !UCONFIG_NO_BREAK_ITERATION 2957 //======================================== 2958 // Access to the internal buffer 2959 //======================================== 3058 inlineconst char16_t *getBuffer()
const;
// Conversion to std::u16string_view.
// The view is built from getBuffer() and length(); no characters are copied.
// length() is an int32_t, so it is cast explicitly to the view's size_type:
// list-initialization would otherwise reject the narrowing conversion.
3101 inlineoperator std::u16string_view()
const{
3102 return {getBuffer(),
static_cast<std::u16string_view::size_type
>(
length())};
3105 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3115 inlineoperator std::wstring_view()
const{
3116 const char16_t *p = getBuffer();
3117 #ifdef U_ALIASING_BARRIER 3120 return {
reinterpret_cast<constwchar_t *
>(p), (std::wstring_view::size_type)
length() };
3122 #endif// U_SIZEOF_WCHAR_T 3124 //======================================== 3126 //======================================== 3168 #ifdef U_HIDE_DRAFT_API 3190 #endif// U_HIDE_DRAFT_API 3192 #if !U_CHAR16_IS_TYPEDEF && \ 3193 (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000)) 3217 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)) 3239 UnicodeString(ConstChar16Ptr(text), -1) {}
3271 int32_t textLength);
3273 #if !U_CHAR16_IS_TYPEDEF 3294 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3323 inlineUnicodeString(
const std::nullptr_t text, int32_t textLength);
3337 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3339 fUnion.fFields.fLengthAndFlags = kShortString;
3340 doAppend(internal::toU16StringViewNullable(text));
3375 int32_t textLength);
3397 #if !U_CHAR16_IS_TYPEDEF 3410 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3432 inlineUnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3434 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 3473 #if !UCONFIG_NO_CONVERSION 3535 constchar *src, int32_t srcLength,
3661 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3663 return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3686 return readOnlyAliasFromUnicodeString(text);
3717 /* Miscellaneous operations */ 3790 //======================================== 3791 // Implementation methods 3792 //======================================== 3816 staticUnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3819 // For char* constructors. Could be made public. 3821 // For extract(char*). 3822 // We could make a toUTF8(target, capacity, errorCode) public but not 3823 // this version: New API will be cleaner if we make callers create substrings 3824 // rather than having start+length on every method, 3825 // and it should take a UErrorCode&. 3827 toUTF8(int32_t start, int32_t len,
3828 char *target, int32_t capacity)
const;
3835 return doEquals(text.getArrayStart(), len);
3837 UBool doEquals(
const char16_t *text, int32_t len)
const;
3840 doEqualsSubstring(int32_t start,
3844 int32_t srcLength)
const;
3846 UBool doEqualsSubstring(int32_t start,
3848 const char16_t *srcChars,
3850 int32_t srcLength)
const;
3853 doCompare(int32_t start,
3857 int32_t srcLength)
const;
3859 int8_t doCompare(int32_t start,
3861 const char16_t *srcChars,
3863 int32_t srcLength)
const;
3866 doCompareCodePointOrder(int32_t start,
3870 int32_t srcLength)
const;
3872 int8_t doCompareCodePointOrder(int32_t start,
3874 const char16_t *srcChars,
3876 int32_t srcLength)
const;
3879 doCaseCompare(int32_t start,
3884 uint32_t options)
const;
3887 doCaseCompare(int32_t start,
3889 const char16_t *srcChars,
3892 uint32_t options)
const;
3894 int32_t doIndexOf(char16_t c,
3896 int32_t length)
const;
3900 int32_t length)
const;
3902 int32_t doLastIndexOf(char16_t c,
3904 int32_t length)
const;
3906 int32_t doLastIndexOf(
UChar32 c,
3908 int32_t length)
const;
3910 void doExtract(int32_t start,
3913 int32_t dstStart)
const;
3915 inlinevoid doExtract(int32_t start,
3919 inline char16_t doCharAt(int32_t offset)
const;
3929 const char16_t *srcChars,
3932 UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3935 UnicodeString& doAppend(
const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3941 // calculate hash code 3942 int32_t doHashCode()
const;
3944 // get pointer to start of array 3945 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3946 inline char16_t* getArrayStart();
3947 inlineconst char16_t* getArrayStart()
const;
3949 inlineUBool hasShortLength()
const;
3950 inline int32_t getShortLength()
const;
3952 // A UnicodeString object (not necessarily its current buffer) 3953 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 3954 inlineUBool isWritable()
const;
3956 // Is the current buffer writable? 3957 inlineUBool isBufferWritable()
const;
3959 // None of the following does releaseArray(). 3960 inlinevoid setZeroLength();
3961 inlinevoid setShortLength(int32_t len);
3962 inlinevoid setLength(int32_t len);
3963 inlinevoid setToEmpty();
3964 inlinevoid setArray(char16_t *array, int32_t len, int32_t capacity);
// sets length but not flags 3966 // allocate the array; result may be the stack buffer 3967 // sets refCount to 1 if appropriate 3968 // sets fArray, fCapacity, and flags 3970 // returns boolean for success or failure 3971 UBool allocate(int32_t capacity);
3973 // release the array if owned 3974 void releaseArray();
3976 // turn a bogus string into an empty one 3979 // implements assignment operator, copy constructor, and fastCopyFrom() 3982 // Copies just the fields without memory management. 3985 // Pin start and limit to acceptable values. 3986 inlinevoid pinIndex(int32_t& start)
const;
3987 inlinevoid pinIndices(int32_t& start,
3988 int32_t& length)
const;
3990 #if !UCONFIG_NO_CONVERSION 3992 /* Internal extract() using UConverter. */ 3993 int32_t doExtract(int32_t start, int32_t length,
3994 char *dest, int32_t destCapacity,
3999 * Real constructor for converting from codepage data. 4000 * It assumes that it is called with !fRefCounted. 4002 * If `codepage==0`, then the default converter 4003 * is used for the platform encoding. 4004 * If `codepage` is an empty string (`""`), 4005 * then a simple conversion is performed on the codepage-invariant 4006 * subset ("invariant characters") of the platform encoding. See utypes.h. 4008 void doCodepageCreate(
constchar *codepageData,
4010 constchar *codepage);
4013 * Worker function for creating a UnicodeString from 4014 * a codepage string using a UConverter. 4017 doCodepageCreate(
constchar *codepageData,
4025 * This function is called when write access to the array 4028 * We need to make a copy of the array if 4029 * the buffer is read-only, or 4030 * the buffer is refCounted (shared), and refCount>1, or 4031 * the buffer is too small. 4033 * Return false if memory could not be allocated. 4035 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
4036 int32_t growCapacity = -1,
4037 UBool doCopyArray =
true,
4038 int32_t** pBufferToDelete =
nullptr,
4039 UBool forceClone =
false);
4047 caseMap(int32_t caseLocale, uint32_t options,
4055 int32_t removeRef();
4056 int32_t refCount()
const;
4066 kInvalidUChar=0xffff,
// U+FFFF returned by charAt(invalid index) 4067 kInvalidHashCode=0,
// invalid hash code 4068 kEmptyHashCode=1,
// hash code for empty string 4070 // bit flag values for fLengthAndFlags 4071 kIsBogus=1,
// this string is bogus, i.e., not valid or nullptr 4072 kUsingStackBuffer=2,
// using fUnion.fStackFields instead of fUnion.fFields 4073 kRefCounted=4,
// there is a refCount field before the characters in fArray 4074 kBufferIsReadonly=8,
// do not write to this buffer 4075 kOpenGetBuffer=16,
// getBuffer(minCapacity) was called (is "open"), 4076 // and releaseBuffer(newLength) must be called 4077 kAllStorageFlags=0x1f,
4079 kLengthShift=5,
// remaining 11 bits for non-negative short length, or negative if long 4080 kLength1=1<<kLengthShift,
4081 kMaxShortLength=0x3ff,
// max non-negative short length (leaves top bit 0) 4082 kLengthIsLarge=0xffe0,
// short length < 0, real length is in fUnion.fFields.fLength 4084 // combined values for convenience 4085 kShortString=kUsingStackBuffer,
4086 kLongString=kRefCounted,
4087 kReadonlyAlias=kBufferIsReadonly,
4091 friendclassUnicodeStringAppendable;
4093 unionStackBufferOrFields;
// forward declaration necessary before friend declaration 4094 friendunionStackBufferOrFields;
// make US_STACKBUF_SIZE visible inside fUnion 4097 * The following are all the class fields that are stored 4098 * in each UnicodeString object. 4099 * Note that UnicodeString has virtual functions, 4100 * therefore there is an implicit vtable pointer 4101 * as the first real field. 4102 * The fields should be aligned such that no padding is necessary. 4103 * On 32-bit machines, the size should be 32 bytes, 4104 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 4106 * We use a hack to achieve this. 4108 * With at least some compilers, each of the following is forced to 4109 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 4110 * rounded up with additional padding if the fields do not already fit that requirement: 4111 * - sizeof(class UnicodeString) 4112 * - offsetof(UnicodeString, fUnion) 4114 * - sizeof(fStackFields) 4116 * We optimize for the longest possible internal buffer for short strings. 4117 * fUnion.fStackFields begins with 2 bytes for storage flags 4118 * and the length of relatively short strings, 4119 * followed by the buffer for short string contents. 4120 * There is no padding inside fStackFields. 4122 * Heap-allocated and aliased strings use fUnion.fFields. 4123 * Both fStackFields and fFields must begin with the same fields for flags and short length, 4124 * that is, those must have the same memory offsets inside the object, 4125 * because the flags must be inspected in order to decide which half of fUnion is being used. 4126 * We assume that the compiler does not reorder the fields. 4128 * (Padding at the end of fFields is ok: 4129 * As long as it is no larger than fStackFields, it is not wasted space.) 4131 * For some of the history of the UnicodeString class fields layout, see 4132 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer" 4133 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays" 4134 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?" 
4136 // (implicit) *vtable; 4137 unionStackBufferOrFields {
4138 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used. 4139 // Each struct of the union must begin with fLengthAndFlags. 4141 int16_t fLengthAndFlags;
// bit fields: see constants above 4142 char16_t fBuffer[US_STACKBUF_SIZE];
// buffer for short strings 4145 int16_t fLengthAndFlags;
// bit fields: see constants above 4146 int32_t fLength;
// number of characters in fArray if the short length in fLengthAndFlags is negative (length > kMaxShortLength); else undefined 4147 int32_t fCapacity;
// capacity of fArray (in char16_ts) 4148 // array pointer last to minimize padding for machines with P128 data model 4149 // or pointer sizes that are not a power of 2 4150 char16_t *fArray;
// the Unicode data 4176 template<
typename S,
typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
4181 #ifndef U_FORCE_HIDE_INTERNAL_API 4187 //======================================== 4189 //======================================== 4191 //======================================== 4193 //======================================== 4196 UnicodeString::pinIndex(int32_t& start)
const 4201 }
elseif(start > length()) {
4207 UnicodeString::pinIndices(int32_t& start,
4208 int32_t& _length)
const 4211 int32_t len = length();
4214 }
elseif(start > len) {
4219 }
elseif(_length > (len - start)) {
4220 _length = (len - start);
// Returns a pointer to the start of the character storage (mutable overload).
// If kUsingStackBuffer is set in fLengthAndFlags the text lives in the
// in-object fStackFields.fBuffer; otherwise it is reached through fFields.fArray.
// Unlike the public getBuffer(), this does not test kOpenGetBuffer.
4225 UnicodeString::getArrayStart() {
4226 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4227 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
// Returns a pointer to the start of the character storage (const overload).
// Same selection as the mutable overload: fStackFields.fBuffer when
// kUsingStackBuffer is set, fFields.fArray otherwise.
// Unlike the public getBuffer(), this does not test kOpenGetBuffer.
4230 inlineconst char16_t*
4231 UnicodeString::getArrayStart()
const{
4232 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4233 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
// Default constructor: sets fLengthAndFlags to kShortString.
// kShortString equals kUsingStackBuffer, and the length bits above
// kLengthShift are zero, so the new string is empty and uses the stack buffer.
4236 //======================================== 4237 // Default constructor 4238 //======================================== 4241 UnicodeString::UnicodeString() {
4242 fUnion.fStackFields.fLengthAndFlags=kShortString;
// Constructor from a literal nullptr "text".
// The argument is ignored; the object is initialized exactly like the
// default constructor (flags = kShortString, i.e. empty, stack buffer).
4245 inline UnicodeString::UnicodeString(
const std::nullptr_t
/*text*/) {
4246 fUnion.fStackFields.fLengthAndFlags=kShortString;
// Constructor from a literal nullptr "text" plus a length.
// Both arguments are ignored; the object is initialized exactly like the
// default constructor (flags = kShortString, i.e. empty, stack buffer).
4249 inline UnicodeString::UnicodeString(
const std::nullptr_t
/*text*/, int32_t
/*length*/) {
4250 fUnion.fStackFields.fLengthAndFlags=kShortString;
// Constructor from a literal nullptr "buffer" plus length and capacity.
// All three arguments are ignored; the object is initialized exactly like the
// default constructor (flags = kShortString, i.e. empty, stack buffer).
4253 inline UnicodeString::UnicodeString(std::nullptr_t
/*buffer*/, int32_t
/*buffLength*/, int32_t
/*buffCapacity*/) {
4254 fUnion.fStackFields.fLengthAndFlags=kShortString;
// hasShortLength(): true when the signed 16-bit fLengthAndFlags is non-negative,
// i.e. the length is encoded in the bits above kLengthShift.
// A negative value (see kLengthIsLarge) means the real length is in fFields.fLength.
4257 //======================================== 4258 // Read-only implementation methods 4259 //======================================== 4261 UnicodeString::hasShortLength()
const{
4262 return fUnion.fFields.fLengthAndFlags>=0;
// getShortLength(): extracts the short length from fLengthAndFlags by shifting
// away the low kLengthShift storage-flag bits.
// Only meaningful when hasShortLength() is true (value is non-negative).
4266 UnicodeString::getShortLength()
const{
4267 // fLengthAndFlags must be non-negative -> short length >= 0 4268 // and arithmetic or logical shift does not matter. 4269 return fUnion.fFields.fLengthAndFlags>>kLengthShift;
// length(): number of char16_t code units in this string.
// Uses the short length packed into fLengthAndFlags when available,
// otherwise the separate fFields.fLength field.
4273 UnicodeString::length()
const{
4274 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
// getCapacity(): capacity of the current storage in char16_t units.
// US_STACKBUF_SIZE when the in-object stack buffer is in use
// (kUsingStackBuffer set), otherwise fFields.fCapacity.
4278 UnicodeString::getCapacity()
const{
4279 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4280 US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
// hashCode(): public inline entry point; forwards to the out-of-line
// doHashCode(), which performs the actual computation.
4284 UnicodeString::hashCode()
const 4285 {
return doHashCode(); }
// isBogus(): tests the kIsBogus bit of fLengthAndFlags.
// The masked value itself is returned, so the result is nonzero (not
// necessarily 1 before conversion) when the string is bogus.
4288 UnicodeString::isBogus()
const 4289 {
return fUnion.fFields.fLengthAndFlags & kIsBogus; }
// isWritable(): the object (not necessarily its current buffer) may be
// modified unless it is bogus or has an "open" getBuffer(minCapacity),
// i.e. neither kIsBogus nor kOpenGetBuffer is set.
4292 UnicodeString::isWritable()
const 4293 {
return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
// isBufferWritable(): can the *current* buffer be written in place?
// Requires that none of kOpenGetBuffer, kIsBogus, kBufferIsReadonly is set,
// and that a ref-counted buffer (kRefCounted) is not shared (refCount()==1).
4296 UnicodeString::isBufferWritable()
const 4299 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4300 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
// getBuffer() const: read-only access to the character storage.
// Three cases, selected by fLengthAndFlags:
//   1. kIsBogus or kOpenGetBuffer set - handled by the first branch
//      (NOTE(review): that branch's statement is not visible in this excerpt;
//      presumably it returns a null pointer - confirm against the full header);
//   2. kUsingStackBuffer set - the in-object fStackFields.fBuffer;
//   3. otherwise - the fFields.fArray pointer.
4303 inlineconst char16_t *
4304 UnicodeString::getBuffer()
const{
4305 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4307 }
elseif(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4308 return fUnion.fStackFields.fBuffer;
4310 return fUnion.fFields.fArray;
4314 //======================================== 4315 // Read-only alias methods 4316 //======================================== 4318 UnicodeString::doCompare(int32_t start,
4322 int32_t srcLength)
const 4325 returnstatic_cast<int8_t
>(!isBogus());
// 0 if both are bogus, 1 otherwise 4327 srcText.pinIndices(srcStart, srcLength);
4328 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4333 UnicodeString::doEqualsSubstring(int32_t start,
4335 const UnicodeString& srcText,
4337 int32_t srcLength)
const 4339 if(srcText.isBogus()) {
4342 srcText.pinIndices(srcStart, srcLength);
4343 return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4353 int32_t len = length(), textLength = text.
length();
4354 return !text.
isBogus() && len == textLength && doEquals(text, len);
4360 {
return (!
operator==(text)); }
4364 {
return doCompare(0, length(), text, 0, text.
length()) == 1; }
4368 {
return doCompare(0, length(), text, 0, text.
length()) == -1; }
4372 {
return doCompare(0, length(), text, 0, text.
length()) != -1; }
4376 {
return doCompare(0, length(), text, 0, text.
length()) != 1; }
4380 {
return doCompare(0, length(), text, 0, text.
length()); }
4383 UnicodeString::compare(int32_t start,
4386 {
return doCompare(start, _length, srcText, 0, srcText.
length()); }
4390 int32_t srcLength)
const 4391 {
return doCompare(0, length(), srcChars, 0, srcLength); }
4394 UnicodeString::compare(int32_t start,
4398 int32_t srcLength)
const 4399 {
return doCompare(start, _length, srcText, srcStart, srcLength); }
4402 UnicodeString::compare(int32_t start,
4404 const char16_t *srcChars)
const 4405 {
return doCompare(start, _length, srcChars, 0, _length); }
4408 UnicodeString::compare(int32_t start,
4410 const char16_t *srcChars,
4412 int32_t srcLength)
const 4413 {
return doCompare(start, _length, srcChars, srcStart, srcLength); }
4416 UnicodeString::compareBetween(int32_t start,
4420 int32_t srcLimit)
const 4421 {
return doCompare(start, limit - start,
4422 srcText, srcStart, srcLimit - srcStart); }
4425 UnicodeString::doCompareCodePointOrder(int32_t start,
4429 int32_t srcLength)
const 4432 returnstatic_cast<int8_t
>(!isBogus());
// 0 if both are bogus, 1 otherwise 4434 srcText.pinIndices(srcStart, srcLength);
4435 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4441 {
return doCompareCodePointOrder(0, length(), text, 0, text.
length()); }
4444 UnicodeString::compareCodePointOrder(int32_t start,
4447 {
return doCompareCodePointOrder(start, _length, srcText, 0, srcText.
length()); }
4451 int32_t srcLength)
const 4452 {
return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4455 UnicodeString::compareCodePointOrder(int32_t start,
4459 int32_t srcLength)
const 4460 {
return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4463 UnicodeString::compareCodePointOrder(int32_t start,
4465 const char16_t *srcChars)
const 4466 {
return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4469 UnicodeString::compareCodePointOrder(int32_t start,
4471 const char16_t *srcChars,
4473 int32_t srcLength)
const 4474 {
return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4477 UnicodeString::compareCodePointOrderBetween(int32_t start,
4481 int32_t srcLimit)
const 4482 {
return doCompareCodePointOrder(start, limit - start,
4483 srcText, srcStart, srcLimit - srcStart); }
4486 UnicodeString::doCaseCompare(int32_t start,
4491 uint32_t options)
const 4494 returnstatic_cast<int8_t
>(!isBogus());
// 0 if both are bogus, 1 otherwise 4496 srcText.pinIndices(srcStart, srcLength);
4497 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4503 return doCaseCompare(0, length(), text, 0, text.
length(), options);
4507 UnicodeString::caseCompare(int32_t start,
4510 uint32_t options)
const{
4511 return doCaseCompare(start, _length, srcText, 0, srcText.
length(), options);
4517 uint32_t options)
const{
4518 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4522 UnicodeString::caseCompare(int32_t start,
4527 uint32_t options)
const{
4528 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4532 UnicodeString::caseCompare(int32_t start,
4534 const char16_t *srcChars,
4535 uint32_t options)
const{
4536 return doCaseCompare(start, _length, srcChars, 0, _length, options);
4540 UnicodeString::caseCompare(int32_t start,
4542 const char16_t *srcChars,
4545 uint32_t options)
const{
4546 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4550 UnicodeString::caseCompareBetween(int32_t start,
4555 uint32_t options)
const{
4556 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4564 int32_t _length)
const 4567 srcText.pinIndices(srcStart, srcLength);
4569 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4577 {
return indexOf(text, 0, text.
length(), 0, length()); }
4581 int32_t start)
const{
4583 return indexOf(text, 0, text.
length(), start, length() - start);
4589 int32_t _length)
const 4590 {
return indexOf(text, 0, text.
length(), start, _length); }
4593 UnicodeString::indexOf(
const char16_t *srcChars,
4595 int32_t start)
const{
4597 return indexOf(srcChars, 0, srcLength, start, length() - start);
4604 int32_t _length)
const 4605 {
return indexOf(srcChars, 0, srcLength, start, _length); }
4608 UnicodeString::indexOf(char16_t c,
4610 int32_t _length)
const 4611 {
return doIndexOf(c, start, _length); }
4616 int32_t _length)
const 4617 {
return doIndexOf(c, start, _length); }
// indexOf(c): searches the whole string (offset 0, length() code units)
// for the code unit c and returns what doIndexOf() reports.
4620 UnicodeString::indexOf(char16_t c)
const 4621 {
return doIndexOf(c, 0, length()); }
4625 {
return indexOf(c, 0, length()); }
// indexOf(c, start): searches for the code unit c from offset start through
// the end of the string (length() - start code units) via doIndexOf().
// NOTE(review): a line between the signature and the return is not visible
// in this excerpt; any adjustment of start made there is not documented here.
4628 UnicodeString::indexOf(char16_t c,
4629 int32_t start)
const{
4631 return doIndexOf(c, start, length() - start);
4636 int32_t start)
const{
4638 return indexOf(c, start, length() - start);
4645 int32_t _length)
const 4646 {
return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4649 UnicodeString::lastIndexOf(
const char16_t *srcChars,
4651 int32_t start)
const{
4653 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4661 int32_t _length)
const 4664 srcText.pinIndices(srcStart, srcLength);
4666 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4675 int32_t _length)
const 4676 {
return lastIndexOf(text, 0, text.
length(), start, _length); }
4680 int32_t start)
const{
4682 return lastIndexOf(text, 0, text.
length(), start, length() - start);
4687 {
return lastIndexOf(text, 0, text.
length(), 0, length()); }
4690 UnicodeString::lastIndexOf(char16_t c,
4692 int32_t _length)
const 4693 {
return doLastIndexOf(c, start, _length); }
4698 int32_t _length)
const{
4699 return doLastIndexOf(c, start, _length);
// lastIndexOf(c): searches the whole string (offset 0, length() code units)
// for the code unit c and returns what doLastIndexOf() reports.
4703 UnicodeString::lastIndexOf(char16_t c)
const 4704 {
return doLastIndexOf(c, 0, length()); }
4708 return lastIndexOf(c, 0, length());
// lastIndexOf(c, start): searches for the code unit c in the range from
// offset start through the end of the string (length() - start code units)
// via doLastIndexOf().
// NOTE(review): a line between the signature and the return is not visible
// in this excerpt; any adjustment of start made there is not documented here.
4712 UnicodeString::lastIndexOf(char16_t c,
4713 int32_t start)
const{
4715 return doLastIndexOf(c, start, length() - start);
4720 int32_t start)
const{
4722 return lastIndexOf(c, start, length() - start);
4727 {
return doEqualsSubstring(0, text.
length(), text, 0, text.
length()); }
4732 int32_t srcLength)
const 4733 {
return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4738 srcLength =
u_strlen(toUCharPtr(srcChars));
4740 return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
// startsWith(srcChars, srcStart, srcLength): does this string begin with the
// srcLength code units found at srcChars + srcStart?
//
// srcChars   source array; matching starts at srcChars[srcStart]
// srcStart   offset into srcChars of the first code unit to match
// srcLength  number of code units to match; when it has to be computed here
//            it is taken from the NUL-terminated text at srcChars + srcStart
// Returns the result of doEqualsSubstring() for this string's prefix of
// srcLength code units.
//
// Fix: the computed length is now measured from srcChars + srcStart, the same
// position the comparison starts from (and the same expression the matching
// endsWith() overload uses). Measuring from srcChars made the length too large
// by srcStart, so for srcStart > 0 the comparison ran past the terminator.
// NOTE(review): the guard around the assignment sits on a line that is not
// visible in this excerpt and is left untouched.
4744 UnicodeString::startsWith(
const char16_t *srcChars, int32_t srcStart, int32_t srcLength)
const{
4746 srcLength =
u_strlen(toUCharPtr(srcChars + srcStart));
4748 return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4753 {
return doEqualsSubstring(length() - text.
length(), text.
length(),
4754 text, 0, text.
length()); }
4759 int32_t srcLength)
const{
4760 srcText.pinIndices(srcStart, srcLength);
4761 return doEqualsSubstring(length() - srcLength, srcLength,
4762 srcText, srcStart, srcLength);
4767 int32_t srcLength)
const{
4769 srcLength =
u_strlen(toUCharPtr(srcChars));
4771 return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
// endsWith(srcChars, srcStart, srcLength): does this string end with the
// srcLength code units found at srcChars + srcStart?
// When the length is computed here it is measured with u_strlen() from
// srcChars + srcStart, i.e. from the same position the comparison uses.
// The comparison covers the last srcLength code units of this string
// (start offset length() - srcLength) and is delegated to doEqualsSubstring().
// NOTE(review): the guard around the assignment is on a line not visible in
// this excerpt.
4775 UnicodeString::endsWith(
const char16_t *srcChars,
4777 int32_t srcLength)
const{
4779 srcLength =
u_strlen(toUCharPtr(srcChars + srcStart));
4781 return doEqualsSubstring(length() - srcLength, srcLength,
4782 srcChars, srcStart, srcLength);
4785 //======================================== 4787 //======================================== 4789 UnicodeString::replace(int32_t start,
4792 {
return doReplace(start, _length, srcText, 0, srcText.
length()); }
4795 UnicodeString::replace(int32_t start,
4800 {
return doReplace(start, _length, srcText, srcStart, srcLength); }
4803 UnicodeString::replace(int32_t start,
4807 {
return doReplace(start, _length, srcChars, 0, srcLength); }
4810 UnicodeString::replace(int32_t start,
4812 const char16_t *srcChars,
4815 {
return doReplace(start, _length, srcChars, srcStart, srcLength); }
4818 UnicodeString::replace(int32_t start,
4821 {
return doReplace(start, _length, &srcChar, 0, 1); }
4824 UnicodeString::replaceBetween(int32_t start,
4827 {
return doReplace(start, limit - start, srcText, 0, srcText.
length()); }
4830 UnicodeString::replaceBetween(int32_t start,
4835 {
return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4840 {
return findAndReplace(0, length(), oldText, 0, oldText.
length(),
4841 newText, 0, newText.
length()); }
4844 UnicodeString::findAndReplace(int32_t start,
4848 {
return findAndReplace(start, _length, oldText, 0, oldText.
length(),
4849 newText, 0, newText.
length()); }
4851 // ============================ 4853 // ============================ 4855 UnicodeString::doExtract(int32_t start,
4858 { target.
replace(0, target.
length(), *
this, start, _length); }
4861 UnicodeString::extract(int32_t start,
4864 int32_t targetStart)
const 4865 { doExtract(start, _length, target, targetStart); }
4868 UnicodeString::extract(int32_t start,
4871 { doExtract(start, _length, target); }
4873 #if !UCONFIG_NO_CONVERSION 4876 UnicodeString::extract(int32_t start,
4879 constchar *codepage)
const 4882 // This dstSize value will be checked explicitly 4883 return extract(start, _length, dst, dst !=
nullptr ? 0xffffffff : 0, codepage);
4889 UnicodeString::extractBetween(int32_t start,
4892 int32_t dstStart)
const{
4895 doExtract(start, limit - start, dst, dstStart);
4899 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit)
const{
4900 return tempSubString(start, limit - start);
4904 UnicodeString::doCharAt(int32_t offset)
const 4906 if (
static_cast<uint32_t
>(offset) <
static_cast<uint32_t
>(length())) {
4907 return getArrayStart()[offset];
4909 return kInvalidUChar;
4914 UnicodeString::charAt(int32_t offset)
const 4915 {
return doCharAt(offset); }
4918 UnicodeString::operator[] (int32_t offset)
const 4919 {
return doCharAt(offset); }
4922 UnicodeString::isEmpty()
const{
4923 // Arithmetic or logical right shift does not matter: only testing for 0. 4924 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4927 //======================================== 4928 // Write implementation methods 4929 //======================================== 4931 UnicodeString::setZeroLength() {
4932 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4936 UnicodeString::setShortLength(int32_t len) {
4937 // requires 0 <= len <= kMaxShortLength 4938 fUnion.fFields.fLengthAndFlags =
4939 static_cast<int16_t
>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4943 UnicodeString::setLength(int32_t len) {
4944 if(len <= kMaxShortLength) {
4945 setShortLength(len);
4947 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4948 fUnion.fFields.fLength = len;
4953 UnicodeString::setToEmpty() {
4954 fUnion.fFields.fLengthAndFlags = kShortString;
4958 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4960 fUnion.fFields.fArray = array;
4961 fUnion.fFields.fCapacity = capacity;
4964 inline UnicodeString&
4965 UnicodeString::operator= (char16_t ch)
4966 {
return doReplace(0, length(), &ch, 0, 1); }
4970 {
return replace(0, length(), ch); }
4978 return doReplace(0, length(), srcText, srcStart, srcLength);
4986 srcText.pinIndex(srcStart);
4987 return doReplace(0, length(), srcText, srcStart, srcText.
length() - srcStart);
4993 return copyFrom(srcText);
4997 UnicodeString::setTo(
const char16_t *srcChars,
5001 return doReplace(0, length(), srcChars, 0, srcLength);
5005 UnicodeString::setTo(char16_t srcChar)
5008 return doReplace(0, length(), &srcChar, 0, 1);
5015 return replace(0, length(), srcChar);
5022 {
return doAppend(srcText, srcStart, srcLength); }
5026 {
return doAppend(srcText, 0, srcText.
length()); }
5029 UnicodeString::append(
const char16_t *srcChars,
5032 {
return doAppend(srcChars, srcStart, srcLength); }
5037 {
return doAppend(srcChars, 0, srcLength); }
5040 UnicodeString::append(char16_t srcChar)
5041 {
return doAppend(&srcChar, 0, 1); }
5044 UnicodeString::operator+= (char16_t ch)
5045 {
return doAppend(&ch, 0, 1); }
5054 {
return doAppend(srcText, 0, srcText.
length()); }
5057 UnicodeString::insert(int32_t start,
5061 {
return doReplace(start, 0, srcText, srcStart, srcLength); }
5064 UnicodeString::insert(int32_t start,
5066 {
return doReplace(start, 0, srcText, 0, srcText.
length()); }
5069 UnicodeString::insert(int32_t start,
5070 const char16_t *srcChars,
5073 {
return doReplace(start, 0, srcChars, srcStart, srcLength); }
5076 UnicodeString::insert(int32_t start,
5079 {
return doReplace(start, 0, srcChars, 0, srcLength); }
5082 UnicodeString::insert(int32_t start,
5084 {
return doReplace(start, 0, &srcChar, 0, 1); }
5087 UnicodeString::insert(int32_t start,
5089 {
return replace(start, 0, srcChar); }
5093 UnicodeString::remove()
5095 // remove() of a bogus string makes the string empty and non-bogus 5105 UnicodeString::remove(int32_t start,
5109 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 5112 return doReplace(start, _length,
nullptr, 0, 0);
5116 UnicodeString::removeBetween(int32_t start,
5118 {
return doReplace(start, limit - start,
nullptr, 0, 0); }
5121 UnicodeString::retainBetween(int32_t start, int32_t limit) {
5123 return doReplace(0, start,
nullptr, 0, 0);
5127 UnicodeString::truncate(int32_t targetLength)
5129 if(isBogus() && targetLength == 0) {
5130 // truncate(0) of a bogus string makes the string empty and non-bogus 5133 }
elseif (
static_cast<uint32_t
>(targetLength) <
static_cast<uint32_t
>(length())) {
5134 setLength(targetLength);
5142 UnicodeString::reverse()
5143 {
return doReverse(0, length()); }
5146 UnicodeString::reverse(int32_t start,
5148 {
return doReverse(start, _length); }
5152 #endif/* U_SHOW_CPLUSPLUS_API */ C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
#define U_ALIASING_BARRIER(ptr)
Barrier for pointer anti-aliasing optimizations even across function boundaries.
The BreakIterator class implements methods for finding the location of boundaries in text.
A ByteSink can be filled with bytes.
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Records lengths of string edits but not replacement text.
A Locale object represents a specific geographical, political, or cultural region.
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
int32_t length() const
Returns the number of 16-bit code units in the text.
Implementation of ByteSink that writes to a "string".
A string-like object that points to a sized piece of memory.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChar...
UnicodeString(const UnicodeString &that)
Copy constructor.
void push_back(char16_t c)
Appends the code unit c to the UnicodeString object.
void swap(UnicodeString &other) noexcept
Swap strings.
virtual char16_t getCharAt(int32_t offset) const override
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline agai...
bool operator==(const S &text) const
Equality operator.
virtual int32_t getLength() const override
Implement Replaceable::getLength() (see jitterbug 1027).
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UnicodeString(const wchar_t *text, int32_t textLength)
wchar_t * constructor.
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text) override
Replace a substring of this object with the given text.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain...
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
UnicodeString & operator=(const S &src)
Assignment operator.
UnicodeString & append(const S &src)
Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view ...
UnicodeString & operator=(UnicodeString &&src) noexcept
Move assignment operator; might leave src in bogus state.
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString & toLower()
Convert the characters in this to lower case following the conventions of the default locale.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const UnicodeString &text)
Readonly-aliasing factory method.
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a ...
bool operator!=(const S &text) const
Inequality operator.
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + length) into an array of characters in a specified c...
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with t...
virtual void copy(int32_t start, int32_t limit, int32_t dest) override
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const override
Copy the characters in the range [start, limit) into the UnicodeString target.
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the ran...
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
UnicodeString & toLower(const Locale &locale)
Convert the characters in this to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
int32_t length() const
Return the length of the UnicodeString object.
unspecified_reverse_iterator rend() const
virtual UChar32 getChar32At(int32_t offset) const override
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline ...
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + length) into an array of characters in the platform'...
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars...
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
void setToBogus()
Make this UnicodeString object invalid.
friend void swap(UnicodeString &s1, UnicodeString &s2) noexcept
Non-member UnicodeString swap function.
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text)
Constructor from text which is, or which is implicitly convertible to, a std::u16string_view or (if U...
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count Unicode code points in the length char16_t code units of the string.
StringClass toUTF8String() const
Convert the UnicodeString to a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const S &text)
Readonly-aliasing factory method.
UnicodeString & operator+=(const S &src)
Append operator.
unspecified_iterator begin() const
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
virtual UnicodeString * clone() const override
Clone this object, an instance of a subclass of Replaceable.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
UnicodeString(UnicodeString &&src) noexcept
Move constructor; might leave src in bogus state.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UBool isBogus() const
Determine if this object contains a valid string.
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UnicodeString(const uint16_t *text, int32_t textLength)
uint16_t * constructor.
unspecified_reverse_iterator rbegin() const
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
char16_t value_type
C++ boilerplate.
unspecified_iterator end() const
virtual UBool hasMetaData() const override
Replaceable API.
UnicodeString & toUpper()
Convert the characters in this to UPPER CASE following the conventions of the default locale.
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
UnicodeString & trim()
Trims leading and trailing whitespace from this UnicodeString.
U_CAPI int32_t u_strlen(const UChar *s)
U_COMMON_API UnicodeString unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2)
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
UnicodeString operator+(const UnicodeString &s1, const S &s2)
Creates a new UnicodeString from the concatenation of a UnicodeString and s2 which is,...
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
int8_t UBool
The ICU boolean type, a signed-byte integer.
#define U_CAPI
This is used to declare a function as a public ICU C API.
char16_t UChar
The base type for UTF-16 code units and pointers.
#define U_SIZEOF_UCHAR
Number of bytes in a UChar (always 2).
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.