1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: https://www.unicode.org/copyright.html 5 // created: 2024aug12 Markus W. Scherer 7 #ifndef __UTFITERATOR_H__ 8 #define __UTFITERATOR_H__ 12 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H) 15 #if defined(__cpp_lib_ranges) 19 #include <string_view> 20 #include <type_traits> 135 #ifndef U_HIDE_DRAFT_API 170 namespaceU_HEADER_ONLY_NAMESPACE {
173 #if U_CPLUSPLUS_VERSION >= 20 176 template<
typename Iter>
180 template<
typename Iter>
184 template<
typename Iter>
188 template<
typename Iter>
192 template<
typename Range>
193 constexpr
boolrange = std::ranges::range<Range>;
198 template<
typename Iter>
202 template<
typename Iter>
206 template<
typename Iter>
209 std::forward_iterator_tag,
210 typename std::iterator_traits<Iter>::iterator_category>;
213 template<
typename Iter>
216 std::bidirectional_iterator_tag,
217 typename std::iterator_traits<Iter>::iterator_category>;
220 template<
typename Range,
typename =
void>
224 template<
typename Range>
227 std::void_t<decltype(std::declval<Range>().begin()),
228 decltype(std::declval<Range>().end())>> : std::true_type {};
231 template<
typename Range>
240 template <
typename... Args>
247 template<
typename CP32,
bool skipSurrogates>
249 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
273 if (skipSurrogates && c_ == 0xd800) {
301 template<
typename CP32>
303 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
332 template<
typename CP32>
334 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
366 template<
typename CP32,
typename UnitIter,
typename =
void>
368 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
// Accessor: returns the stored start_ iterator by value.
// Declared const; it only reads start_.
394 UnitIter
begin()
const{
return start_; }
// Accessor: returns the stored limit_ iterator by value.
// Declared const; it only reads limit_.
401 UnitIter
end()
const{
return limit_; }
409 #if U_CPLUSPLUS_VERSION >= 20 415 template<std::contiguous_iterator Iter = UnitIter>
416 std::basic_string_view<Unit>
stringView()
const{
417 return std::basic_string_view<Unit>(
begin(),
end());
425 template<typename Iter = UnitIter, typename Unit = typename std::iterator_traits<Iter>::value_type>
426 std::enable_if_t<std::is_pointer_v<Iter> ||
427 std::is_same_v<Iter, typename std::basic_string<Unit>::iterator> ||
428 std::is_same_v<Iter, typename std::basic_string<Unit>::const_iterator> ||
429 std::is_same_v<Iter, typename std::basic_string_view<Unit>::iterator> ||
430 std::is_same_v<Iter, typename std::basic_string_view<Unit>::const_iterator>,
431 std::basic_string_view<Unit>>
433 return std::basic_string_view<Unit>(&*start_, len_);
438 // Order of fields with padding and access frequency in mind. 446 // Partial template specialization for single-pass input iterator. 447 // No UnitIter field, no getter for it, no stringView(). 448 template<
typename CP32,
typename UnitIter>
449 classUnsafeCodeUnits<
452 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
453 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
// Accessor: returns the stored len_ value as a uint8_t.
// Declared const; it only reads len_.
462 uint8_t
length()
const{
return len_; }
465 // Order of fields with padding and access frequency in mind. 469 #endif// U_IN_DOXYGEN 486 template<
typename CP32,
typename UnitIter,
typename =
void>
509 // Partial template specialization for single-pass input iterator. 510 // No UnitIter field, no getter for it, no stringView(). 511 template<
typename CP32,
typename UnitIter>
515 std::enable_if_t<!prv::forward_iterator<UnitIter>>> :
516 public UnsafeCodeUnits<CP32, UnitIter> {
529 #endif// U_IN_DOXYGEN 531 // Validating implementations ---------------------------------------------- *** 535 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
538 // Note: readAndInc() functions take both a p0 and a p iterator. 539 // They must have the same value. 540 // For a multi-pass UnitIter, the caller must copy its p into a local variable p0, 541 // and readAndInc() copies p0 and the incremented p into the CodeUnits. 542 // For a single-pass UnitIter, which may not be default-constructible nor copyable, 543 // the caller can pass p into both references, and readAndInc() does not use p0 544 // and constructs CodeUnits without them. 545 // Moving the p0 variable into the call site avoids having to declare it inside readAndInc() 546 // which may not be possible for a single-pass iterator. 549 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
553 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
554 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
556 "For 8-bit strings, the SURROGATE option does not have an equivalent.");
558 // Handle ill-formed UTF-8 566 U_FORCE_INLINEstaticvoid inc(UnitIter &p,
const LimitIter &limit) {
567 // Very similar to U8_FWD_1(). 572 if ((0xe0 <= b && b < 0xf0)) {
581 }
else/* b >= 0xf0 */ {
592 // Very similar to U8_BACK_1(). 607 if (0xe0 <= b2 && b2 <= 0xf4) {
626 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
627 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
628 // Very similar to U8_NEXT_OR_FFFD(). 629 CP32 c = uint8_t(*p);
632 if constexpr (isMultiPass) {
633 return {c, 1,
true, p0, p};
641 // fetch/validate/assemble all but last trail byte 643 (c < 0xf0 ?
// U+0800..U+FFFF except surrogates 646 :
// U+10000..U+10FFFF 649 (c = (c << 6) | (t & 0x3f), ++length, ++p != limit) &&
650 (t = *p - 0x80) <= 0x3f) &&
651 // valid second-to-last trail byte 652 (c = (c << 6) | t, ++length, ++p != limit)
654 c >= 0xc2 && (c &= 0x1f, 1)) &&
656 (t = *p - 0x80) <= 0x3f) {
660 if constexpr (isMultiPass) {
661 return {c, length,
true, p0, p};
663 return {c, length,
true};
666 if constexpr (isMultiPass) {
667 return {sub(), length,
false, p0, p};
669 return {sub(), length,
false};
673 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
674 // Very similar to U8_PREV_OR_FFFD(). 676 CP32 c = uint8_t(*--p);
678 return {c, 1,
true, p, p0};
686 c = ((b1 - 0xc0) << 6) | (c & 0x3f);
687 return {c, 2,
true, p, p0};
691 // Truncated 3- or 4-byte sequence. 693 return {sub(), 2,
false, p, p0};
696 // Extract the value bits from the last trail byte. 699 if (0xe0 <= b2 && b2 <= 0xf4) {
704 c = (b2 << 12) | ((b1 & 0x3f) << 6) | c;
705 return {c, 3,
true, p, p0};
708 // Truncated 4-byte sequence. 710 return {sub(), 3,
false, p, p0};
714 if (0xf0 <= b3 && b3 <= 0xf4) {
718 c = (b3 << 18) | ((b2 & 0x3f) << 12) | ((b1 & 0x3f) << 6) | c;
719 return {c, 4,
true, p, p0};
725 return {sub(), 1,
false, p, p0};
730 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
734 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
735 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
737 // Handle ill-formed UTF-16: One unpaired surrogate. 746 U_FORCE_INLINEstaticvoid inc(UnitIter &p,
const LimitIter &limit) {
747 // Very similar to U16_FWD_1(). 756 // Very similar to U16_BACK_1(). 764 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
765 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
766 // Very similar to U16_NEXT_OR_FFFD(). 767 CP32 c =
static_cast<CP32
>(*p);
770 if constexpr (isMultiPass) {
771 return {c, 1,
true, p0, p};
780 if constexpr (isMultiPass) {
781 return {c, 2,
true, p0, p};
786 if constexpr (isMultiPass) {
787 return {sub(c), 1,
false, p0, p};
789 return {sub(c), 1,
false};
795 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
796 // Very similar to U16_PREV_OR_FFFD(). 798 CP32 c =
static_cast<CP32
>(*--p);
800 return {c, 1,
true, p, p0};
807 return {c, 2,
true, p, p0};
809 return {sub(c), 1,
false, p, p0};
815 // UTF-32: trivial, but still validating 816 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
820 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
821 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
823 // Handle ill-formed UTF-32 824 U_FORCE_INLINEstatic CP32 sub(
bool forSurrogate, CP32 surrogate) {
832 U_FORCE_INLINEstaticvoid inc(UnitIter &p,
const LimitIter &
/*limit*/) {
841 UnitIter &p0, UnitIter &p,
const LimitIter &
/*limit*/) {
842 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
846 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
847 if constexpr (isMultiPass) {
848 return {c, 1,
true, p0, p};
853 if constexpr (isMultiPass) {
854 return {sub(uc < 0xe000, c), 1,
false, p0, p};
856 return {sub(uc < 0xe000, c), 1,
false};
861 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> decAndRead(UnitIter
/*start*/, UnitIter &p) {
865 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
866 return {c, 1,
true, p, p0};
868 return {sub(uc < 0xe000, c), 1,
false, p, p0};
873 // Non-validating implementations ------------------------------------------ *** 875 template<
typename CP32,
typename UnitIter,
typename =
void>
879 template<
typename CP32,
typename UnitIter>
883 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
884 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
887 // Very similar to U8_FWD_1_UNSAFE(). 893 // Very similar to U8_BACK_1_UNSAFE(). 897 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
898 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
899 // Very similar to U8_NEXT_UNSAFE(). 900 CP32 c = uint8_t(*p);
903 if constexpr (isMultiPass) {
904 return {c, 1, p0, p};
909 c = ((c & 0x1f) << 6) | (*p & 0x3f);
911 if constexpr (isMultiPass) {
912 return {c, 2, p0, p};
917 // No need for (c&0xf) because the upper bits are truncated 918 // after <<12 in the cast to uint16_t. 919 c = uint16_t(c << 12) | ((*p & 0x3f) << 6);
923 if constexpr (isMultiPass) {
924 return {c, 3, p0, p};
929 c = ((c & 7) << 18) | ((*p & 0x3f) << 12);
931 c |= (*p & 0x3f) << 6;
935 if constexpr (isMultiPass) {
936 return {c, 4, p0, p};
943 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
944 // Very similar to U8_PREV_UNSAFE(). 946 CP32 c = uint8_t(*--p);
948 return {c, 1, p, p0};
950 // U8_IS_TRAIL(c) if well-formed 953 for (uint8_t shift = 6;;) {
957 c |= uint32_t{b} << shift;
960 c |= (uint32_t{b} & 0x3f) << shift;
966 return {c, count, p, p0};
971 template<
typename CP32,
typename UnitIter>
975 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
976 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
979 // Very similar to U16_FWD_1_UNSAFE(). 988 // Very similar to U16_BACK_1_UNSAFE(). 994 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
995 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
996 // Very similar to U16_NEXT_UNSAFE(). 997 CP32 c =
static_cast<CP32
>(*p);
1000 if constexpr (isMultiPass) {
1001 return {c, 1, p0, p};
1009 if constexpr (isMultiPass) {
1010 return {c, 2, p0, p};
1017 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1018 // Very similar to U16_PREV_UNSAFE(). 1020 CP32 c =
static_cast<CP32
>(*--p);
1022 return {c, 1, p, p0};
1026 return {c, 2, p, p0};
1032 template<
typename CP32,
typename UnitIter>
1036 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
1037 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1047 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
1048 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
1051 if constexpr (isMultiPass) {
1052 return {c, 1, p0, p};
1058 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1061 return {c, 1, p, p0};
1067 // Validating iterators ---------------------------------------------------- *** 1093 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
1095 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1096 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1098 // Proxy type for operator->() (required by LegacyInputIterator) 1099 // so that we don't promise always returning CodeUnits. 1120 prv::bidirectional_iterator<UnitIter>,
1121 std::bidirectional_iterator_tag,
1122 std::forward_iterator_tag>;
1138 p_(p), start_(start), limit_(limit), units_(0, 0, false, p, p) {}
1151 p_(p), start_(p), limit_(limit), units_(0, 0, false, p, p) {}
1187 return getLogicalPosition() == other.getLogicalPosition();
1196 // Asymmetric equality & nonequality with a sentinel type. 1206 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1209 return iter.getLogicalPosition() == s;
1212 #if U_CPLUSPLUS_VERSION < 20 1213 // C++17: Need to define all four combinations of == / != vs. parameter order. 1214 // Once we require C++20, we could remove all but the first == because 1215 // the compiler would generate the rest. 1225 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1228 return iter.getLogicalPosition() == s;
1238 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1249 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1263 units_ = Impl::readAndInc(p0, p_, limit_);
1280 units_ = Impl::readAndInc(p0, p_, limit_);
1283 return Proxy(units_);
1294 // operator*() called readAndInc() so p_ is already ahead. 1296 }
elseif (state_ == 0) {
1297 Impl::inc(p_, limit_);
1298 }
else/* state_ < 0 */ {
1299 // operator--() called decAndRead() so we know how far to skip. 1316 // operator*() called readAndInc() so p_ is already ahead. 1320 }
elseif (state_ == 0) {
1322 units_ = Impl::readAndInc(p0, p_, limit_);
1325 // keep this->state_ == 0 1327 }
else/* state_ < 0 */ {
1329 // operator--() called decAndRead() so we know how far to skip. 1343 template<
typename Iter = UnitIter>
1345 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator &>
1348 // operator*() called readAndInc() so p_ is ahead of the logical position. 1349 p_ = units_.begin();
1351 units_ = Impl::decAndRead(start_, p_);
1363 template<
typename Iter = UnitIter>
1365 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator>
1373 friendclassstd::reverse_iterator<
UTFIterator<CP32, behavior, UnitIter>>;
1376 return state_ <= 0 ? p_ : units_.begin();
1379 // operator*() etc. are logically const. 1380 mutable UnitIter p_;
1381 // In a validating iterator, we need start_ & limit_ so that when we read a code point 1382 // (forward or backward) we can test if there are enough code units. 1385 // Keep state so that we call readAndInc() only once for both operator*() and ++ 1386 // to make it easy for the compiler to optimize. 1387 mutable CodeUnits<CP32, UnitIter> units_;
1388 // >0: units_ = readAndInc(), p_ = units limit 1389 // which means that p_ is ahead of its logical position 1391 // <0: units_ = decAndRead(), p_ = units start 1392 mutable int8_t state_ = 0;
1395 #ifndef U_IN_DOXYGEN 1396 // Partial template specialization for single-pass input iterator. 1397 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
1400 UnitIter, LimitIter,
1401 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
1402 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1403 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1405 // Proxy type for post-increment return value, to make *iter++ work. 1406 // Also for operator->() (required by LegacyInputIterator) 1407 // so that we don't promise always returning CodeUnits. 1410 explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
// Dereference: yields a mutable reference to the units_ member declared below.
1411 CodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
// Member access: yields the address of the units_ member declared below,
// so that proxy->member reaches the CodeUnits members.
1412 CodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
// The CodeUnits value is held by value (not by reference or pointer).
1414 CodeUnits<CP32, UnitIter> units_;
1426 // Constructs an iterator start or limit sentinel. 1427 // Requires p to be copyable. 1437 return p_ == other.p_ && ahead_ == other.ahead_;
1438 // Strictly speaking, we should check if the logical position is the same. 1439 // However, we cannot advance, or do arithmetic with, a single-pass UnitIter. 1445 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1448 return !iter.ahead_ && iter.p_ == s;
1451 #if U_CPLUSPLUS_VERSION < 20 1454 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1457 return !iter.ahead_ && iter.p_ == s;
1462 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1468 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1475 units_ = Impl::readAndInc(p_, p_, limit_);
1483 units_ = Impl::readAndInc(p_, p_, limit_);
1486 return Proxy(units_);
1491 // operator*() called readAndInc() so p_ is already ahead. 1494 Impl::inc(p_, limit_);
1501 // operator*() called readAndInc() so p_ is already ahead. 1504 units_ = Impl::readAndInc(p_, p_, limit_);
1505 // keep this->ahead_ == false 1507 return Proxy(units_);
1511 // operator*() etc. are logically const. 1512 mutable UnitIter p_;
1513 // In a validating iterator, we need limit_ so that when we read a code point 1514 // we can test if there are enough code units. 1516 // Keep state so that we call readAndInc() only once for both operator*() and ++ 1517 // so that we can use a single-pass input iterator for UnitIter. 1518 mutable CodeUnits<CP32, UnitIter> units_ = {0, 0,
false};
1519 // true: units_ = readAndInc(), p_ = units limit 1520 // which means that p_ is ahead of its logical position 1521 // false: initial state 1522 mutablebool ahead_ =
false;
1524 #endif// U_IN_DOXYGEN 1526 }
// namespace U_HEADER_ONLY_NAMESPACE 1528 #ifndef U_IN_DOXYGEN 1529 // Bespoke specialization of reverse_iterator. 1530 // The default implementation implements reverse operator*() and ++ in a way 1531 // that does most of the same work twice for reading variable-length sequences. 1532 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1533 classstd::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter>> {
1534 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1535 using Impl = U_HEADER_ONLY_NAMESPACE::UTFImpl<CP32, behavior, UnitIter>;
1538 // Proxy type for operator->() (required by LegacyInputIterator) 1539 // so that we don't promise always returning CodeUnits. 1542 explicit Proxy(CodeUnits_ units) : units_(units) {}
// Dereference: yields a mutable reference to the units_ member.
1543 CodeUnits_ &operator*() {
return units_; }
// Member access: yields the address of the units_ member,
// so that proxy->member reaches the CodeUnits_ members.
1544 CodeUnits_ *operator->() {
return &units_; }
1550 using value_type = CodeUnits_;
1551 using reference = value_type;
1552 using pointer = Proxy;
1554 using iterator_category = std::bidirectional_iterator_tag;
1557 p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),
1558 units_(0, 0, false, p_, p_) {}
// Default constructor: value-initializes p_, start_ and limit_, and constructs
// units_ from the arguments (0, 0, false, p_, p_).
// NOTE(review): units_ is built from p_, so this relies on p_ being declared
// before units_ in the class — confirm the member declaration order.
1559 U_FORCE_INLINE reverse_iterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}
// Move construction and move assignment: compiler-generated, declared noexcept.
1561 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
1562 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
// Copy construction and copy assignment: compiler-generated member-wise copies.
1564 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
1565 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
1568 return getLogicalPosition() == other.getLogicalPosition();
1574 units_ = Impl::decAndRead(start_, p_);
1582 units_ = Impl::decAndRead(start_, p_);
1585 return Proxy(units_);
1590 // operator*() called decAndRead() so p_ is already behind. 1592 }
elseif (state_ == 0) {
1593 Impl::dec(start_, p_);
1594 }
else/* state_ > 0 */ {
1595 // operator--() called readAndInc() so we know how far to skip. 1596 p_ = units_.begin();
1602 U_FORCE_INLINE reverse_iterator operator++(
int) {
// post-increment 1604 // operator*() called decAndRead() so p_ is already behind. 1605 reverse_iterator result(*
this);
1608 }
elseif (state_ == 0) {
1609 units_ = Impl::decAndRead(start_, p_);
1610 reverse_iterator result(*
this);
1612 // keep this->state_ == 0 1614 }
else/* state_ > 0 */ {
1615 reverse_iterator result(*
this);
1616 // operator--() called readAndInc() so we know how far to skip. 1617 p_ = units_.begin();
1625 // operator*() called decAndRead() so p_ is behind the logical position. 1629 units_ = Impl::readAndInc(p0, p_, limit_);
1634 U_FORCE_INLINE reverse_iterator operator--(
int) {
// post-decrement 1635 reverse_iterator result(*
this);
1642 return state_ >= 0 ? p_ : units_.end();
1645 // operator*() etc. are logically const. 1646 mutable UnitIter p_;
1647 // In a validating iterator, we need start_ & limit_ so that when we read a code point 1648 // (forward or backward) we can test if there are enough code units. 1651 // Keep state so that we call decAndRead() only once for both operator*() and ++ 1652 // to make it easy for the compiler to optimize. 1653 mutable CodeUnits_ units_;
1654 // >0: units_ = readAndInc(), p_ = units limit 1656 // <0: units_ = decAndRead(), p_ = units start 1657 // which means that p_ is behind its logical position 1658 mutable int8_t state_ = 0;
1660 #endif// U_IN_DOXYGEN 1662 namespaceU_HEADER_ONLY_NAMESPACE {
1687 typename UnitIter,
typename LimitIter = UnitIter>
1690 std::move(start), std::move(p), std::move(limit));
1714 typename UnitIter,
typename LimitIter = UnitIter>
1717 std::move(p), std::move(limit));
1720 // Note: We should only enable the following factory function for a copyable UnitIter. 1721 // In C++17, we would have to partially specialize with enable_if_t testing for forward_iterator, 1722 // but a function template partial specialization is not allowed. 1723 // In C++20, we might be able to require the std::copyable concept. 1744 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1776 template<
typename CP32, UTFIllFormedBehavior behavior,
typename Range>
1778 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1791 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
1801 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
1815 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1822 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1824 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1832 using UnitIter = decltype(unitRange.begin());
1833 using LimitIter = decltype(unitRange.end());
1834 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1835 // Return the code unit sentinel. 1836 return unitRange.end();
1837 }
elseif constexpr (prv::bidirectional_iterator<UnitIter>) {
1838 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1840 // The input iterator specialization has no three-argument constructor. 1841 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1849 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1851 using UnitIter = decltype(unitRange.begin());
1852 using LimitIter = decltype(unitRange.end());
1853 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1854 // Return the code unit sentinel. 1855 return unitRange.end();
1856 }
elseif constexpr (prv::bidirectional_iterator<UnitIter>) {
1857 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1859 // The input iterator specialization has no three-argument constructor. 1860 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1869 return std::make_reverse_iterator(
end());
1877 return std::make_reverse_iterator(
begin());
1885 template<
typename CP32, UTFIllFormedBehavior behavior>
1888 __cpp_lib_bind_back >= 2022'02
// http://wg21.link/P2387R3. 1889 : std::ranges::range_adaptor_closure<UTFStringCodePointsAdaptor<CP32, behavior>>
1893 template<
typename Range>
1895 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10// We need https://wg21.link/P2415R2. 1897 std::forward<Range>(unitRange));
1899 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
1900 // Take basic_string_view by copy, not by reference. In C++20 this is handled by 1901 // all_t<Range>, which is Range if Range is a view. 1903 std::forward<Range>(unitRange));
1925 template<
typename CP32, UTFIllFormedBehavior behavior>
1928 // Non-validating iterators ------------------------------------------------ *** 1951 template<
typename CP32,
typename UnitIter,
typename =
void>
1953 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1954 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
1956 // Proxy type for operator->() (required by LegacyInputIterator) 1957 // so that we don't promise always returning UnsafeCodeUnits. 1978 prv::bidirectional_iterator<UnitIter>,
1979 std::bidirectional_iterator_tag,
1980 std::forward_iterator_tag>;
2015 return getLogicalPosition() == other.getLogicalPosition();
2032 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2035 return iter.getLogicalPosition() == s;
2038 #if U_CPLUSPLUS_VERSION < 20 2047 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2050 return iter.getLogicalPosition() == s;
2060 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2071 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2085 units_ = Impl::readAndInc(p0, p_);
2102 units_ = Impl::readAndInc(p0, p_);
2105 return Proxy(units_);
2116 // operator*() called readAndInc() so p_ is already ahead. 2118 }
elseif (state_ == 0) {
2120 }
else/* state_ < 0 */ {
2121 // operator--() called decAndRead() so we know how far to skip. 2138 // operator*() called readAndInc() so p_ is already ahead. 2142 }
elseif (state_ == 0) {
2144 units_ = Impl::readAndInc(p0, p_);
2147 // keep this->state_ == 0 2149 }
else/* state_ < 0 */ {
2151 // operator--() called decAndRead() so we know how far to skip. 2165 template<
typename Iter = UnitIter>
2170 // operator*() called readAndInc() so p_ is ahead of the logical position. 2171 p_ = units_.begin();
2173 units_ = Impl::decAndRead(p_);
2185 template<
typename Iter = UnitIter>
2198 return state_ <= 0 ? p_ : units_.begin();
2201 // operator*() etc. are logically const. 2202 mutable UnitIter p_;
2203 // Keep state so that we call readAndInc() only once for both operator*() and ++ 2204 // to make it easy for the compiler to optimize. 2205 mutable UnsafeCodeUnits<CP32, UnitIter> units_;
2206 // >0: units_ = readAndInc(), p_ = units limit 2207 // which means that p_ is ahead of its logical position 2209 // <0: units_ = decAndRead(), p_ = units start 2210 mutable int8_t state_ = 0;
2213 #ifndef U_IN_DOXYGEN 2214 // Partial template specialization for single-pass input iterator. 2215 template<
typename CP32,
typename UnitIter>
2216 classUnsafeUTFIterator<
2219 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
2220 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2221 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
2223 // Proxy type for post-increment return value, to make *iter++ work. 2224 // Also for operator->() (required by LegacyInputIterator) 2225 // so that we don't promise always returning UnsafeCodeUnits. 2228 explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
// Dereference: yields a mutable reference to the units_ member declared below.
2229 UnsafeCodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
// Member access: yields the address of the units_ member declared below,
// so that proxy->member reaches the UnsafeCodeUnits members.
2230 UnsafeCodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
// The UnsafeCodeUnits value is held by value (not by reference or pointer).
2232 UnsafeCodeUnits<CP32, UnitIter> units_;
2236 usingvalue_type = UnsafeCodeUnits<CP32, UnitIter>;
2251 return p_ == other.p_ && ahead_ == other.ahead_;
2252 // Strictly speaking, we should check if the logical position is the same. 2253 // However, we cannot advance, or do arithmetic with, a single-pass UnitIter. 2259 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2262 return !iter.ahead_ && iter.p_ == s;
2265 #if U_CPLUSPLUS_VERSION < 20 2268 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2271 return !iter.ahead_ && iter.p_ == s;
2276 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2282 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2289 units_ = Impl::readAndInc(p_, p_);
2297 units_ = Impl::readAndInc(p_, p_);
2300 return Proxy(units_);
2305 // operator*() called readAndInc() so p_ is already ahead. 2315 // operator*() called readAndInc() so p_ is already ahead. 2318 units_ = Impl::readAndInc(p_, p_);
2319 // keep this->ahead_ == false 2321 return Proxy(units_);
2325 // operator*() etc. are logically const. 2326 mutable UnitIter p_;
2327 // Keep state so that we call readAndInc() only once for both operator*() and ++ 2328 // so that we can use a single-pass input iterator for UnitIter. 2329 mutable UnsafeCodeUnits<CP32, UnitIter> units_ = {0, 0};
2330 // true: units_ = readAndInc(), p_ = units limit 2331 // which means that p_ is ahead of its logical position 2332 // false: initial state 2333 mutablebool ahead_ =
false;
2335 #endif// U_IN_DOXYGEN 2337 }
// namespace U_HEADER_ONLY_NAMESPACE 2339 #ifndef U_IN_DOXYGEN 2340 // Bespoke specialization of reverse_iterator. 2341 // The default implementation implements reverse operator*() and ++ in a way 2342 // that does most of the same work twice for reading variable-length sequences. 2343 template<
typename CP32,
typename UnitIter>
2344 classstd::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter>> {
2345 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2346 using Impl = U_HEADER_ONLY_NAMESPACE::UnsafeUTFImpl<CP32, UnitIter>;
2349 // Proxy type for operator->() (required by LegacyInputIterator) 2350 // so that we don't promise always returning UnsafeCodeUnits. 2353 explicit Proxy(UnsafeCodeUnits_ units) : units_(units) {}
// Dereference: yields a mutable reference to the units_ member declared below.
2354 UnsafeCodeUnits_ &operator*() {
return units_; }
// Member access: yields the address of the units_ member declared below,
// so that proxy->member reaches the UnsafeCodeUnits_ members.
2355 UnsafeCodeUnits_ *operator->() {
return &units_; }
// The UnsafeCodeUnits_ value is held by value (not by reference or pointer).
2357 UnsafeCodeUnits_ units_;
2361 using value_type = UnsafeCodeUnits_;
2362 using reference = value_type;
2363 using pointer = Proxy;
2365 using iterator_category = std::bidirectional_iterator_tag;
2368 p_(iter.getLogicalPosition()), units_(0, 0, p_, p_) {}
// Default constructor: value-initializes p_ and constructs units_ from the
// arguments (0, 0, p_, p_).
// NOTE(review): units_ is built from p_, so this relies on p_ being declared
// before units_ in the class — confirm the member declaration order.
2369 U_FORCE_INLINE reverse_iterator() : p_{}, units_(0, 0, p_, p_) {}
// Move construction and move assignment: compiler-generated, declared noexcept.
2371 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
2372 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
// Copy construction and copy assignment: compiler-generated member-wise copies.
2374 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
2375 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
2378 return getLogicalPosition() == other.getLogicalPosition();
2384 units_ = Impl::decAndRead(p_);
2392 units_ = Impl::decAndRead(p_);
2395 return Proxy(units_);
2400 // operator*() called decAndRead() so p_ is already behind. 2402 }
elseif (state_ == 0) {
2404 }
else/* state_ > 0 */ {
2405 // operator--() called readAndInc() so we know how far to skip. 2406 p_ = units_.begin();
2412 U_FORCE_INLINE reverse_iterator operator++(
int) {
// post-increment 2414 // operator*() called decAndRead() so p_ is already behind. 2415 reverse_iterator result(*
this);
2418 }
elseif (state_ == 0) {
2419 units_ = Impl::decAndRead(p_);
2420 reverse_iterator result(*
this);
2422 // keep this->state_ == 0 2424 }
else/* state_ > 0 */ {
2425 reverse_iterator result(*
this);
2426 // operator--() called readAndInc() so we know how far to skip. 2427 p_ = units_.begin();
2435 // operator*() called decAndRead() so p_ is behind the logical position. 2439 units_ = Impl::readAndInc(p0, p_);
2444 U_FORCE_INLINE reverse_iterator operator--(
int) {
// post-decrement 2445 reverse_iterator result(*
this);
2452 return state_ >= 0 ? p_ : units_.end();
2455 // operator*() etc. are logically const. 2456 mutable UnitIter p_;
2457 // Keep state so that we call decAndRead() only once for both operator*() and ++ 2458 // to make it easy for the compiler to optimize. 2459 mutable UnsafeCodeUnits_ units_;
2460 // >0: units_ = readAndInc(), p_ = units limit 2462 // <0: units_ = decAndRead(), p_ = units start 2463 // which means that p_ is behind its logical position 2464 mutable int8_t state_ = 0;
2466 #endif// U_IN_DOXYGEN 2468 namespaceU_HEADER_ONLY_NAMESPACE {
2485 template<
typename CP32,
typename UnitIter>
2517 template<
typename CP32,
typename Range>
2519 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2532 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
2542 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
2556 return unsafeUTFIterator<CP32>(unitRange.begin());
2563 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2565 return unsafeUTFIterator<CP32>(unitRange.begin());
2573 using UnitIter = decltype(unitRange.begin());
2574 using LimitIter = decltype(unitRange.end());
2575 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2576 // Return the code unit sentinel. 2577 return unitRange.end();
2579 return unsafeUTFIterator<CP32>(unitRange.end());
2587 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2589 using UnitIter = decltype(unitRange.begin());
2590 using LimitIter = decltype(unitRange.end());
2591 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2592 // Return the code unit sentinel. 2593 return unitRange.end();
2595 return unsafeUTFIterator<CP32>(unitRange.end());
2604 return std::make_reverse_iterator(
end());
2612 return std::make_reverse_iterator(
begin());
2620 template<
typename CP32>
2623 __cpp_lib_bind_back >= 2022'02
// http://wg21.link/P2387R3. 2624 : std::ranges::range_adaptor_closure<UnsafeUTFStringCodePointsAdaptor<CP32>>
2628 template<
typename Range>
2630 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10// We need https://wg21.link/P2415R2. 2633 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
2634 // Take basic_string_view by copy, not by reference. In C++20 this is handled by 2635 // all_t<Range>, which is Range if Range is a view. 2657 template<
typename CP32>
2660 }
// namespace U_HEADER_ONLY_NAMESPACE 2663 #if defined(__cpp_lib_ranges) 2664 template <
typename CP32, UTFIllFormedBehavior behavior,
typename Range>
2665 constexpr
bool std::ranges::enable_borrowed_range<
2667 std::ranges::enable_borrowed_range<Range>;
2669 template <
typename CP32,
typename Range>
2670 constexpr
bool std::ranges::enable_borrowed_range<
2672 std::ranges::enable_borrowed_range<Range>;
2675 #endif// U_HIDE_DRAFT_API 2676 #endif// U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API 2677 #endif// __UTFITERATOR_H__ A C++ "range" over all Unicode code points U+0000..U+10FFFF.
AllCodePoints()
Constructor.
A C++ "range" over all Unicode scalar values U+0000..U+D7FF & U+E000..U+10FFFF.
AllScalarValues()
Constructor.
Result of validating and decoding a code unit sequence for one code point.
CodeUnits & operator=(const CodeUnits &other)=default
Copy assignment operator.
CodeUnits(const CodeUnits &other)=default
Copy constructor.
CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, UnitIter start, UnitIter limit)
Validating iterator over the code points in a Unicode string.
U_FORCE_INLINE UTFIterator()
Default constructor.
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const Sentinel &s, const UTFIterator &iter)
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UTFIterator > operator--(int)
Post-decrement operator.
U_FORCE_INLINE Proxy operator->() const
Decodes the code unit sequence at the current position.
value_type reference
C++ iterator boilerplate.
U_FORCE_INLINE UTFIterator & operator=(UTFIterator &&src) noexcept=default
Move assignment operator.
CodeUnits< CP32, UnitIter > value_type
C++ iterator boilerplate.
U_FORCE_INLINE UTFIterator(UTFIterator &&src) noexcept=default
Move constructor.
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const UTFIterator &iter, const Sentinel &s)
std::conditional_t< prv::bidirectional_iterator< UnitIter >, std::bidirectional_iterator_tag, std::forward_iterator_tag > iterator_category
C++ iterator boilerplate.
U_FORCE_INLINE UTFIterator(UnitIter p)
Constructs an iterator start or limit sentinel.
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const Sentinel &s, const UTFIterator &iter)
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const UTFIterator &iter, const Sentinel &s)
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UTFIterator & > operator--()
Pre-decrement operator.
U_FORCE_INLINE bool operator!=(const UTFIterator &other) const
Proxy pointer
C++ iterator boilerplate.
U_FORCE_INLINE UTFIterator(UnitIter start, UnitIter p, LimitIter limit)
Constructor with start <= p < limit.
U_FORCE_INLINE UTFIterator(const UTFIterator &other)=default
Copy constructor.
U_FORCE_INLINE CodeUnits< CP32, UnitIter > operator*() const
Decodes the code unit sequence at the current position.
U_FORCE_INLINE UTFIterator operator++(int)
Post-increment operator.
U_FORCE_INLINE UTFIterator & operator++()
Pre-increment operator.
U_FORCE_INLINE UTFIterator & operator=(const UTFIterator &other)=default
Copy assignment operator.
U_FORCE_INLINE bool operator==(const UTFIterator &other) const
U_FORCE_INLINE UTFIterator(UnitIter p, LimitIter limit)
Constructor with start == p < limit.
prv::iter_difference_t< UnitIter > difference_type
C++ iterator boilerplate.
A C++ "range" for validating iteration over all of the code points of a code unit range.
UTFStringCodePoints()=default
Constructs an empty C++ "range" object.
UTFStringCodePoints & operator=(const UTFStringCodePoints &other)=default
Copy assignment operator.
UTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string.
UTFStringCodePoints(const UTFStringCodePoints &other)=default
Copy constructor.
UTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string, keeping a reference to the code unit range.
Result of decoding a code unit sequence for one code point.
std::enable_if_t< std::is_pointer_v< Iter >||std::is_same_v< Iter, typename std::basic_string< Unit >::iterator >||std::is_same_v< Iter, typename std::basic_string< Unit >::const_iterator >||std::is_same_v< Iter, typename std::basic_string_view< Unit >::iterator >||std::is_same_v< Iter, typename std::basic_string_view< Unit >::const_iterator >, std::basic_string_view< Unit > > stringView() const
UnsafeCodeUnits & operator=(const UnsafeCodeUnits &other)=default
Copy assignment operator.
UnsafeCodeUnits(CP32 codePoint, uint8_t length, UnitIter start, UnitIter limit)
UnsafeCodeUnits(const UnsafeCodeUnits &other)=default
Copy constructor.
Non-validating iterator over the code points in a Unicode string.
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UnsafeUTFIterator & > operator--()
Pre-decrement operator.
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const Sentinel &s, const UnsafeUTFIterator &iter)
U_FORCE_INLINE UnsafeUTFIterator()
Default constructor.
UnsafeCodeUnits< CP32, UnitIter > value_type
C++ iterator boilerplate.
U_FORCE_INLINE bool operator!=(const UnsafeUTFIterator &other) const
U_FORCE_INLINE UnsafeUTFIterator & operator=(const UnsafeUTFIterator &other)=default
Copy assignment operator.
value_type reference
C++ iterator boilerplate.
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const UnsafeUTFIterator &iter, const Sentinel &s)
U_FORCE_INLINE UnsafeCodeUnits< CP32, UnitIter > operator*() const
Decodes the code unit sequence at the current position.
std::conditional_t< prv::bidirectional_iterator< UnitIter >, std::bidirectional_iterator_tag, std::forward_iterator_tag > iterator_category
C++ iterator boilerplate.
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const UnsafeUTFIterator &iter, const Sentinel &s)
Proxy pointer
C++ iterator boilerplate.
prv::iter_difference_t< UnitIter > difference_type
C++ iterator boilerplate.
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UnsafeUTFIterator > operator--(int)
Post-decrement operator.
U_FORCE_INLINE Proxy operator->() const
Decodes the code unit sequence at the current position.
U_FORCE_INLINE UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept=default
Move constructor.
U_FORCE_INLINE UnsafeUTFIterator operator++(int)
Post-increment operator.
U_FORCE_INLINE UnsafeUTFIterator(const UnsafeUTFIterator &other)=default
Copy constructor.
U_FORCE_INLINE UnsafeUTFIterator & operator++()
Pre-increment operator.
U_FORCE_INLINE UnsafeUTFIterator(UnitIter p)
Constructor; the iterator/pointer should be at a code point boundary.
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const Sentinel &s, const UnsafeUTFIterator &iter)
U_FORCE_INLINE bool operator==(const UnsafeUTFIterator &other) const
U_FORCE_INLINE UnsafeUTFIterator & operator=(UnsafeUTFIterator &&src) noexcept=default
Move assignment operator.
A C++ "range" for non-validating iteration over all of the code points of a code unit range.
UnsafeUTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string, keeping a reference to the code unit range.
UnsafeUTFStringCodePoints(const UnsafeUTFStringCodePoints &other)=default
Copy constructor.
UnsafeUTFStringCodePoints()=default
Constructs an empty C++ "range" object.
UnsafeUTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string.
UnsafeUTFStringCodePoints & operator=(const UnsafeUTFStringCodePoints &other)=default
Copy assignment operator.
int32_t difference_type
C++ iterator boilerplate.
bool operator==(const CodePointsIterator &other) const
bool operator!=(const CodePointsIterator &other) const
value_type reference
C++ iterator boilerplate.
CP32 value_type
C++ iterator boilerplate.
CodePointsIterator(CP32 c)
CodePointsIterator & operator++()
std::forward_iterator_tag iterator_category
C++ iterator boilerplate.
CodePointsIterator operator++(int)
CP32 * pointer
C++ iterator boilerplate.
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
#define U_CPLUSPLUS_VERSION
0 if no C++; 1, 11, 14, ...
auto operator()(Range &&unitRange) const
auto operator()(Range &&unitRange) const
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32).
#define U_FORCE_INLINE
Forces function inlining on compilers that are known to support it.
C API: 16-bit Unicode handling macros.
#define U16_IS_SURROGATE_TRAIL(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a trail surrogate?
#define U16_IS_SURROGATE_LEAD(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a lead surrogate?
#define U16_GET_SUPPLEMENTARY(lead, trail)
Get a supplementary code point value (U+10000..U+10ffff) from its lead and trail surrogates.
#define U16_IS_SURROGATE(c)
Is this code unit a surrogate (U+d800..U+dfff)?
#define U16_IS_LEAD(c)
Is this code unit a lead surrogate (U+d800..U+dbff)?
#define U16_IS_TRAIL(c)
Is this code unit a trail surrogate (U+dc00..U+dfff)?
C API: 8-bit Unicode handling macros.
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte)
Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1)
Internal 3-byte UTF-8 validity check.
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1)
Internal 4-byte UTF-8 validity check.
#define U8_IS_SINGLE(c)
Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
#define U8_LEAD3_T1_BITS
Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
#define U8_LEAD4_T1_BITS
Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
#define U8_IS_LEAD(c)
Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes)
Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
#define U8_IS_TRAIL(c)
Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
auto unsafeUTFIterator(UnitIter iter)
UnsafeUTFIterator factory function.
typename std::iterator_traits< Iter >::difference_type iter_difference_t
constexpr bool is_basic_string_view_v
constexpr bool forward_iterator
auto utfIterator(UnitIter start, UnitIter p, LimitIter limit)
UTFIterator factory function for start <= p < limit.
constexpr UTFStringCodePointsAdaptor< CP32, behavior > utfStringCodePoints
Range adaptor function object returning a UTFStringCodePoints object that represents a "range" of code points in a code unit range.
typename std::iterator_traits< Iter >::value_type iter_value_t
constexpr bool bidirectional_iterator
constexpr UnsafeUTFStringCodePointsAdaptor< CP32 > unsafeUTFStringCodePoints
Range adaptor function object returning an UnsafeUTFStringCodePoints object that represents a "range" of code points in a code unit range.
UTFIllFormedBehavior
Some defined behaviors for handling ill-formed Unicode strings.
@ UTF_BEHAVIOR_FFFD
Returns U+FFFD Replacement Character.
@ UTF_BEHAVIOR_SURROGATE
UTF-8: Not allowed; UTF-16: returns the unpaired surrogate; UTF-32: returns the surrogate code point,...
@ UTF_BEHAVIOR_NEGATIVE
Returns a negative value (-1=U_SENTINEL) instead of a code point.
Basic definitions for ICU, for both C and C++ APIs.
C API: API for accessing ICU version numbers.