Movatterモバイル変換


[0]ホーム

URL:


ICU 78.1  78.1
utfiterator.h
Go to the documentation of this file.
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: https://www.unicode.org/copyright.html
3 
4 // utfiterator.h
5 // created: 2024aug12 Markus W. Scherer
6 
7 #ifndef __UTFITERATOR_H__
8 #define __UTFITERATOR_H__
9 
10 #include "unicode/utypes.h"
11 
12 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
13 
14 #include <iterator>
15 #if defined(__cpp_lib_ranges)
16 #include <ranges>
17 #endif
18 #include <string>
19 #include <string_view>
20 #include <type_traits>
21 #include "unicode/utf16.h"
22 #include "unicode/utf8.h"
23 #include "unicode/uversion.h"
24 
135 #ifndef U_HIDE_DRAFT_API
136 
/**
 * Selects what the validating UTF decoders in this file report in place of a
 * code point when they encounter an ill-formed code unit sequence.
 *
 * The enumerators are used as a non-type template argument (`behavior`).
 */
typedef enum UTFIllFormedBehavior {
    /** Report U_SENTINEL instead of a code point. */
    UTF_BEHAVIOR_NEGATIVE,
    /** Report U+FFFD instead of a code point. */
    UTF_BEHAVIOR_FFFD,
    /**
     * 16-bit input: report the unpaired surrogate itself.
     * 32-bit input: report a surrogate code point itself, U+FFFD for any
     * other invalid value.
     * 8-bit input: rejected at compile time by a static_assert.
     */
    UTF_BEHAVIOR_SURROGATE
} UTFIllFormedBehavior;
169 
170 namespaceU_HEADER_ONLY_NAMESPACE {
171 
// Implementation details shared by the classes in this header.
namespace prv {
#if U_CPLUSPLUS_VERSION >= 20

/** Value type of an iterator (C++20: std::iter_value_t). */
template<typename Iter>
using iter_value_t = typename std::iter_value_t<Iter>;

/** Difference type of an iterator (C++20: std::iter_difference_t). */
template<typename Iter>
using iter_difference_t = std::iter_difference_t<Iter>;

/** True if Iter models std::forward_iterator. */
template<typename Iter>
constexpr bool forward_iterator = std::forward_iterator<Iter>;

/** True if Iter models std::bidirectional_iterator. */
template<typename Iter>
constexpr bool bidirectional_iterator = std::bidirectional_iterator<Iter>;

/** True if Range models std::ranges::range. */
template<typename Range>
constexpr bool range = std::ranges::range<Range>;

#else

/** Value type of an iterator (C++17: via std::iterator_traits). */
template<typename Iter>
using iter_value_t = typename std::iterator_traits<Iter>::value_type;

/** Difference type of an iterator (C++17: via std::iterator_traits). */
template<typename Iter>
using iter_difference_t = typename std::iterator_traits<Iter>::difference_type;

/** True if Iter's iterator_category is, or derives from, forward_iterator_tag. */
template<typename Iter>
constexpr bool forward_iterator =
    std::is_base_of_v<
        std::forward_iterator_tag,
        typename std::iterator_traits<Iter>::iterator_category>;

/** True if Iter's iterator_category is, or derives from, bidirectional_iterator_tag. */
template<typename Iter>
constexpr bool bidirectional_iterator =
    std::is_base_of_v<
        std::bidirectional_iterator_tag,
        typename std::iterator_traits<Iter>::iterator_category>;

/** Detection helper for range: false unless the specialization below matches. */
template<typename Range, typename = void>
struct range_type : std::false_type {};

/** Matches types that have both begin() and end() member functions. */
template<typename Range>
struct range_type<
    Range,
    std::void_t<decltype(std::declval<Range>().begin()),
                decltype(std::declval<Range>().end())>> : std::true_type {};

/** True if Range has begin() and end() member functions. */
template<typename Range>
constexpr bool range = range_type<Range>::value;

#endif

/** Type trait: false for any type that is not a std::basic_string_view. */
template <typename T> struct is_basic_string_view : std::false_type {};

/** Type trait: true for every std::basic_string_view specialization. */
template <typename... Args>
struct is_basic_string_view<std::basic_string_view<Args...>> : std::true_type {};

/** Convenience variable template for is_basic_string_view. */
template <typename T> constexpr bool is_basic_string_view_v = is_basic_string_view<T>::value;

/**
 * Forward iterator over consecutive code point values.
 * Dereferencing yields the current value; incrementing adds one.
 *
 * @tparam CP32 32-bit code point type
 * @tparam skipSurrogates if true, incrementing onto 0xd800 jumps to 0xe000
 */
template<typename CP32, bool skipSurrogates>
class CodePointsIterator {
    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
public:
    using value_type = CP32;
    /** Values are returned by value, not by reference. */
    using reference = value_type;
    using pointer = CP32 *;
    using difference_type = int32_t;
    using iterator_category = std::forward_iterator_tag;

    /** Starts the iteration at code point c. */
    inline CodePointsIterator(CP32 c) : c_(c) {}
    /** Two iterators are equal if they are at the same code point. */
    inline bool operator==(const CodePointsIterator &other) const { return c_ == other.c_; }
    inline bool operator!=(const CodePointsIterator &other) const { return !operator==(other); }
    /** @return the current code point */
    inline CP32 operator*() const { return c_; }
    inline CodePointsIterator &operator++() {  // pre-increment
        ++c_;
        // Optionally jump over the surrogate block U+D800..U+DFFF.
        if (skipSurrogates && c_ == 0xd800) {
            c_ = 0xe000;
        }
        return *this;
    }
    inline CodePointsIterator operator++(int) {  // post-increment
        CodePointsIterator result(*this);
        ++(*this);
        return result;
    }

private:
    CP32 c_;
};

}  // namespace prv
290 
301 template<typename CP32>
302 classAllCodePoints {
303  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
304 public:
306 AllCodePoints() {}
312 autobegin() const{returnprv::CodePointsIterator<CP32, false>(0); }
317 autoend() const{returnprv::CodePointsIterator<CP32, false>(0x110000); }
318 };
319 
332 template<typename CP32>
333 classAllScalarValues {
334  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
335 public:
337 AllScalarValues() {}
343 autobegin() const{returnprv::CodePointsIterator<CP32, true>(0); }
348 autoend() const{returnprv::CodePointsIterator<CP32, true>(0x110000); }
349 };
350 
366 template<typename CP32,typename UnitIter,typename =void>
367 classUnsafeCodeUnits {
368  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
369 using Unit =typenameprv::iter_value_t<UnitIter>;
370 public:
372 UnsafeCodeUnits(CP32codePoint, uint8_tlength, UnitIter start, UnitIter limit) :
373  c_(codePoint), len_(length), start_(start), limit_(limit) {}
374 
376 UnsafeCodeUnits(constUnsafeCodeUnits &other) =default;
378 UnsafeCodeUnits &operator=(constUnsafeCodeUnits &other) =default;
379 
387  CP32codePoint() const{return c_; }
388 
394  UnitIterbegin() const{return start_; }
395 
401  UnitIterend() const{return limit_; }
402 
407  uint8_tlength() const{return len_; }
408 
409 #if U_CPLUSPLUS_VERSION >= 20
415 template<std::contiguous_iterator Iter = UnitIter>
416  std::basic_string_view<Unit>stringView() const{
417 return std::basic_string_view<Unit>(begin(),end());
418  }
419 #else
425  template<typename Iter = UnitIter, typename Unit = typename std::iterator_traits<Iter>::value_type>
426  std::enable_if_t<std::is_pointer_v<Iter> ||
427  std::is_same_v<Iter, typename std::basic_string<Unit>::iterator> ||
428  std::is_same_v<Iter, typename std::basic_string<Unit>::const_iterator> ||
429  std::is_same_v<Iter, typename std::basic_string_view<Unit>::iterator> ||
430  std::is_same_v<Iter, typename std::basic_string_view<Unit>::const_iterator>,
431  std::basic_string_view<Unit>>
432 stringView() const{
433 return std::basic_string_view<Unit>(&*start_, len_);
434  }
435 #endif
436 
437 private:
438 // Order of fields with padding and access frequency in mind.
439  CP32 c_;
440  uint8_t len_;
441  UnitIter start_;
442  UnitIter limit_;
443 };
444 
445 #ifndef U_IN_DOXYGEN
446 // Partial template specialization for single-pass input iterator.
447 // No UnitIter field, no getter for it, no stringView().
448 template<typename CP32,typename UnitIter>
449 classUnsafeCodeUnits<
450  CP32,
451  UnitIter,
452  std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
453  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
454 public:
455 UnsafeCodeUnits(CP32codePoint, uint8_tlength) : c_(codePoint), len_(length) {}
456 
457 UnsafeCodeUnits(constUnsafeCodeUnits &other) =default;
458 UnsafeCodeUnits &operator=(constUnsafeCodeUnits &other) =default;
459 
460  CP32codePoint() const{return c_; }
461 
462  uint8_tlength() const{return len_; }
463 
464 private:
465 // Order of fields with padding and access frequency in mind.
466  CP32 c_;
467  uint8_t len_;
468 };
469 #endif// U_IN_DOXYGEN
470 
486 template<typename CP32,typename UnitIter,typename =void>
487 classCodeUnits :publicUnsafeCodeUnits<CP32, UnitIter> {
488 public:
490 CodeUnits(CP32codePoint, uint8_tlength,boolwellFormed, UnitIter start, UnitIter limit) :
491 UnsafeCodeUnits<CP32, UnitIter>(codePoint,length, start, limit), ok_(wellFormed) {}
492 
494 CodeUnits(constCodeUnits &other) =default;
496 CodeUnits &operator=(constCodeUnits &other) =default;
497 
502 boolwellFormed() const{return ok_; }
503 
504 private:
505 bool ok_;
506 };
507 
508 #ifndef U_IN_DOXYGEN
509 // Partial template specialization for single-pass input iterator.
510 // No UnitIter field, no getter for it, no stringView().
511 template<typename CP32,typename UnitIter>
512 classCodeUnits<
513  CP32,
514  UnitIter,
515  std::enable_if_t<!prv::forward_iterator<UnitIter>>> :
516 public UnsafeCodeUnits<CP32, UnitIter> {
517 public:
518 CodeUnits(CP32codePoint, uint8_tlength,boolwellFormed) :
519 UnsafeCodeUnits<CP32, UnitIter>(codePoint,length), ok_(wellFormed) {}
520 
521 CodeUnits(constCodeUnits &other) =default;
522 CodeUnits &operator=(constCodeUnits &other) =default;
523 
524 boolwellFormed() const{return ok_; }
525 
526 private:
527 bool ok_;
528 };
529 #endif// U_IN_DOXYGEN
530 
531 // Validating implementations ---------------------------------------------- ***
532 
533 #ifndef U_IN_DOXYGEN
534 template<typename CP32,UTFIllFormedBehavior behavior,
535 typename UnitIter,typename LimitIter = UnitIter,typename =void>
536 classUTFImpl;
537 
// Note: readAndInc() functions take both a p0 and a p iterator.
// They must have the same value.
// For a multi-pass UnitIter, the caller must copy its p into a local variable p0,
// and readAndInc() copies p0 and the incremented p into the CodeUnits.
// For a single-pass UnitIter, which may be neither default-constructible nor copyable,
// the caller can pass p into both references, and readAndInc() does not use p0
// and constructs CodeUnits without them.
// Moving the p0 variable into the call site avoids having to declare it inside readAndInc()
// which may not be possible for a single-pass iterator.
547 
548 // UTF-8
549 template<typename CP32, UTFIllFormedBehavior behavior,typename UnitIter,typename LimitIter>
550 classUTFImpl<
551  CP32, behavior,
552  UnitIter, LimitIter,
553  std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
554  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
555  static_assert(behavior !=UTF_BEHAVIOR_SURROGATE,
556 "For 8-bit strings, the SURROGATE option does not have an equivalent.");
557 public:
558 // Handle ill-formed UTF-8
559 U_FORCE_INLINEstatic CP32 sub() {
560 switch (behavior) {
561 caseUTF_BEHAVIOR_NEGATIVE:returnU_SENTINEL;
562 caseUTF_BEHAVIOR_FFFD:return 0xfffd;
563  }
564  }
565 
566 U_FORCE_INLINEstaticvoid inc(UnitIter &p,const LimitIter &limit) {
567 // Very similar to U8_FWD_1().
568  uint8_t b = *p;
569  ++p;
570 if (U8_IS_LEAD(b) && p != limit) {
571  uint8_t t1 = *p;
572 if ((0xe0 <= b && b < 0xf0)) {
573 if (U8_IS_VALID_LEAD3_AND_T1(b, t1) &&
574  ++p != limit &&U8_IS_TRAIL(*p)) {
575  ++p;
576  }
577  }elseif (b < 0xe0) {
578 if (U8_IS_TRAIL(t1)) {
579  ++p;
580  }
581  }else/* b >= 0xf0 */ {
582 if (U8_IS_VALID_LEAD4_AND_T1(b, t1) &&
583  ++p != limit &&U8_IS_TRAIL(*p) &&
584  ++p != limit &&U8_IS_TRAIL(*p)) {
585  ++p;
586  }
587  }
588  }
589  }
590 
591 U_FORCE_INLINEstaticvoid dec(UnitIter start, UnitIter &p) {
592 // Very similar to U8_BACK_1().
593  uint8_t c = *--p;
594 if (U8_IS_TRAIL(c) && p != start) {
595  UnitIter p1 = p;
596  uint8_t b1 = *--p1;
597 if (U8_IS_LEAD(b1)) {
598 if (b1 < 0xe0 ||
599  (b1 < 0xf0 ?
600 U8_IS_VALID_LEAD3_AND_T1(b1, c) :
601 U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
602  p = p1;
603 return;
604  }
605  }elseif (U8_IS_TRAIL(b1) && p1 != start) {
606  uint8_t b2 = *--p1;
607 if (0xe0 <= b2 && b2 <= 0xf4) {
608 if (b2 < 0xf0 ?
609 U8_IS_VALID_LEAD3_AND_T1(b2, b1) :
610 U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
611  p = p1;
612 return;
613  }
614  }elseif (U8_IS_TRAIL(b2) && p1 != start) {
615  uint8_t b3 = *--p1;
616 if (0xf0 <= b3 && b3 <= 0xf4 &&U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
617  p = p1;
618 return;
619  }
620  }
621  }
622  }
623  }
624 
625 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> readAndInc(
626  UnitIter &p0, UnitIter &p,const LimitIter &limit) {
627  constexprbool isMultiPass = prv::forward_iterator<UnitIter>;
628 // Very similar to U8_NEXT_OR_FFFD().
629  CP32 c = uint8_t(*p);
630  ++p;
631 if (U8_IS_SINGLE(c)) {
632 if constexpr (isMultiPass) {
633 return {c, 1,true, p0, p};
634  }else {
635 return {c, 1,true};
636  }
637  }
638  uint8_t length = 1;
639  uint8_t t = 0;
640 if (p != limit &&
641 // fetch/validate/assemble all but last trail byte
642  (c >= 0xe0 ?
643  (c < 0xf0 ?// U+0800..U+FFFF except surrogates
644 U8_LEAD3_T1_BITS[c &= 0xf] & (1 << ((t = *p) >> 5)) &&
645  (t &= 0x3f, 1)
646  :// U+10000..U+10FFFF
647  (c -= 0xf0) <= 4 &&
648 U8_LEAD4_T1_BITS[(t = *p) >> 4] & (1 << c) &&
649  (c = (c << 6) | (t & 0x3f), ++length, ++p != limit) &&
650  (t = *p - 0x80) <= 0x3f) &&
651 // valid second-to-last trail byte
652  (c = (c << 6) | t, ++length, ++p != limit)
653  :// U+0080..U+07FF
654  c >= 0xc2 && (c &= 0x1f, 1)) &&
655 // last trail byte
656  (t = *p - 0x80) <= 0x3f) {
657  c = (c << 6) | t;
658  ++length;
659  ++p;
660 if constexpr (isMultiPass) {
661 return {c, length,true, p0, p};
662  }else {
663 return {c, length,true};
664  }
665  }
666 if constexpr (isMultiPass) {
667 return {sub(), length,false, p0, p};
668  }else {
669 return {sub(), length,false};
670  }
671  }
672 
673 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
674 // Very similar to U8_PREV_OR_FFFD().
675  UnitIter p0 = p;
676  CP32 c = uint8_t(*--p);
677 if (U8_IS_SINGLE(c)) {
678 return {c, 1,true, p, p0};
679  }
680 if (U8_IS_TRAIL(c) && p != start) {
681  UnitIter p1 = p;
682  uint8_t b1 = *--p1;
683 if (U8_IS_LEAD(b1)) {
684 if (b1 < 0xe0) {
685  p = p1;
686  c = ((b1 - 0xc0) << 6) | (c & 0x3f);
687 return {c, 2,true, p, p0};
688  }elseif (b1 < 0xf0 ?
689 U8_IS_VALID_LEAD3_AND_T1(b1, c) :
690 U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
691 // Truncated 3- or 4-byte sequence.
692  p = p1;
693 return {sub(), 2,false, p, p0};
694  }
695  }elseif (U8_IS_TRAIL(b1) && p1 != start) {
696 // Extract the value bits from the last trail byte.
697  c &= 0x3f;
698  uint8_t b2 = *--p1;
699 if (0xe0 <= b2 && b2 <= 0xf4) {
700 if (b2 < 0xf0) {
701  b2 &= 0xf;
702 if (U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
703  p = p1;
704  c = (b2 << 12) | ((b1 & 0x3f) << 6) | c;
705 return {c, 3,true, p, p0};
706  }
707  }elseif (U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
708 // Truncated 4-byte sequence.
709  p = p1;
710 return {sub(), 3,false, p, p0};
711  }
712  }elseif (U8_IS_TRAIL(b2) && p1 != start) {
713  uint8_t b3 = *--p1;
714 if (0xf0 <= b3 && b3 <= 0xf4) {
715  b3 &= 7;
716 if (U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
717  p = p1;
718  c = (b3 << 18) | ((b2 & 0x3f) << 12) | ((b1 & 0x3f) << 6) | c;
719 return {c, 4,true, p, p0};
720  }
721  }
722  }
723  }
724  }
725 return {sub(), 1,false, p, p0};
726  }
727 };
728 
729 // UTF-16
730 template<typename CP32, UTFIllFormedBehavior behavior,typename UnitIter,typename LimitIter>
731 classUTFImpl<
732  CP32, behavior,
733  UnitIter, LimitIter,
734  std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
735  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
736 public:
737 // Handle ill-formed UTF-16: One unpaired surrogate.
738 U_FORCE_INLINEstatic CP32 sub(CP32 surrogate) {
739 switch (behavior) {
740 caseUTF_BEHAVIOR_NEGATIVE:returnU_SENTINEL;
741 caseUTF_BEHAVIOR_FFFD:return 0xfffd;
742 caseUTF_BEHAVIOR_SURROGATE:return surrogate;
743  }
744  }
745 
746 U_FORCE_INLINEstaticvoid inc(UnitIter &p,const LimitIter &limit) {
747 // Very similar to U16_FWD_1().
748 auto c = *p;
749  ++p;
750 if (U16_IS_LEAD(c) && p != limit &&U16_IS_TRAIL(*p)) {
751  ++p;
752  }
753  }
754 
755 U_FORCE_INLINEstaticvoid dec(UnitIter start, UnitIter &p) {
756 // Very similar to U16_BACK_1().
757  UnitIter p1;
758 if (U16_IS_TRAIL(*--p) && p != start && (p1 = p,U16_IS_LEAD(*--p1))) {
759  p = p1;
760  }
761  }
762 
763 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> readAndInc(
764  UnitIter &p0, UnitIter &p,const LimitIter &limit) {
765  constexprbool isMultiPass = prv::forward_iterator<UnitIter>;
766 // Very similar to U16_NEXT_OR_FFFD().
767  CP32 c =static_cast<CP32>(*p);
768  ++p;
769 if (!U16_IS_SURROGATE(c)) {
770 if constexpr (isMultiPass) {
771 return {c, 1,true, p0, p};
772  }else {
773 return {c, 1,true};
774  }
775  }else {
776  uint16_t c2;
777 if (U16_IS_SURROGATE_LEAD(c) && p != limit &&U16_IS_TRAIL(c2 = *p)) {
778  ++p;
779  c =U16_GET_SUPPLEMENTARY(c, c2);
780 if constexpr (isMultiPass) {
781 return {c, 2,true, p0, p};
782  }else {
783 return {c, 2,true};
784  }
785  }else {
786 if constexpr (isMultiPass) {
787 return {sub(c), 1,false, p0, p};
788  }else {
789 return {sub(c), 1,false};
790  }
791  }
792  }
793  }
794 
795 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
796 // Very similar to U16_PREV_OR_FFFD().
797  UnitIter p0 = p;
798  CP32 c =static_cast<CP32>(*--p);
799 if (!U16_IS_SURROGATE(c)) {
800 return {c, 1,true, p, p0};
801  }else {
802  UnitIter p1;
803  uint16_t c2;
804 if (U16_IS_SURROGATE_TRAIL(c) && p != start && (p1 = p,U16_IS_LEAD(c2 = *--p1))) {
805  p = p1;
806  c =U16_GET_SUPPLEMENTARY(c2, c);
807 return {c, 2,true, p, p0};
808  }else {
809 return {sub(c), 1,false, p, p0};
810  }
811  }
812  }
813 };
814 
815 // UTF-32: trivial, but still validating
816 template<typename CP32, UTFIllFormedBehavior behavior,typename UnitIter,typename LimitIter>
817 classUTFImpl<
818  CP32, behavior,
819  UnitIter, LimitIter,
820  std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
821  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
822 public:
823 // Handle ill-formed UTF-32
824 U_FORCE_INLINEstatic CP32 sub(bool forSurrogate, CP32 surrogate) {
825 switch (behavior) {
826 caseUTF_BEHAVIOR_NEGATIVE:returnU_SENTINEL;
827 caseUTF_BEHAVIOR_FFFD:return 0xfffd;
828 caseUTF_BEHAVIOR_SURROGATE:return forSurrogate ? surrogate : 0xfffd;
829  }
830  }
831 
832 U_FORCE_INLINEstaticvoid inc(UnitIter &p,const LimitIter &/*limit*/) {
833  ++p;
834  }
835 
836 U_FORCE_INLINEstaticvoid dec(UnitIter/*start*/, UnitIter &p) {
837  --p;
838  }
839 
840 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> readAndInc(
841  UnitIter &p0, UnitIter &p,const LimitIter &/*limit*/) {
842  constexprbool isMultiPass = prv::forward_iterator<UnitIter>;
843  uint32_t uc = *p;
844  CP32 c = uc;
845  ++p;
846 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
847 if constexpr (isMultiPass) {
848 return {c, 1,true, p0, p};
849  }else {
850 return {c, 1,true};
851  }
852  }else {
853 if constexpr (isMultiPass) {
854 return {sub(uc < 0xe000, c), 1,false, p0, p};
855  }else {
856 return {sub(uc < 0xe000, c), 1,false};
857  }
858  }
859  }
860 
861 U_FORCE_INLINEstatic CodeUnits<CP32, UnitIter> decAndRead(UnitIter/*start*/, UnitIter &p) {
862  UnitIter p0 = p;
863  uint32_t uc = *--p;
864  CP32 c = uc;
865 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
866 return {c, 1,true, p, p0};
867  }else {
868 return {sub(uc < 0xe000, c), 1,false, p, p0};
869  }
870  }
871 };
872 
873 // Non-validating implementations ------------------------------------------ ***
874 
// Primary template of the non-validating decoder; declared only.
// The partial specializations below are selected by the size of the code unit type
// (1, 2 or 4 bytes) via the trailing enable_if parameter.
template<typename CP32, typename UnitIter, typename = void>
class UnsafeUTFImpl;
877 
878 // UTF-8
879 template<typename CP32,typename UnitIter>
880 classUnsafeUTFImpl<
881  CP32,
882  UnitIter,
883  std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
884  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
885 public:
886 U_FORCE_INLINEstaticvoid inc(UnitIter &p) {
887 // Very similar to U8_FWD_1_UNSAFE().
888  uint8_t b = *p;
889  std::advance(p, 1 +U8_COUNT_TRAIL_BYTES_UNSAFE(b));
890  }
891 
892 U_FORCE_INLINEstaticvoid dec(UnitIter &p) {
893 // Very similar to U8_BACK_1_UNSAFE().
894 while (U8_IS_TRAIL(*--p)) {}
895  }
896 
897 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
898  constexprbool isMultiPass = prv::forward_iterator<UnitIter>;
899 // Very similar to U8_NEXT_UNSAFE().
900  CP32 c = uint8_t(*p);
901  ++p;
902 if (U8_IS_SINGLE(c)) {
903 if constexpr (isMultiPass) {
904 return {c, 1, p0, p};
905  }else {
906 return {c, 1};
907  }
908  }elseif (c < 0xe0) {
909  c = ((c & 0x1f) << 6) | (*p & 0x3f);
910  ++p;
911 if constexpr (isMultiPass) {
912 return {c, 2, p0, p};
913  }else {
914 return {c, 2};
915  }
916  }elseif (c < 0xf0) {
917 // No need for (c&0xf) because the upper bits are truncated
918 // after <<12 in the cast to uint16_t.
919  c = uint16_t(c << 12) | ((*p & 0x3f) << 6);
920  ++p;
921  c |= *p & 0x3f;
922  ++p;
923 if constexpr (isMultiPass) {
924 return {c, 3, p0, p};
925  }else {
926 return {c, 3};
927  }
928  }else {
929  c = ((c & 7) << 18) | ((*p & 0x3f) << 12);
930  ++p;
931  c |= (*p & 0x3f) << 6;
932  ++p;
933  c |= *p & 0x3f;
934  ++p;
935 if constexpr (isMultiPass) {
936 return {c, 4, p0, p};
937  }else {
938 return {c, 4};
939  }
940  }
941  }
942 
943 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
944 // Very similar to U8_PREV_UNSAFE().
945  UnitIter p0 = p;
946  CP32 c = uint8_t(*--p);
947 if (U8_IS_SINGLE(c)) {
948 return {c, 1, p, p0};
949  }
950 // U8_IS_TRAIL(c) if well-formed
951  c &= 0x3f;
952  uint8_t count = 1;
953 for (uint8_t shift = 6;;) {
954  uint8_t b = *--p;
955 if (b >= 0xc0) {
956 U8_MASK_LEAD_BYTE(b, count);
957  c |= uint32_t{b} << shift;
958 break;
959  }else {
960  c |= (uint32_t{b} & 0x3f) << shift;
961  ++count;
962  shift += 6;
963  }
964  }
965  ++count;
966 return {c, count, p, p0};
967  }
968 };
969 
970 // UTF-16
971 template<typename CP32,typename UnitIter>
972 classUnsafeUTFImpl<
973  CP32,
974  UnitIter,
975  std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
976  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
977 public:
978 U_FORCE_INLINEstaticvoid inc(UnitIter &p) {
979 // Very similar to U16_FWD_1_UNSAFE().
980 auto c = *p;
981  ++p;
982 if (U16_IS_LEAD(c)) {
983  ++p;
984  }
985  }
986 
987 U_FORCE_INLINEstaticvoid dec(UnitIter &p) {
988 // Very similar to U16_BACK_1_UNSAFE().
989 if (U16_IS_TRAIL(*--p)) {
990  --p;
991  }
992  }
993 
994 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
995  constexprbool isMultiPass = prv::forward_iterator<UnitIter>;
996 // Very similar to U16_NEXT_UNSAFE().
997  CP32 c =static_cast<CP32>(*p);
998  ++p;
999 if (!U16_IS_LEAD(c)) {
1000 if constexpr (isMultiPass) {
1001 return {c, 1, p0, p};
1002  }else {
1003 return {c, 1};
1004  }
1005  }else {
1006  uint16_t c2 = *p;
1007  ++p;
1008  c =U16_GET_SUPPLEMENTARY(c, c2);
1009 if constexpr (isMultiPass) {
1010 return {c, 2, p0, p};
1011  }else {
1012 return {c, 2};
1013  }
1014  }
1015  }
1016 
1017 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1018 // Very similar to U16_PREV_UNSAFE().
1019  UnitIter p0 = p;
1020  CP32 c =static_cast<CP32>(*--p);
1021 if (!U16_IS_TRAIL(c)) {
1022 return {c, 1, p, p0};
1023  }else {
1024  uint16_t c2 = *--p;
1025  c =U16_GET_SUPPLEMENTARY(c2, c);
1026 return {c, 2, p, p0};
1027  }
1028  }
1029 };
1030 
1031 // UTF-32: trivial
1032 template<typename CP32,typename UnitIter>
1033 classUnsafeUTFImpl<
1034  CP32,
1035  UnitIter,
1036  std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
1037  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
1038 public:
1039 U_FORCE_INLINEstaticvoid inc(UnitIter &p) {
1040  ++p;
1041  }
1042 
1043 U_FORCE_INLINEstaticvoid dec(UnitIter &p) {
1044  --p;
1045  }
1046 
1047 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
1048  constexprbool isMultiPass = prv::forward_iterator<UnitIter>;
1049  CP32 c = *p;
1050  ++p;
1051 if constexpr (isMultiPass) {
1052 return {c, 1, p0, p};
1053  }else {
1054 return {c, 1};
1055  }
1056  }
1057 
1058 U_FORCE_INLINEstatic UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1059  UnitIter p0 = p;
1060  CP32 c = *--p;
1061 return {c, 1, p, p0};
1062  }
1063 };
1064 
1065 #endif
1066 
1067 // Validating iterators ---------------------------------------------------- ***
1068 
1092 template<typename CP32,UTFIllFormedBehavior behavior,
1093 typename UnitIter,typename LimitIter = UnitIter,typename =void>
1094 classUTFIterator {
1095  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
1096 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1097 
1098 // Proxy type for operator->() (required by LegacyInputIterator)
1099 // so that we don't promise always returning CodeUnits.
1100 classProxy {
1101 public:
1102 explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
1103 CodeUnits<CP32, UnitIter> &operator*() {return units_; }
1104 CodeUnits<CP32, UnitIter> *operator->() {return &units_; }
1105 private:
1106 CodeUnits<CP32, UnitIter> units_;
1107  };
1108 
1109 public:
1111 usingvalue_type =CodeUnits<CP32, UnitIter>;
1113 usingreference =value_type;
1115 usingpointer = Proxy;
1117 usingdifference_type =prv::iter_difference_t<UnitIter>;
1119 usingiterator_category = std::conditional_t<
1120  prv::bidirectional_iterator<UnitIter>,
1121  std::bidirectional_iterator_tag,
1122  std::forward_iterator_tag>;
1123 
1137 U_FORCE_INLINEUTFIterator(UnitIter start, UnitIter p, LimitIter limit) :
1138  p_(p), start_(start), limit_(limit), units_(0, 0, false, p, p) {}
1150 U_FORCE_INLINEUTFIterator(UnitIter p, LimitIter limit) :
1151  p_(p), start_(p), limit_(limit), units_(0, 0, false, p, p) {}
1163 U_FORCE_INLINEexplicitUTFIterator(UnitIter p) : p_(p), start_(p), limit_(p), units_(0, 0, false, p, p) {}
1169 U_FORCE_INLINEUTFIterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}
1170 
1172 U_FORCE_INLINEUTFIterator(UTFIterator &&src) noexcept =default;
1174 U_FORCE_INLINEUTFIterator &operator=(UTFIterator &&src) noexcept =default;
1175 
1177 U_FORCE_INLINEUTFIterator(constUTFIterator &other) =default;
1179 U_FORCE_INLINEUTFIterator &operator=(constUTFIterator &other) =default;
1180 
1186 U_FORCE_INLINEbooloperator==(constUTFIterator &other) const{
1187 return getLogicalPosition() == other.getLogicalPosition();
1188  }
1194 U_FORCE_INLINEbooloperator!=(constUTFIterator &other) const{return !operator==(other); }
1195 
1196 // Asymmetric equality & nonequality with a sentinel type.
1197 
1204 template<typename Sentinel>U_FORCE_INLINEfriend
1205  std::enable_if_t<
1206  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1207 bool>
1208 operator==(constUTFIterator &iter,const Sentinel &s) {
1209 return iter.getLogicalPosition() == s;
1210  }
1211 
1212 #if U_CPLUSPLUS_VERSION < 20
1213 // C++17: Need to define all four combinations of == / != vs. parameter order.
1214 // Once we require C++20, we could remove all but the first == because
1215 // the compiler would generate the rest.
1216 
1223 template<typename Sentinel>U_FORCE_INLINEfriend
1224  std::enable_if_t<
1225  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1226 bool>
1227 operator==(const Sentinel &s,constUTFIterator &iter) {
1228 return iter.getLogicalPosition() == s;
1229  }
1236 template<typename Sentinel>U_FORCE_INLINEfriend
1237  std::enable_if_t<
1238  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1239 bool>
1240 operator!=(constUTFIterator &iter,const Sentinel &s) {return !(iter == s); }
1247 template<typename Sentinel>U_FORCE_INLINEfriend
1248  std::enable_if_t<
1249  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1250 bool>
1251 operator!=(const Sentinel &s,constUTFIterator &iter) {return !(iter == s); }
1252 #endif// C++17
1253 
1260 U_FORCE_INLINECodeUnits<CP32, UnitIter>operator*() const{
1261 if (state_ == 0) {
1262  UnitIter p0 = p_;
1263  units_ = Impl::readAndInc(p0, p_, limit_);
1264  state_ = 1;
1265  }
1266 return units_;
1267  }
1268 
1277 U_FORCE_INLINE Proxyoperator->() const{
1278 if (state_ == 0) {
1279  UnitIter p0 = p_;
1280  units_ = Impl::readAndInc(p0, p_, limit_);
1281  state_ = 1;
1282  }
1283 return Proxy(units_);
1284  }
1285 
1292 U_FORCE_INLINEUTFIterator &operator++() {// pre-increment
1293 if (state_ > 0) {
1294 // operator*() called readAndInc() so p_ is already ahead.
1295  state_ = 0;
1296  }elseif (state_ == 0) {
1297  Impl::inc(p_, limit_);
1298  }else/* state_ < 0 */ {
1299 // operator--() called decAndRead() so we know how far to skip.
1300  p_ = units_.end();
1301  state_ = 0;
1302  }
1303 return *this;
1304  }
1305 
1314 U_FORCE_INLINEUTFIteratoroperator++(int) {// post-increment
1315 if (state_ > 0) {
1316 // operator*() called readAndInc() so p_ is already ahead.
1317 UTFIterator result(*this);
1318  state_ = 0;
1319 return result;
1320  }elseif (state_ == 0) {
1321  UnitIter p0 = p_;
1322  units_ = Impl::readAndInc(p0, p_, limit_);
1323 UTFIterator result(*this);
1324  result.state_ = 1;
1325 // keep this->state_ == 0
1326 return result;
1327  }else/* state_ < 0 */ {
1328 UTFIterator result(*this);
1329 // operator--() called decAndRead() so we know how far to skip.
1330  p_ = units_.end();
1331  state_ = 0;
1332 return result;
1333  }
1334  }
1335 
1343 template<typename Iter = UnitIter>
1344 U_FORCE_INLINE
1345  std::enable_if_t<prv::bidirectional_iterator<Iter>,UTFIterator &>
1346 operator--() {// pre-decrement
1347 if (state_ > 0) {
1348 // operator*() called readAndInc() so p_ is ahead of the logical position.
1349  p_ = units_.begin();
1350  }
1351  units_ = Impl::decAndRead(start_, p_);
1352  state_ = -1;
1353 return *this;
1354  }
1355 
1363 template<typename Iter = UnitIter>
1364 U_FORCE_INLINE
1365  std::enable_if_t<prv::bidirectional_iterator<Iter>,UTFIterator>
1366 operator--(int) {// post-decrement
1367 UTFIterator result(*this);
1368 operator--();
1369 return result;
1370  }
1371 
1372 private:
1373 friendclassstd::reverse_iterator<UTFIterator<CP32, behavior, UnitIter>>;
1374 
1375 U_FORCE_INLINE UnitIter getLogicalPosition() const{
1376 return state_ <= 0 ? p_ : units_.begin();
1377  }
1378 
1379 // operator*() etc. are logically const.
1380 mutable UnitIter p_;
1381 // In a validating iterator, we need start_ & limit_ so that when we read a code point
1382 // (forward or backward) we can test if there are enough code units.
1383  UnitIter start_;
1384  LimitIter limit_;
1385 // Keep state so that we call readAndInc() only once for both operator*() and ++
1386 // to make it easy for the compiler to optimize.
1387 mutable CodeUnits<CP32, UnitIter> units_;
1388 // >0: units_ = readAndInc(), p_ = units limit
1389 // which means that p_ is ahead of its logical position
1390 // 0: initial state
1391 // <0: units_ = decAndRead(), p_ = units start
1392 mutable int8_t state_ = 0;
1393 };
1394 
1395 #ifndef U_IN_DOXYGEN
1396 // Partial template specialization for single-pass input iterator.
1397 template<typename CP32, UTFIllFormedBehavior behavior,typename UnitIter,typename LimitIter>
1398 classUTFIterator<
1399  CP32, behavior,
1400  UnitIter, LimitIter,
1401  std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
1402  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
1403 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1404 
1405 // Proxy type for post-increment return value, to make *iter++ work.
1406 // Also for operator->() (required by LegacyInputIterator)
1407 // so that we don't promise always returning CodeUnits.
1408 classProxy {
1409 public:
1410 explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
1411  CodeUnits<CP32, UnitIter> &operator*() {return units_; }
1412  CodeUnits<CP32, UnitIter> *operator->() {return &units_; }
1413 private:
1414  CodeUnits<CP32, UnitIter> units_;
1415  };
1416 
1417 public:
1418 usingvalue_type = CodeUnits<CP32, UnitIter>;
1419 usingreference =value_type;
1420 usingpointer = Proxy;
1421 usingdifference_type = prv::iter_difference_t<UnitIter>;
1422 usingiterator_category = std::input_iterator_tag;
1423 
1424 U_FORCE_INLINEUTFIterator(UnitIter p, LimitIter limit) : p_(std::move(p)), limit_(std::move(limit)) {}
1425 
1426 // Constructs an iterator start or limit sentinel.
1427 // Requires p to be copyable.
1428 U_FORCE_INLINEexplicitUTFIterator(UnitIter p) : p_(std::move(p)), limit_(p_) {}
1429 
1430 U_FORCE_INLINEUTFIterator(UTFIterator &&src) noexcept =default;
1431 U_FORCE_INLINEUTFIterator &operator=(UTFIterator &&src) noexcept =default;
1432 
1433 U_FORCE_INLINEUTFIterator(constUTFIterator &other) =default;
1434 U_FORCE_INLINEUTFIterator &operator=(constUTFIterator &other) =default;
1435 
1436 U_FORCE_INLINEbooloperator==(constUTFIterator &other) const{
1437 return p_ == other.p_ && ahead_ == other.ahead_;
1438 // Strictly speaking, we should check if the logical position is the same.
1439 // However, we cannot advance, or do arithmetic with, a single-pass UnitIter.
1440  }
1441 U_FORCE_INLINEbooloperator!=(constUTFIterator &other) const{return !operator==(other); }
1442 
1443 template<typename Sentinel>U_FORCE_INLINEfriend
1444  std::enable_if_t<
1445  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1446 bool>
1447 operator==(constUTFIterator &iter,const Sentinel &s) {
1448 return !iter.ahead_ && iter.p_ == s;
1449  }
1450 
1451 #if U_CPLUSPLUS_VERSION < 20
1452 template<typename Sentinel>U_FORCE_INLINEfriend
1453  std::enable_if_t<
1454  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1455 bool>
1456 operator==(const Sentinel &s,constUTFIterator &iter) {
1457 return !iter.ahead_ && iter.p_ == s;
1458  }
1459 
1460 template<typename Sentinel>U_FORCE_INLINEfriend
1461  std::enable_if_t<
1462  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1463 bool>
1464 operator!=(constUTFIterator &iter,const Sentinel &s) {return !(iter == s); }
1465 
1466 template<typename Sentinel>U_FORCE_INLINEfriend
1467  std::enable_if_t<
1468  !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1469 bool>
1470 operator!=(const Sentinel &s,constUTFIterator &iter) {return !(iter == s); }
1471 #endif// C++17
1472 
1473 U_FORCE_INLINE CodeUnits<CP32, UnitIter>operator*() const{
1474 if (!ahead_) {
1475  units_ = Impl::readAndInc(p_, p_, limit_);
1476  ahead_ =true;
1477  }
1478 return units_;
1479  }
1480 
1481 U_FORCE_INLINE Proxyoperator->() const{
1482 if (!ahead_) {
1483  units_ = Impl::readAndInc(p_, p_, limit_);
1484  ahead_ =true;
1485  }
1486 return Proxy(units_);
1487  }
1488 
1489 U_FORCE_INLINEUTFIterator &operator++() {// pre-increment
1490 if (ahead_) {
1491 // operator*() called readAndInc() so p_ is already ahead.
1492  ahead_ =false;
1493  }else {
1494  Impl::inc(p_, limit_);
1495  }
1496 return *this;
1497  }
1498 
1499 U_FORCE_INLINE Proxyoperator++(int) {// post-increment
1500 if (ahead_) {
1501 // operator*() called readAndInc() so p_ is already ahead.
1502  ahead_ =false;
1503  }else {
1504  units_ = Impl::readAndInc(p_, p_, limit_);
1505 // keep this->ahead_ == false
1506  }
1507 return Proxy(units_);
1508  }
1509 
1510 private:
1511 // operator*() etc. are logically const.
1512 mutable UnitIter p_;
1513 // In a validating iterator, we need limit_ so that when we read a code point
1514 // we can test if there are enough code units.
1515  LimitIter limit_;
1516 // Keep state so that we call readAndInc() only once for both operator*() and ++
1517 // so that we can use a single-pass input iterator for UnitIter.
1518 mutable CodeUnits<CP32, UnitIter> units_ = {0, 0,false};
1519 // true: units_ = readAndInc(), p_ = units limit
1520 // which means that p_ is ahead of its logical position
1521 // false: initial state
1522 mutablebool ahead_ =false;
1523 };
1524 #endif// U_IN_DOXYGEN
1525 
1526 }// namespace U_HEADER_ONLY_NAMESPACE
1527 
1528 #ifndef U_IN_DOXYGEN
1529 // Bespoke specialization of reverse_iterator.
1530 // The default implementation implements reverse operator*() and ++ in a way
1531 // that does most of the same work twice for reading variable-length sequences.
1532 template<typename CP32, UTFIllFormedBehavior behavior,typename UnitIter>
1533 classstd::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter>> {
1534  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
1535 using Impl = U_HEADER_ONLY_NAMESPACE::UTFImpl<CP32, behavior, UnitIter>;
1536 using CodeUnits_ =U_HEADER_ONLY_NAMESPACE::CodeUnits<CP32, UnitIter>;
1537 
1538 // Proxy type for operator->() (required by LegacyInputIterator)
1539 // so that we don't promise always returning CodeUnits.
1540 classProxy {
1541 public:
1542 explicit Proxy(CodeUnits_ units) : units_(units) {}
1543  CodeUnits_ &operator*() {return units_; }
1544  CodeUnits_ *operator->() {return &units_; }
1545 private:
1546  CodeUnits_ units_;
1547  };
1548 
1549 public:
1550 using value_type = CodeUnits_;
1551 using reference = value_type;
1552 using pointer = Proxy;
1553 using difference_type =U_HEADER_ONLY_NAMESPACE::prv::iter_difference_t<UnitIter>;
1554 using iterator_category = std::bidirectional_iterator_tag;
1555 
1556 U_FORCE_INLINEexplicit reverse_iterator(U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter> iter) :
1557  p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),
1558  units_(0, 0, false, p_, p_) {}
1559 U_FORCE_INLINE reverse_iterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}
1560 
1561 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =default;
1562 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =default;
1563 
1564 U_FORCE_INLINE reverse_iterator(const reverse_iterator &other) =default;
1565 U_FORCE_INLINE reverse_iterator &operator=(const reverse_iterator &other) =default;
1566 
1567 U_FORCE_INLINEbooloperator==(const reverse_iterator &other) const{
1568 return getLogicalPosition() == other.getLogicalPosition();
1569  }
1570 U_FORCE_INLINEbooloperator!=(const reverse_iterator &other) const{return !operator==(other); }
1571 
1572 U_FORCE_INLINE CodeUnits_ operator*() const{
1573 if (state_ == 0) {
1574  units_ = Impl::decAndRead(start_, p_);
1575  state_ = -1;
1576  }
1577 return units_;
1578  }
1579 
1580 U_FORCE_INLINE Proxy operator->() const{
1581 if (state_ == 0) {
1582  units_ = Impl::decAndRead(start_, p_);
1583  state_ = -1;
1584  }
1585 return Proxy(units_);
1586  }
1587 
1588 U_FORCE_INLINE reverse_iterator &operator++() {// pre-increment
1589 if (state_ < 0) {
1590 // operator*() called decAndRead() so p_ is already behind.
1591  state_ = 0;
1592  }elseif (state_ == 0) {
1593  Impl::dec(start_, p_);
1594  }else/* state_ > 0 */ {
1595 // operator--() called readAndInc() so we know how far to skip.
1596  p_ = units_.begin();
1597  state_ = 0;
1598  }
1599 return *this;
1600  }
1601 
1602 U_FORCE_INLINE reverse_iterator operator++(int) {// post-increment
1603 if (state_ < 0) {
1604 // operator*() called decAndRead() so p_ is already behind.
1605  reverse_iterator result(*this);
1606  state_ = 0;
1607 return result;
1608  }elseif (state_ == 0) {
1609  units_ = Impl::decAndRead(start_, p_);
1610  reverse_iterator result(*this);
1611  result.state_ = -1;
1612 // keep this->state_ == 0
1613 return result;
1614  }else/* state_ > 0 */ {
1615  reverse_iterator result(*this);
1616 // operator--() called readAndInc() so we know how far to skip.
1617  p_ = units_.begin();
1618  state_ = 0;
1619 return result;
1620  }
1621  }
1622 
1623 U_FORCE_INLINE reverse_iterator &operator--() {// pre-decrement
1624 if (state_ < 0) {
1625 // operator*() called decAndRead() so p_ is behind the logical position.
1626  p_ = units_.end();
1627  }
1628  UnitIter p0 = p_;
1629  units_ = Impl::readAndInc(p0, p_, limit_);
1630  state_ = 1;
1631 return *this;
1632  }
1633 
1634 U_FORCE_INLINE reverse_iterator operator--(int) {// post-decrement
1635  reverse_iterator result(*this);
1636  operator--();
1637 return result;
1638  }
1639 
1640 private:
1641 U_FORCE_INLINE UnitIter getLogicalPosition() const{
1642 return state_ >= 0 ? p_ : units_.end();
1643  }
1644 
1645 // operator*() etc. are logically const.
1646 mutable UnitIter p_;
1647 // In a validating iterator, we need start_ & limit_ so that when we read a code point
1648 // (forward or backward) we can test if there are enough code units.
1649  UnitIter start_;
1650  UnitIter limit_;
1651 // Keep state so that we call decAndRead() only once for both operator*() and ++
1652 // to make it easy for the compiler to optimize.
1653 mutable CodeUnits_ units_;
1654 // >0: units_ = readAndInc(), p_ = units limit
1655 // 0: initial state
1656 // <0: units_ = decAndRead(), p_ = units start
1657 // which means that p_ is behind its logical position
1658 mutable int8_t state_ = 0;
1659 };
1660 #endif// U_IN_DOXYGEN
1661 
1662 namespaceU_HEADER_ONLY_NAMESPACE {
1663 
1686 template<typename CP32,UTFIllFormedBehavior behavior,
1687 typename UnitIter,typename LimitIter = UnitIter>
1688 autoutfIterator(UnitIter start, UnitIter p, LimitIter limit) {
1689 returnUTFIterator<CP32, behavior, UnitIter, LimitIter>(
1690  std::move(start), std::move(p), std::move(limit));
1691 }
1692 
1713 template<typename CP32,UTFIllFormedBehavior behavior,
1714 typename UnitIter,typename LimitIter = UnitIter>
1715 autoutfIterator(UnitIter p, LimitIter limit) {
1716 returnUTFIterator<CP32, behavior, UnitIter, LimitIter>(
1717  std::move(p), std::move(limit));
1718 }
1719 
1720 // Note: We should only enable the following factory function for a copyable UnitIter.
1721 // In C++17, we would have to partially specialize with enable_if_t testing for forward_iterator,
1722 // but a function template partial specialization is not allowed.
1723 // In C++20, we might be able to require the std::copyable concept.
1724 
1744 template<typename CP32, UTFIllFormedBehavior behavior,typename UnitIter>
1745 autoutfIterator(UnitIter p) {
1746 returnUTFIterator<CP32, behavior, UnitIter>(std::move(p));
1747 }
1748 
1776 template<typename CP32, UTFIllFormedBehavior behavior,typename Range>
1777 classUTFStringCodePoints {
1778  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
1779 public:
1784 UTFStringCodePoints() =default;
1785 
1791 template<typename R = Range,typename = std::enable_if_t<!std::is_reference_v<R>>>
1792 explicitUTFStringCodePoints(Range unitRange) : unitRange(std::move(unitRange)) {}
1801 template<typename R = Range,typename = std::enable_if_t<std::is_reference_v<R>>,typename =void>
1802 explicitUTFStringCodePoints(Range unitRange) : unitRange(unitRange) {}
1803 
1805 UTFStringCodePoints(constUTFStringCodePoints &other) =default;
1806 
1808 UTFStringCodePoints &operator=(constUTFStringCodePoints &other) =default;
1809 
1814 autobegin() {
1815 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1816  }
1817 
1822 template<typename R = Range,typename = std::enable_if_t<prv::range<const R>>>
1823 autobegin() const{
1824 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1825  }
1826 
1831 autoend() {
1832 using UnitIter = decltype(unitRange.begin());
1833 using LimitIter = decltype(unitRange.end());
1834 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1835 // Return the code unit sentinel.
1836 return unitRange.end();
1837  }elseif constexpr (prv::bidirectional_iterator<UnitIter>) {
1838 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1839  }else {
1840 // The input iterator specialization has no three-argument constructor.
1841 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1842  }
1843  }
1844 
1849 template<typename R = Range,typename = std::enable_if_t<prv::range<const R>>>
1850 autoend() const{
1851 using UnitIter = decltype(unitRange.begin());
1852 using LimitIter = decltype(unitRange.end());
1853 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1854 // Return the code unit sentinel.
1855 return unitRange.end();
1856  }elseif constexpr (prv::bidirectional_iterator<UnitIter>) {
1857 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1858  }else {
1859 // The input iterator specialization has no three-argument constructor.
1860 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1861  }
1862  }
1863 
1868 autorbegin() const{
1869 return std::make_reverse_iterator(end());
1870  }
1871 
1876 autorend() const{
1877 return std::make_reverse_iterator(begin());
1878  }
1879 
1880 private:
1881  Range unitRange;
1882 };
1883 
1885 template<typename CP32, UTFIllFormedBehavior behavior>
1886 structUTFStringCodePointsAdaptor
1887 #ifU_CPLUSPLUS_VERSION >= 23 && __cpp_lib_ranges >= 2022'02 && \
1888  __cpp_lib_bind_back >= 2022'02// http://wg21.link/P2387R3.
1889  : std::ranges::range_adaptor_closure<UTFStringCodePointsAdaptor<CP32, behavior>>
1890 #endif
1891 {
1893 template<typename Range>
1894 autooperator()(Range &&unitRange) const{
1895 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10// We need https://wg21.link/P2415R2.
1896 returnUTFStringCodePoints<CP32, behavior, std::ranges::views::all_t<Range>>(
1897  std::forward<Range>(unitRange));
1898 #else
1899 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
1900 // Take basic_string_view by copy, not by reference. In C++20 this is handled by
1901 // all_t<Range>, which is Range if Range is a view.
1902 returnUTFStringCodePoints<CP32, behavior, std::decay_t<Range>>(
1903  std::forward<Range>(unitRange));
1904  }else {
1905 returnUTFStringCodePoints<CP32, behavior, Range>(std::forward<Range>(unitRange));
1906  }
1907 #endif
1908  }
1909 };
1910 
1925 template<typename CP32, UTFIllFormedBehavior behavior>
1926 constexprUTFStringCodePointsAdaptor<CP32, behavior>utfStringCodePoints;
1927 
1928 // Non-validating iterators ------------------------------------------------ ***
1929 
1951 template<typename CP32,typename UnitIter,typename =void>
1952 classUnsafeUTFIterator {
1953  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
1954 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
1955 
1956 // Proxy type for operator->() (required by LegacyInputIterator)
1957 // so that we don't promise always returning UnsafeCodeUnits.
1958 classProxy {
1959 public:
1960 explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
1961 UnsafeCodeUnits<CP32, UnitIter> &operator*() {return units_; }
1962 UnsafeCodeUnits<CP32, UnitIter> *operator->() {return &units_; }
1963 private:
1964 UnsafeCodeUnits<CP32, UnitIter> units_;
1965  };
1966 
1967 public:
1969 usingvalue_type =UnsafeCodeUnits<CP32, UnitIter>;
1971 usingreference =value_type;
1973 usingpointer = Proxy;
1975 usingdifference_type =prv::iter_difference_t<UnitIter>;
1977 usingiterator_category = std::conditional_t<
1978  prv::bidirectional_iterator<UnitIter>,
1979  std::bidirectional_iterator_tag,
1980  std::forward_iterator_tag>;
1981 
1991 U_FORCE_INLINEexplicitUnsafeUTFIterator(UnitIter p) : p_(p), units_(0, 0, p, p) {}
1997 U_FORCE_INLINEUnsafeUTFIterator() : p_{}, units_(0, 0, p_, p_) {}
1998 
2000 U_FORCE_INLINEUnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept =default;
2002 U_FORCE_INLINEUnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept =default;
2003 
2005 U_FORCE_INLINEUnsafeUTFIterator(constUnsafeUTFIterator &other) =default;
2007 U_FORCE_INLINEUnsafeUTFIterator &operator=(constUnsafeUTFIterator &other) =default;
2008 
2014 U_FORCE_INLINEbooloperator==(constUnsafeUTFIterator &other) const{
2015 return getLogicalPosition() == other.getLogicalPosition();
2016  }
2022 U_FORCE_INLINEbooloperator!=(constUnsafeUTFIterator &other) const{return !operator==(other); }
2023 
2030 template<typename Sentinel>U_FORCE_INLINEfriend
2031  std::enable_if_t<
2032  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2033 bool>
2034 operator==(constUnsafeUTFIterator &iter,const Sentinel &s) {
2035 return iter.getLogicalPosition() == s;
2036  }
2037 
2038 #if U_CPLUSPLUS_VERSION < 20
2045 template<typename Sentinel>U_FORCE_INLINEfriend
2046  std::enable_if_t<
2047  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2048 bool>
2049 operator==(const Sentinel &s,constUnsafeUTFIterator &iter) {
2050 return iter.getLogicalPosition() == s;
2051  }
2058 template<typename Sentinel>U_FORCE_INLINEfriend
2059  std::enable_if_t<
2060  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2061 bool>
2062 operator!=(constUnsafeUTFIterator &iter,const Sentinel &s) {return !(iter == s); }
2069 template<typename Sentinel>U_FORCE_INLINEfriend
2070  std::enable_if_t<
2071  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2072 bool>
2073 operator!=(const Sentinel &s,constUnsafeUTFIterator &iter) {return !(iter == s); }
2074 #endif// C++17
2075 
2082 U_FORCE_INLINEUnsafeCodeUnits<CP32, UnitIter>operator*() const{
2083 if (state_ == 0) {
2084  UnitIter p0 = p_;
2085  units_ = Impl::readAndInc(p0, p_);
2086  state_ = 1;
2087  }
2088 return units_;
2089  }
2090 
2099 U_FORCE_INLINE Proxyoperator->() const{
2100 if (state_ == 0) {
2101  UnitIter p0 = p_;
2102  units_ = Impl::readAndInc(p0, p_);
2103  state_ = 1;
2104  }
2105 return Proxy(units_);
2106  }
2107 
2114 U_FORCE_INLINEUnsafeUTFIterator &operator++() {// pre-increment
2115 if (state_ > 0) {
2116 // operator*() called readAndInc() so p_ is already ahead.
2117  state_ = 0;
2118  }elseif (state_ == 0) {
2119  Impl::inc(p_);
2120  }else/* state_ < 0 */ {
2121 // operator--() called decAndRead() so we know how far to skip.
2122  p_ = units_.end();
2123  state_ = 0;
2124  }
2125 return *this;
2126  }
2127 
2136 U_FORCE_INLINEUnsafeUTFIteratoroperator++(int) {// post-increment
2137 if (state_ > 0) {
2138 // operator*() called readAndInc() so p_ is already ahead.
2139 UnsafeUTFIterator result(*this);
2140  state_ = 0;
2141 return result;
2142  }elseif (state_ == 0) {
2143  UnitIter p0 = p_;
2144  units_ = Impl::readAndInc(p0, p_);
2145 UnsafeUTFIterator result(*this);
2146  result.state_ = 1;
2147 // keep this->state_ == 0
2148 return result;
2149  }else/* state_ < 0 */ {
2150 UnsafeUTFIterator result(*this);
2151 // operator--() called decAndRead() so we know how far to skip.
2152  p_ = units_.end();
2153  state_ = 0;
2154 return result;
2155  }
2156  }
2157 
2165 template<typename Iter = UnitIter>
2166 U_FORCE_INLINE
2167  std::enable_if_t<prv::bidirectional_iterator<Iter>,UnsafeUTFIterator &>
2168 operator--() {// pre-decrement
2169 if (state_ > 0) {
2170 // operator*() called readAndInc() so p_ is ahead of the logical position.
2171  p_ = units_.begin();
2172  }
2173  units_ = Impl::decAndRead(p_);
2174  state_ = -1;
2175 return *this;
2176  }
2177 
2185 template<typename Iter = UnitIter>
2186 U_FORCE_INLINE
2187  std::enable_if_t<prv::bidirectional_iterator<Iter>,UnsafeUTFIterator>
2188 operator--(int) {// post-decrement
2189 UnsafeUTFIterator result(*this);
2190 operator--();
2191 return result;
2192  }
2193 
2194 private:
2195 friendclassstd::reverse_iterator<UnsafeUTFIterator<CP32, UnitIter>>;
2196 
2197 U_FORCE_INLINE UnitIter getLogicalPosition() const{
2198 return state_ <= 0 ? p_ : units_.begin();
2199  }
2200 
2201 // operator*() etc. are logically const.
2202 mutable UnitIter p_;
2203 // Keep state so that we call readAndInc() only once for both operator*() and ++
2204 // to make it easy for the compiler to optimize.
2205 mutable UnsafeCodeUnits<CP32, UnitIter> units_;
2206 // >0: units_ = readAndInc(), p_ = units limit
2207 // which means that p_ is ahead of its logical position
2208 // 0: initial state
2209 // <0: units_ = decAndRead(), p_ = units start
2210 mutable int8_t state_ = 0;
2211 };
2212 
2213 #ifndef U_IN_DOXYGEN
2214 // Partial template specialization for single-pass input iterator.
2215 template<typename CP32,typename UnitIter>
2216 classUnsafeUTFIterator<
2217  CP32,
2218  UnitIter,
2219  std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
2220  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
2221 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
2222 
2223 // Proxy type for post-increment return value, to make *iter++ work.
2224 // Also for operator->() (required by LegacyInputIterator)
2225 // so that we don't promise always returning UnsafeCodeUnits.
2226 classProxy {
2227 public:
2228 explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
2229  UnsafeCodeUnits<CP32, UnitIter> &operator*() {return units_; }
2230  UnsafeCodeUnits<CP32, UnitIter> *operator->() {return &units_; }
2231 private:
2232  UnsafeCodeUnits<CP32, UnitIter> units_;
2233  };
2234 
2235 public:
2236 usingvalue_type = UnsafeCodeUnits<CP32, UnitIter>;
2237 usingreference =value_type;
2238 usingpointer = Proxy;
2239 usingdifference_type = prv::iter_difference_t<UnitIter>;
2240 usingiterator_category = std::input_iterator_tag;
2241 
2242 U_FORCE_INLINEexplicitUnsafeUTFIterator(UnitIter p) : p_(std::move(p)) {}
2243 
2244 U_FORCE_INLINEUnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept =default;
2245 U_FORCE_INLINEUnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept =default;
2246 
2247 U_FORCE_INLINEUnsafeUTFIterator(constUnsafeUTFIterator &other) =default;
2248 U_FORCE_INLINEUnsafeUTFIterator &operator=(constUnsafeUTFIterator &other) =default;
2249 
2250 U_FORCE_INLINEbooloperator==(constUnsafeUTFIterator &other) const{
2251 return p_ == other.p_ && ahead_ == other.ahead_;
2252 // Strictly speaking, we should check if the logical position is the same.
2253 // However, we cannot advance, or do arithmetic with, a single-pass UnitIter.
2254  }
2255 U_FORCE_INLINEbooloperator!=(constUnsafeUTFIterator &other) const{return !operator==(other); }
2256 
2257 template<typename Sentinel>U_FORCE_INLINEfriend
2258  std::enable_if_t<
2259  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2260 bool>
2261 operator==(constUnsafeUTFIterator &iter,const Sentinel &s) {
2262 return !iter.ahead_ && iter.p_ == s;
2263  }
2264 
2265 #if U_CPLUSPLUS_VERSION < 20
2266 template<typename Sentinel>U_FORCE_INLINEfriend
2267  std::enable_if_t<
2268  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2269 bool>
2270 operator==(const Sentinel &s,constUnsafeUTFIterator &iter) {
2271 return !iter.ahead_ && iter.p_ == s;
2272  }
2273 
2274 template<typename Sentinel>U_FORCE_INLINEfriend
2275  std::enable_if_t<
2276  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2277 bool>
2278 operator!=(constUnsafeUTFIterator &iter,const Sentinel &s) {return !(iter == s); }
2279 
2280 template<typename Sentinel>U_FORCE_INLINEfriend
2281  std::enable_if_t<
2282  !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2283 bool>
2284 operator!=(const Sentinel &s,constUnsafeUTFIterator &iter) {return !(iter == s); }
2285 #endif// C++17
2286 
2287 U_FORCE_INLINE UnsafeCodeUnits<CP32, UnitIter>operator*() const{
2288 if (!ahead_) {
2289  units_ = Impl::readAndInc(p_, p_);
2290  ahead_ =true;
2291  }
2292 return units_;
2293  }
2294 
2295 U_FORCE_INLINE Proxyoperator->() const{
2296 if (!ahead_) {
2297  units_ = Impl::readAndInc(p_, p_);
2298  ahead_ =true;
2299  }
2300 return Proxy(units_);
2301  }
2302 
2303 U_FORCE_INLINEUnsafeUTFIterator &operator++() {// pre-increment
2304 if (ahead_) {
2305 // operator*() called readAndInc() so p_ is already ahead.
2306  ahead_ =false;
2307  }else {
2308  Impl::inc(p_);
2309  }
2310 return *this;
2311  }
2312 
2313 U_FORCE_INLINE Proxyoperator++(int) {// post-increment
2314 if (ahead_) {
2315 // operator*() called readAndInc() so p_ is already ahead.
2316  ahead_ =false;
2317  }else {
2318  units_ = Impl::readAndInc(p_, p_);
2319 // keep this->ahead_ == false
2320  }
2321 return Proxy(units_);
2322  }
2323 
2324 private:
2325 // operator*() etc. are logically const.
2326 mutable UnitIter p_;
2327 // Keep state so that we call readAndInc() only once for both operator*() and ++
2328 // so that we can use a single-pass input iterator for UnitIter.
2329 mutable UnsafeCodeUnits<CP32, UnitIter> units_ = {0, 0};
2330 // true: units_ = readAndInc(), p_ = units limit
2331 // which means that p_ is ahead of its logical position
2332 // false: initial state
2333 mutablebool ahead_ =false;
2334 };
2335 #endif// U_IN_DOXYGEN
2336 
2337 }// namespace U_HEADER_ONLY_NAMESPACE
2338 
2339 #ifndef U_IN_DOXYGEN
2340 // Bespoke specialization of reverse_iterator.
2341 // The default implementation implements reverse operator*() and ++ in a way
2342 // that does most of the same work twice for reading variable-length sequences.
2343 template<typename CP32,typename UnitIter>
2344 classstd::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter>> {
2345  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
2346 using Impl = U_HEADER_ONLY_NAMESPACE::UnsafeUTFImpl<CP32, UnitIter>;
2347 using UnsafeCodeUnits_ =U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits<CP32, UnitIter>;
2348 
2349 // Proxy type for operator->() (required by LegacyInputIterator)
2350 // so that we don't promise always returning UnsafeCodeUnits.
2351 classProxy {
2352 public:
2353 explicit Proxy(UnsafeCodeUnits_ units) : units_(units) {}
2354  UnsafeCodeUnits_ &operator*() {return units_; }
2355  UnsafeCodeUnits_ *operator->() {return &units_; }
2356 private:
2357  UnsafeCodeUnits_ units_;
2358  };
2359 
2360 public:
2361 using value_type = UnsafeCodeUnits_;
2362 using reference = value_type;
2363 using pointer = Proxy;
2364 using difference_type =U_HEADER_ONLY_NAMESPACE::prv::iter_difference_t<UnitIter>;
2365 using iterator_category = std::bidirectional_iterator_tag;
2366 
2367 U_FORCE_INLINEexplicit reverse_iterator(U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter> iter) :
2368  p_(iter.getLogicalPosition()), units_(0, 0, p_, p_) {}
2369 U_FORCE_INLINE reverse_iterator() : p_{}, units_(0, 0, p_, p_) {}
2370 
2371 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =default;
2372 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =default;
2373 
2374 U_FORCE_INLINE reverse_iterator(const reverse_iterator &other) =default;
2375 U_FORCE_INLINE reverse_iterator &operator=(const reverse_iterator &other) =default;
2376 
2377 U_FORCE_INLINEbooloperator==(const reverse_iterator &other) const{
2378 return getLogicalPosition() == other.getLogicalPosition();
2379  }
2380 U_FORCE_INLINEbooloperator!=(const reverse_iterator &other) const{return !operator==(other); }
2381 
2382 U_FORCE_INLINE UnsafeCodeUnits_ operator*() const{
2383 if (state_ == 0) {
2384  units_ = Impl::decAndRead(p_);
2385  state_ = -1;
2386  }
2387 return units_;
2388  }
2389 
2390 U_FORCE_INLINE Proxy operator->() const{
2391 if (state_ == 0) {
2392  units_ = Impl::decAndRead(p_);
2393  state_ = -1;
2394  }
2395 return Proxy(units_);
2396  }
2397 
2398 U_FORCE_INLINE reverse_iterator &operator++() {// pre-increment
2399 if (state_ < 0) {
2400 // operator*() called decAndRead() so p_ is already behind.
2401  state_ = 0;
2402  }elseif (state_ == 0) {
2403  Impl::dec(p_);
2404  }else/* state_ > 0 */ {
2405 // operator--() called readAndInc() so we know how far to skip.
2406  p_ = units_.begin();
2407  state_ = 0;
2408  }
2409 return *this;
2410  }
2411 
2412 U_FORCE_INLINE reverse_iterator operator++(int) {// post-increment
2413 if (state_ < 0) {
2414 // operator*() called decAndRead() so p_ is already behind.
2415  reverse_iterator result(*this);
2416  state_ = 0;
2417 return result;
2418  }elseif (state_ == 0) {
2419  units_ = Impl::decAndRead(p_);
2420  reverse_iterator result(*this);
2421  result.state_ = -1;
2422 // keep this->state_ == 0
2423 return result;
2424  }else/* state_ > 0 */ {
2425  reverse_iterator result(*this);
2426 // operator--() called readAndInc() so we know how far to skip.
2427  p_ = units_.begin();
2428  state_ = 0;
2429 return result;
2430  }
2431  }
2432 
2433 U_FORCE_INLINE reverse_iterator &operator--() {// pre-decrement
2434 if (state_ < 0) {
2435 // operator*() called decAndRead() so p_ is behind the logical position.
2436  p_ = units_.end();
2437  }
2438  UnitIter p0 = p_;
2439  units_ = Impl::readAndInc(p0, p_);
2440  state_ = 1;
2441 return *this;
2442  }
2443 
2444 U_FORCE_INLINE reverse_iterator operator--(int) {// post-decrement
2445  reverse_iterator result(*this);
2446  operator--();
2447 return result;
2448  }
2449 
2450 private:
2451 U_FORCE_INLINE UnitIter getLogicalPosition() const{
2452 return state_ >= 0 ? p_ : units_.end();
2453  }
2454 
2455 // operator*() etc. are logically const.
2456 mutable UnitIter p_;
2457 // Keep state so that we call decAndRead() only once for both operator*() and ++
2458 // to make it easy for the compiler to optimize.
2459 mutable UnsafeCodeUnits_ units_;
2460 // >0: units_ = readAndInc(), p_ = units limit
2461 // 0: initial state
2462 // <0: units_ = decAndRead(), p_ = units start
2463 // which means that p_ is behind its logical position
2464 mutable int8_t state_ = 0;
2465 };
2466 #endif// U_IN_DOXYGEN
2467 
2468 namespaceU_HEADER_ONLY_NAMESPACE {
2469 
2485 template<typename CP32,typename UnitIter>
2486 autounsafeUTFIterator(UnitIter iter) {
2487 returnUnsafeUTFIterator<CP32, UnitIter>(std::move(iter));
2488 }
2489 
2517 template<typename CP32,typename Range>
2518 classUnsafeUTFStringCodePoints {
2519  static_assert(sizeof(CP32) == 4,"CP32 must be a 32-bit type to hold a code point");
2520 public:
2525 UnsafeUTFStringCodePoints() =default;
2526 
2532 template<typename R = Range,typename = std::enable_if_t<!std::is_reference_v<R>>>
2533 explicitUnsafeUTFStringCodePoints(Range unitRange) : unitRange(std::move(unitRange)) {}
2542 template<typename R = Range,typename = std::enable_if_t<std::is_reference_v<R>>,typename =void>
2543 explicitUnsafeUTFStringCodePoints(Range unitRange) : unitRange(unitRange) {}
2544 
2546 UnsafeUTFStringCodePoints(constUnsafeUTFStringCodePoints &other) =default;
2547 
2549 UnsafeUTFStringCodePoints &operator=(constUnsafeUTFStringCodePoints &other) =default;
2550 
2555 autobegin() {
2556 return unsafeUTFIterator<CP32>(unitRange.begin());
2557  }
2558 
2563 template<typename R = Range,typename = std::enable_if_t<prv::range<const R>>>
2564 autobegin() const{
2565 return unsafeUTFIterator<CP32>(unitRange.begin());
2566  }
2567 
2572 autoend() {
2573 using UnitIter = decltype(unitRange.begin());
2574 using LimitIter = decltype(unitRange.end());
2575 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2576 // Return the code unit sentinel.
2577 return unitRange.end();
2578  }else {
2579 return unsafeUTFIterator<CP32>(unitRange.end());
2580  }
2581  }
2582 
2587 template<typename R = Range,typename = std::enable_if_t<prv::range<const R>>>
2588 autoend() const{
2589 using UnitIter = decltype(unitRange.begin());
2590 using LimitIter = decltype(unitRange.end());
2591 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2592 // Return the code unit sentinel.
2593 return unitRange.end();
2594  }else {
2595 return unsafeUTFIterator<CP32>(unitRange.end());
2596  }
2597  }
2598 
2603 autorbegin() const{
2604 return std::make_reverse_iterator(end());
2605  }
2606 
2611 autorend() const{
2612 return std::make_reverse_iterator(begin());
2613  }
2614 
2615 private:
2616  Range unitRange;
2617 };
2618 
2620 template<typename CP32>
2621 structUnsafeUTFStringCodePointsAdaptor
2622 #ifU_CPLUSPLUS_VERSION >= 23 && __cpp_lib_ranges >= 2022'02 && \
2623  __cpp_lib_bind_back >= 2022'02// http://wg21.link/P2387R3.
2624  : std::ranges::range_adaptor_closure<UnsafeUTFStringCodePointsAdaptor<CP32>>
2625 #endif
2626 {
2628 template<typename Range>
2629 autooperator()(Range &&unitRange) const{
2630 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10// We need https://wg21.link/P2415R2.
2631 returnUnsafeUTFStringCodePoints<CP32, std::ranges::views::all_t<Range>>(std::forward<Range>(unitRange));
2632 #else
2633 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
2634 // Take basic_string_view by copy, not by reference. In C++20 this is handled by
2635 // all_t<Range>, which is Range if Range is a view.
2636 returnUnsafeUTFStringCodePoints<CP32, std::decay_t<Range>>(std::forward<Range>(unitRange));
2637  }else {
2638 returnUnsafeUTFStringCodePoints<CP32, Range>(std::forward<Range>(unitRange));
2639  }
2640 #endif
2641  }
2642 };
2643 
2644 
2657 template<typename CP32>
2658 constexprUnsafeUTFStringCodePointsAdaptor<CP32>unsafeUTFStringCodePoints;
2659 
2660 }// namespace U_HEADER_ONLY_NAMESPACE
2661 
2662 
2663 #if defined(__cpp_lib_ranges)
2664 template <typename CP32, UTFIllFormedBehavior behavior,typename Range>
2665 constexprbool std::ranges::enable_borrowed_range<
2666 U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints<CP32, behavior, Range>> =
2667  std::ranges::enable_borrowed_range<Range>;
2668 
2669 template <typename CP32,typename Range>
2670 constexprbool std::ranges::enable_borrowed_range<
2671 U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints<CP32, Range>> =
2672  std::ranges::enable_borrowed_range<Range>;
2673 #endif
2674 
2675 #endif// U_HIDE_DRAFT_API
2676 #endif// U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
2677 #endif// __UTFITERATOR_H__
U_HEADER_ONLY_NAMESPACE::AllCodePoints
A C++ "range" over all Unicode code points U+0000..U+10FFFF.
Definition:utfiterator.h:302
U_HEADER_ONLY_NAMESPACE::AllCodePoints::begin
auto begin() const
Definition:utfiterator.h:312
U_HEADER_ONLY_NAMESPACE::AllCodePoints::AllCodePoints
AllCodePoints()
Constructor.
Definition:utfiterator.h:306
U_HEADER_ONLY_NAMESPACE::AllCodePoints::end
auto end() const
Definition:utfiterator.h:317
U_HEADER_ONLY_NAMESPACE::AllScalarValues
A C++ "range" over all Unicode scalar values U+0000..U+D7FF & U+E000..U+10FFFF.
Definition:utfiterator.h:333
U_HEADER_ONLY_NAMESPACE::AllScalarValues::begin
auto begin() const
Definition:utfiterator.h:343
U_HEADER_ONLY_NAMESPACE::AllScalarValues::AllScalarValues
AllScalarValues()
Constructor.
Definition:utfiterator.h:337
U_HEADER_ONLY_NAMESPACE::AllScalarValues::end
auto end() const
Definition:utfiterator.h:348
U_HEADER_ONLY_NAMESPACE::CodeUnits
Result of validating and decoding a code unit sequence for one code point.
Definition:utfiterator.h:487
U_HEADER_ONLY_NAMESPACE::CodeUnits::wellFormed
bool wellFormed() const
Definition:utfiterator.h:502
U_HEADER_ONLY_NAMESPACE::CodeUnits::operator=
CodeUnits & operator=(const CodeUnits &other)=default
Copy assignment operator.
U_HEADER_ONLY_NAMESPACE::CodeUnits::CodeUnits
CodeUnits(const CodeUnits &other)=default
Copy constructor.
U_HEADER_ONLY_NAMESPACE::CodeUnits::CodeUnits
CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, UnitIter start, UnitIter limit)
Definition:utfiterator.h:490
U_HEADER_ONLY_NAMESPACE::UTFIterator
Validating iterator over the code points in a Unicode string.
Definition:utfiterator.h:1094
U_HEADER_ONLY_NAMESPACE::UTFIterator::UTFIterator
U_FORCE_INLINE UTFIterator()
Default constructor.
Definition:utfiterator.h:1169
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator==
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const Sentinel &s, const UTFIterator &iter)
Definition:utfiterator.h:1227
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator--
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UTFIterator > operator--(int)
Post-decrement operator.
Definition:utfiterator.h:1366
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator->
U_FORCE_INLINE Proxy operator->() const
Decodes the code unit sequence at the current position.
Definition:utfiterator.h:1277
U_HEADER_ONLY_NAMESPACE::UTFIterator::reference
value_type reference
C++ iterator boilerplate.
Definition:utfiterator.h:1113
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator=
U_FORCE_INLINE UTFIterator & operator=(UTFIterator &&src) noexcept=default
Move assignment operator.
U_HEADER_ONLY_NAMESPACE::UTFIterator::value_type
CodeUnits< CP32, UnitIter > value_type
C++ iterator boilerplate.
Definition:utfiterator.h:1111
U_HEADER_ONLY_NAMESPACE::UTFIterator::UTFIterator
U_FORCE_INLINE UTFIterator(UTFIterator &&src) noexcept=default
Move constructor.
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator!=
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const UTFIterator &iter, const Sentinel &s)
Definition:utfiterator.h:1240
U_HEADER_ONLY_NAMESPACE::UTFIterator::iterator_category
std::conditional_t< prv::bidirectional_iterator< UnitIter >, std::bidirectional_iterator_tag, std::forward_iterator_tag > iterator_category
C++ iterator boilerplate.
Definition:utfiterator.h:1122
U_HEADER_ONLY_NAMESPACE::UTFIterator::UTFIterator
U_FORCE_INLINE UTFIterator(UnitIter p)
Constructs an iterator start or limit sentinel.
Definition:utfiterator.h:1163
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator!=
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const Sentinel &s, const UTFIterator &iter)
Definition:utfiterator.h:1251
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator==
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const UTFIterator &iter, const Sentinel &s)
Definition:utfiterator.h:1208
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator--
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UTFIterator & > operator--()
Pre-decrement operator.
Definition:utfiterator.h:1346
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator!=
U_FORCE_INLINE bool operator!=(const UTFIterator &other) const
Definition:utfiterator.h:1194
U_HEADER_ONLY_NAMESPACE::UTFIterator::pointer
Proxy pointer
C++ iterator boilerplate.
Definition:utfiterator.h:1115
U_HEADER_ONLY_NAMESPACE::UTFIterator::UTFIterator
U_FORCE_INLINE UTFIterator(UnitIter start, UnitIter p, LimitIter limit)
Constructor with start <= p < limit.
Definition:utfiterator.h:1137
U_HEADER_ONLY_NAMESPACE::UTFIterator::UTFIterator
U_FORCE_INLINE UTFIterator(const UTFIterator &other)=default
Copy constructor.
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator*
U_FORCE_INLINE CodeUnits< CP32, UnitIter > operator*() const
Decodes the code unit sequence at the current position.
Definition:utfiterator.h:1260
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator++
U_FORCE_INLINE UTFIterator operator++(int)
Post-increment operator.
Definition:utfiterator.h:1314
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator++
U_FORCE_INLINE UTFIterator & operator++()
Pre-increment operator.
Definition:utfiterator.h:1292
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator=
U_FORCE_INLINE UTFIterator & operator=(const UTFIterator &other)=default
Copy assignment operator.
U_HEADER_ONLY_NAMESPACE::UTFIterator::operator==
U_FORCE_INLINE bool operator==(const UTFIterator &other) const
Definition:utfiterator.h:1186
U_HEADER_ONLY_NAMESPACE::UTFIterator::UTFIterator
U_FORCE_INLINE UTFIterator(UnitIter p, LimitIter limit)
Constructor with start == p < limit.
Definition:utfiterator.h:1150
U_HEADER_ONLY_NAMESPACE::UTFIterator::difference_type
prv::iter_difference_t< UnitIter > difference_type
C++ iterator boilerplate.
Definition:utfiterator.h:1117
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints
A C++ "range" for validating iteration over all of the code points of a code unit range.
Definition:utfiterator.h:1777
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::begin
auto begin() const
Definition:utfiterator.h:1823
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::begin
auto begin()
Definition:utfiterator.h:1814
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::UTFStringCodePoints
UTFStringCodePoints()=default
Constructs an empty C++ "range" object.
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::rend
auto rend() const
Definition:utfiterator.h:1876
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::rbegin
auto rbegin() const
Definition:utfiterator.h:1868
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::operator=
UTFStringCodePoints & operator=(const UTFStringCodePoints &other)=default
Copy assignment operator.
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::UTFStringCodePoints
UTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string.
Definition:utfiterator.h:1792
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::end
auto end()
Definition:utfiterator.h:1831
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::UTFStringCodePoints
UTFStringCodePoints(const UTFStringCodePoints &other)=default
Copy constructor.
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::UTFStringCodePoints
UTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string, keeping a reference to the code u...
Definition:utfiterator.h:1802
U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints::end
auto end() const
Definition:utfiterator.h:1850
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits
Result of decoding a code unit sequence for one code point.
Definition:utfiterator.h:367
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::stringView
std::enable_if_t< std::is_pointer_v< Iter >||std::is_same_v< Iter, typename std::basic_string< Unit >::iterator >||std::is_same_v< Iter, typename std::basic_string< Unit >::const_iterator >||std::is_same_v< Iter, typename std::basic_string_view< Unit >::iterator >||std::is_same_v< Iter, typename std::basic_string_view< Unit >::const_iterator >, std::basic_string_view< Unit > > stringView() const
Definition:utfiterator.h:432
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::begin
UnitIter begin() const
Definition:utfiterator.h:394
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::end
UnitIter end() const
Definition:utfiterator.h:401
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::codePoint
CP32 codePoint() const
Definition:utfiterator.h:387
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::operator=
UnsafeCodeUnits & operator=(const UnsafeCodeUnits &other)=default
Copy assignment operator.
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::UnsafeCodeUnits
UnsafeCodeUnits(CP32 codePoint, uint8_t length, UnitIter start, UnitIter limit)
Definition:utfiterator.h:372
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::length
uint8_t length() const
Definition:utfiterator.h:407
U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits::UnsafeCodeUnits
UnsafeCodeUnits(const UnsafeCodeUnits &other)=default
Copy constructor.
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator
Non-validating iterator over the code points in a Unicode string.
Definition:utfiterator.h:1952
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator--
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UnsafeUTFIterator & > operator--()
Pre-decrement operator.
Definition:utfiterator.h:2168
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator==
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const Sentinel &s, const UnsafeUTFIterator &iter)
Definition:utfiterator.h:2049
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::UnsafeUTFIterator
U_FORCE_INLINE UnsafeUTFIterator()
Default constructor.
Definition:utfiterator.h:1997
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::value_type
UnsafeCodeUnits< CP32, UnitIter > value_type
C++ iterator boilerplate.
Definition:utfiterator.h:1969
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator!=
U_FORCE_INLINE bool operator!=(const UnsafeUTFIterator &other) const
Definition:utfiterator.h:2022
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator=
U_FORCE_INLINE UnsafeUTFIterator & operator=(const UnsafeUTFIterator &other)=default
Copy assignment operator.
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::reference
value_type reference
C++ iterator boilerplate.
Definition:utfiterator.h:1971
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator!=
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const UnsafeUTFIterator &iter, const Sentinel &s)
Definition:utfiterator.h:2062
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator*
U_FORCE_INLINE UnsafeCodeUnits< CP32, UnitIter > operator*() const
Decodes the code unit sequence at the current position.
Definition:utfiterator.h:2082
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::iterator_category
std::conditional_t< prv::bidirectional_iterator< UnitIter >, std::bidirectional_iterator_tag, std::forward_iterator_tag > iterator_category
C++ iterator boilerplate.
Definition:utfiterator.h:1980
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator==
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator==(const UnsafeUTFIterator &iter, const Sentinel &s)
Definition:utfiterator.h:2034
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::pointer
Proxy pointer
C++ iterator boilerplate.
Definition:utfiterator.h:1973
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::difference_type
prv::iter_difference_t< UnitIter > difference_type
C++ iterator boilerplate.
Definition:utfiterator.h:1975
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator--
U_FORCE_INLINE std::enable_if_t< prv::bidirectional_iterator< Iter >, UnsafeUTFIterator > operator--(int)
Post-decrement operator.
Definition:utfiterator.h:2188
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator->
U_FORCE_INLINE Proxy operator->() const
Decodes the code unit sequence at the current position.
Definition:utfiterator.h:2099
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::UnsafeUTFIterator
U_FORCE_INLINE UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept=default
Move constructor.
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator++
U_FORCE_INLINE UnsafeUTFIterator operator++(int)
Post-increment operator.
Definition:utfiterator.h:2136
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::UnsafeUTFIterator
U_FORCE_INLINE UnsafeUTFIterator(const UnsafeUTFIterator &other)=default
Copy constructor.
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator++
U_FORCE_INLINE UnsafeUTFIterator & operator++()
Pre-increment operator.
Definition:utfiterator.h:2114
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::UnsafeUTFIterator
U_FORCE_INLINE UnsafeUTFIterator(UnitIter p)
Constructor; the iterator/pointer should be at a code point boundary.
Definition:utfiterator.h:1991
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator!=
U_FORCE_INLINE friend std::enable_if_t< !std::is_same_v< Sentinel, UnsafeUTFIterator > &&!std::is_same_v< Sentinel, UnitIter >, bool > operator!=(const Sentinel &s, const UnsafeUTFIterator &iter)
Definition:utfiterator.h:2073
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator==
U_FORCE_INLINE bool operator==(const UnsafeUTFIterator &other) const
Definition:utfiterator.h:2014
U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator::operator=
U_FORCE_INLINE UnsafeUTFIterator & operator=(UnsafeUTFIterator &&src) noexcept=default
Move assignment operator.
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints
A C++ "range" for non-validating iteration over all of the code points of a code unit range.
Definition:utfiterator.h:2518
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::rend
auto rend() const
Definition:utfiterator.h:2611
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::UnsafeUTFStringCodePoints
UnsafeUTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string, keeping a reference to the code u...
Definition:utfiterator.h:2543
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::end
auto end()
Definition:utfiterator.h:2572
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::UnsafeUTFStringCodePoints
UnsafeUTFStringCodePoints(const UnsafeUTFStringCodePoints &other)=default
Copy constructor.
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::UnsafeUTFStringCodePoints
UnsafeUTFStringCodePoints()=default
Constructs an empty C++ "range" object.
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::begin
auto begin() const
Definition:utfiterator.h:2564
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::begin
auto begin()
Definition:utfiterator.h:2555
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::UnsafeUTFStringCodePoints
UnsafeUTFStringCodePoints(Range unitRange)
Constructs a C++ "range" object over the code points in the string.
Definition:utfiterator.h:2533
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::end
auto end() const
Definition:utfiterator.h:2588
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::rbegin
auto rbegin() const
Definition:utfiterator.h:2603
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints::operator=
UnsafeUTFStringCodePoints & operator=(const UnsafeUTFStringCodePoints &other)=default
Copy assignment operator.
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator
Definition:utfiterator.h:248
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::difference_type
int32_t difference_type
C++ iterator boilerplate.
Definition:utfiterator.h:258
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::operator==
bool operator==(const CodePointsIterator &other) const
Definition:utfiterator.h:265
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::operator!=
bool operator!=(const CodePointsIterator &other) const
Definition:utfiterator.h:267
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::reference
value_type reference
C++ iterator boilerplate.
Definition:utfiterator.h:254
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::operator*
CP32 operator*() const
Definition:utfiterator.h:269
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::value_type
CP32 value_type
C++ iterator boilerplate.
Definition:utfiterator.h:252
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::CodePointsIterator
CodePointsIterator(CP32 c)
Definition:utfiterator.h:263
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::operator++
CodePointsIterator & operator++()
Definition:utfiterator.h:271
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::iterator_category
std::forward_iterator_tag iterator_category
C++ iterator boilerplate.
Definition:utfiterator.h:260
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::operator++
CodePointsIterator operator++(int)
Definition:utfiterator.h:279
U_HEADER_ONLY_NAMESPACE::prv::CodePointsIterator::pointer
CP32 * pointer
C++ iterator boilerplate.
Definition:utfiterator.h:256
icu::operator==
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
icu::operator!=
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition:stringpiece.h:346
U_CPLUSPLUS_VERSION
#define U_CPLUSPLUS_VERSION
0 if no C++; 1, 11, 14, ...
Definition:platform.h:464
U_HEADER_ONLY_NAMESPACE::UTFStringCodePointsAdaptor
Definition:utfiterator.h:1891
U_HEADER_ONLY_NAMESPACE::UTFStringCodePointsAdaptor::operator()
auto operator()(Range &&unitRange) const
Definition:utfiterator.h:1894
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePointsAdaptor
Definition:utfiterator.h:2626
U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePointsAdaptor::operator()
auto operator()(Range &&unitRange) const
Definition:utfiterator.h:2629
U_HEADER_ONLY_NAMESPACE::prv::is_basic_string_view
Definition:utfiterator.h:237
U_HEADER_ONLY_NAMESPACE::prv::range_type
Definition:utfiterator.h:221
U_SENTINEL
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32).
Definition:umachine.h:469
U_FORCE_INLINE
#define U_FORCE_INLINE
Forces function inlining on compilers that are known to support it.
Definition:umachine.h:135
utf16.h
C API: 16-bit Unicode handling macros.
U16_IS_SURROGATE_TRAIL
#define U16_IS_SURROGATE_TRAIL(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a trail surrogate?
Definition:utf16.h:93
U16_IS_SURROGATE_LEAD
#define U16_IS_SURROGATE_LEAD(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a lead surrogate?
Definition:utf16.h:84
U16_GET_SUPPLEMENTARY
#define U16_GET_SUPPLEMENTARY(lead, trail)
Get a supplementary code point value (U+10000..U+10ffff) from its lead and trail surrogates.
Definition:utf16.h:112
U16_IS_SURROGATE
#define U16_IS_SURROGATE(c)
Is this code unit a surrogate (U+d800..U+dfff)?
Definition:utf16.h:75
U16_IS_LEAD
#define U16_IS_LEAD(c)
Is this code unit a lead surrogate (U+d800..U+dbff)?
Definition:utf16.h:59
U16_IS_TRAIL
#define U16_IS_TRAIL(c)
Is this code unit a trail surrogate (U+dc00..U+dfff)?
Definition:utf16.h:67
utf8.h
C API: 8-bit Unicode handling macros.
U8_COUNT_TRAIL_BYTES_UNSAFE
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte)
Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
Definition:utf8.h:71
U8_IS_VALID_LEAD3_AND_T1
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1)
Internal 3-byte UTF-8 validity check.
Definition:utf8.h:98
U8_IS_VALID_LEAD4_AND_T1
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1)
Internal 4-byte UTF-8 validity check.
Definition:utf8.h:115
U8_IS_SINGLE
#define U8_IS_SINGLE(c)
Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
Definition:utf8.h:173
U8_LEAD3_T1_BITS
#define U8_LEAD3_T1_BITS
Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
Definition:utf8.h:91
U8_LEAD4_T1_BITS
#define U8_LEAD4_T1_BITS
Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
Definition:utf8.h:108
U8_IS_LEAD
#define U8_IS_LEAD(c)
Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
Definition:utf8.h:181
U8_MASK_LEAD_BYTE
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes)
Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
Definition:utf8.h:81
U8_IS_TRAIL
#define U8_IS_TRAIL(c)
Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
Definition:utf8.h:190
U_HEADER_ONLY_NAMESPACE::unsafeUTFIterator
auto unsafeUTFIterator(UnitIter iter)
UnsafeUTFIterator factory function.
Definition:utfiterator.h:2486
U_HEADER_ONLY_NAMESPACE::prv::iter_difference_t
typename std::iterator_traits< Iter >::difference_type iter_difference_t
Definition:utfiterator.h:203
U_HEADER_ONLY_NAMESPACE::prv::is_basic_string_view_v
constexpr bool is_basic_string_view_v
Definition:utfiterator.h:244
U_HEADER_ONLY_NAMESPACE::prv::forward_iterator
constexpr bool forward_iterator
Definition:utfiterator.h:207
U_HEADER_ONLY_NAMESPACE::utfIterator
auto utfIterator(UnitIter start, UnitIter p, LimitIter limit)
UTFIterator factory function for start <= p < limit.
Definition:utfiterator.h:1688
U_HEADER_ONLY_NAMESPACE::utfStringCodePoints
constexpr UTFStringCodePointsAdaptor< CP32, behavior > utfStringCodePoints
Range adaptor function object returning a UTFStringCodePoints object that represents a "range" of cod...
Definition:utfiterator.h:1926
U_HEADER_ONLY_NAMESPACE::prv::iter_value_t
typename std::iterator_traits< Iter >::value_type iter_value_t
Definition:utfiterator.h:199
U_HEADER_ONLY_NAMESPACE::prv::bidirectional_iterator
constexpr bool bidirectional_iterator
Definition:utfiterator.h:214
U_HEADER_ONLY_NAMESPACE::unsafeUTFStringCodePoints
constexpr UnsafeUTFStringCodePointsAdaptor< CP32 > unsafeUTFStringCodePoints
Range adaptor function object returning an UnsafeUTFStringCodePoints object that represents a "range"...
Definition:utfiterator.h:2658
UTFIllFormedBehavior
UTFIllFormedBehavior
Some defined behaviors for handling ill-formed Unicode strings.
Definition:utfiterator.h:149
UTF_BEHAVIOR_FFFD
@ UTF_BEHAVIOR_FFFD
Returns U+FFFD Replacement Character.
Definition:utfiterator.h:159
UTF_BEHAVIOR_SURROGATE
@ UTF_BEHAVIOR_SURROGATE
UTF-8: Not allowed; UTF-16: returns the unpaired surrogate; UTF-32: returns the surrogate code point,...
Definition:utfiterator.h:167
UTF_BEHAVIOR_NEGATIVE
@ UTF_BEHAVIOR_NEGATIVE
Returns a negative value (-1=U_SENTINEL) instead of a code point.
Definition:utfiterator.h:157
U_HEADER_ONLY_NAMESPACE::prv::range
constexpr bool range
Definition:utfiterator.h:232
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
uversion.h
C API: API for accessing ICU version numbers.

Generated by doxygen 1.9.1
[8]ページ先頭

©2009-2025 Movatter.jp