1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 4 ******************************************************************************* 6 * Copyright (C) 2002-2012, International Business Machines 7 * Corporation and others. All Rights Reserved. 9 ******************************************************************************* 10 * file name: utf_old.h 12 * tab size: 8 (not used) 15 * created on: 2002sep21 16 * created by: Markus W. Scherer 142 #ifndef __UTF_OLD_H__ 143 #define __UTF_OLD_H__ 160 #ifndef U_HIDE_OBSOLETE_UTF_OLD_H 161 # define U_HIDE_OBSOLETE_UTF_OLD_H 0 164 #if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H 166 /* Formerly utf.h, part 1 --------------------------------------------------- */ 168 #ifdef U_USE_UTF_DEPRECATES 176 typedef int32_t UTextOffset;
208 #define UTF8_ERROR_VALUE_1 0x15 215 #define UTF8_ERROR_VALUE_2 0x9f 223 #define UTF_ERROR_VALUE 0xffff 231 #define UTF_IS_ERROR(c) \ 232 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 239 #define UTF_IS_VALID(c) \ 240 (UTF_IS_UNICODE_CHAR(c) && \ 241 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 247 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 254 #define UTF_IS_UNICODE_NONCHAR(c) \ 256 ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 257 (uint32_t)(c)<=0x10ffff) 274 #define UTF_IS_UNICODE_CHAR(c) \ 275 ((uint32_t)(c)<0xd800 || \ 276 ((uint32_t)(c)>0xdfff && \ 277 (uint32_t)(c)<=0x10ffff && \ 278 !UTF_IS_UNICODE_NONCHAR(c))) 280 /* Formerly utf8.h ---------------------------------------------------------- */ 294 // No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes. 295 #elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) 305 #define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) 311 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) 314 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) 316 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) 318 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) 321 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) 337 # define UTF8_CHAR_LENGTH(c) \ 338 ((uint32_t)(c)<=0x7f ? 1 : \ 339 ((uint32_t)(c)<=0x7ff ? 2 : \ 340 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ 344 # define UTF8_CHAR_LENGTH(c) \ 345 ((uint32_t)(c)<=0x7f ? 1 : \ 346 ((uint32_t)(c)<=0x7ff ? 2 : \ 347 ((uint32_t)(c)<=0xffff ? 3 : \ 348 ((uint32_t)(c)<=0x10ffff ? 4 : \ 349 ((uint32_t)(c)<=0x3ffffff ? 5 : \ 350 ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ 359 #define UTF8_MAX_CHAR_LENGTH 4 362 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2) 365 #define UTF8_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 366 int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ 367 UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ 368 UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ 369 } UPRV_BLOCK_MACRO_END 372 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 373 int32_t _utf8_get_char_safe_index=(int32_t)(i); \ 374 UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ 375 UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ 376 } UPRV_BLOCK_MACRO_END 379 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 381 if((uint8_t)((c)-0xc0)<0x35) { \ 382 uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ 383 UTF8_MASK_LEAD_BYTE(c, __count); \ 385 /* each following branch falls through to the next one */ \ 387 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 390 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 393 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 394 /* no other branches to optimize switch() */ \ 398 } UPRV_BLOCK_MACRO_END 401 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 402 if((uint32_t)(c)<=0x7f) { \ 403 (s)[(i)++]=(uint8_t)(c); \ 405 if((uint32_t)(c)<=0x7ff) { \ 406 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ 408 if((uint32_t)(c)<=0xffff) { \ 409 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ 411 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ 412 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ 414 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ 416 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ 418 } UPRV_BLOCK_MACRO_END 421 #define UTF8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 422 (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ 423 } UPRV_BLOCK_MACRO_END 426 #define UTF8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 429 UTF8_FWD_1_UNSAFE(s, i); \ 432 } UPRV_BLOCK_MACRO_END 435 #define UTF8_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 436 while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ 437 } UPRV_BLOCK_MACRO_END 440 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 443 if(UTF8_IS_LEAD(c)) { \ 444 (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ 446 (c)=UTF8_ERROR_VALUE_1; \ 449 } UPRV_BLOCK_MACRO_END 452 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 453 if((uint32_t)(c)<=0x7f) { \ 454 (s)[(i)++]=(uint8_t)(c); \ 456 (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ 458 } UPRV_BLOCK_MACRO_END 461 #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) 464 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) 467 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) 470 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 472 if(UTF8_IS_TRAIL(c)) { \ 473 uint8_t __b, __count=1, __shift=6; \ 475 /* c is a trail byte */ \ 480 UTF8_MASK_LEAD_BYTE(__b, __count); \ 481 (c)|=(UChar32)__b<<__shift; \ 484 (c)|=(UChar32)(__b&0x3f)<<__shift; \ 490 } UPRV_BLOCK_MACRO_END 493 #define UTF8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 494 while(UTF8_IS_TRAIL((s)[--(i)])) {} \ 495 } UPRV_BLOCK_MACRO_END 498 #define UTF8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 501 UTF8_BACK_1_UNSAFE(s, i); \ 504 } UPRV_BLOCK_MACRO_END 507 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 508 UTF8_BACK_1_UNSAFE(s, i); \ 509 UTF8_FWD_1_UNSAFE(s, i); \ 510 } UPRV_BLOCK_MACRO_END 513 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 517 (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ 519 (c)=UTF8_ERROR_VALUE_1; \ 522 } UPRV_BLOCK_MACRO_END 525 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) 528 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) 531 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) 533 /* Formerly utf16.h --------------------------------------------------------- */ 536 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) 539 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) 542 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) 545 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 548 #define UTF16_GET_PAIR_VALUE(first, second) \ 549 (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) 552 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 555 #define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 558 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) 561 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) 564 #define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) 567 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) 570 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) 573 #define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) 576 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 579 #define UTF16_MAX_CHAR_LENGTH 2 582 #define UTF16_ARRAY_SIZE(size) (size) 595 #define UTF16_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 597 if(UTF_IS_SURROGATE(c)) { \ 598 if(UTF_IS_SURROGATE_FIRST(c)) { \ 599 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ 601 (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ 604 } UPRV_BLOCK_MACRO_END 607 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 609 if(UTF_IS_SURROGATE(c)) { \ 611 if(UTF_IS_SURROGATE_FIRST(c)) { \ 612 if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ 613 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 614 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ 616 /* unmatched first surrogate */ \ 617 (c)=UTF_ERROR_VALUE; \ 620 if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 621 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 622 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ 624 /* unmatched second surrogate */ \ 625 (c)=UTF_ERROR_VALUE; \ 628 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 629 (c)=UTF_ERROR_VALUE; \ 631 } UPRV_BLOCK_MACRO_END 634 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 636 if(UTF_IS_FIRST_SURROGATE(c)) { \ 637 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ 639 } UPRV_BLOCK_MACRO_END 642 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 643 if((uint32_t)(c)<=0xffff) { \ 644 (s)[(i)++]=(uint16_t)(c); \ 646 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 647 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 649 } UPRV_BLOCK_MACRO_END 652 #define UTF16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 653 if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ 656 } UPRV_BLOCK_MACRO_END 659 #define UTF16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 662 UTF16_FWD_1_UNSAFE(s, i); \ 665 } UPRV_BLOCK_MACRO_END 668 #define UTF16_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 669 if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ 672 } UPRV_BLOCK_MACRO_END 675 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 677 if(UTF_IS_FIRST_SURROGATE(c)) { \ 679 if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ 681 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 682 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ 684 /* unmatched first surrogate */ \ 685 (c)=UTF_ERROR_VALUE; \ 687 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 688 /* unmatched second surrogate or other non-character */ \ 689 (c)=UTF_ERROR_VALUE; \ 691 } UPRV_BLOCK_MACRO_END 694 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 695 if((uint32_t)(c)<=0xffff) { \ 696 (s)[(i)++]=(uint16_t)(c); \ 697 } else if((uint32_t)(c)<=0x10ffff) { \ 698 if((i)+1<(length)) { \ 699 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 700 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 701 } else/* not enough space */ { \ 702 (s)[(i)++]=UTF_ERROR_VALUE; \ 704 } else/* c>0x10ffff, write error value */ { \ 705 (s)[(i)++]=UTF_ERROR_VALUE; \ 707 } UPRV_BLOCK_MACRO_END 710 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) 713 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) 716 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) 719 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 721 if(UTF_IS_SECOND_SURROGATE(c)) { \ 722 (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ 724 } UPRV_BLOCK_MACRO_END 727 #define UTF16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 728 if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ 731 } UPRV_BLOCK_MACRO_END 734 #define UTF16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 737 UTF16_BACK_1_UNSAFE(s, i); \ 740 } UPRV_BLOCK_MACRO_END 743 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 744 if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ 747 } UPRV_BLOCK_MACRO_END 750 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 752 if(UTF_IS_SECOND_SURROGATE(c)) { \ 754 if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 756 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 757 /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ 759 /* unmatched second surrogate */ \ 760 (c)=UTF_ERROR_VALUE; \ 762 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 763 /* unmatched first surrogate or other non-character */ \ 764 (c)=UTF_ERROR_VALUE; \ 766 } UPRV_BLOCK_MACRO_END 769 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) 772 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) 775 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 777 /* Formerly utf32.h --------------------------------------------------------- */ 782 * This file defines macros to deal with UTF-32 code units and code points. 783 * Signatures and semantics are the same as for the similarly named macros 785 * utf32.h is included by utf.h after unicode/umachine.h</p> 786 * and some common definitions. 787 * <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros. 788 * Compound statements (curly braces {}) must be used for if-else-while... 789 * bodies and all macro statements should be terminated with semicolon.</p> 792 /* internal definitions ----------------------------------------------------- */ 795 #define UTF32_IS_SAFE(c, strict) \ 797 (uint32_t)(c)<=0x10ffff : \ 798 UTF_IS_UNICODE_CHAR(c)) 801 * For the semantics of all of these macros, see utf16.h. 802 * The UTF-32 versions are trivial because any code point is 803 * encoded using exactly one code unit. 806 /* single-code point definitions -------------------------------------------- */ 808 /* classes of code unit values */ 811 #define UTF32_IS_SINGLE(uchar) 1 813 #define UTF32_IS_LEAD(uchar) 0 815 #define UTF32_IS_TRAIL(uchar) 0 817 /* number of code units per code point */ 820 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0 822 #define UTF32_CHAR_LENGTH(c) 1 824 #define UTF32_MAX_CHAR_LENGTH 1 826 /* average number of code units compared to UTF-16 */ 829 #define UTF32_ARRAY_SIZE(size) (size) 832 #define UTF32_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 834 } UPRV_BLOCK_MACRO_END 837 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 839 if(!UTF32_IS_SAFE(c, strict)) { \ 840 (c)=UTF_ERROR_VALUE; \ 842 } UPRV_BLOCK_MACRO_END 844 /* definitions with forward iteration --------------------------------------- */ 847 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 849 } UPRV_BLOCK_MACRO_END 852 #define UTF32_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 854 } UPRV_BLOCK_MACRO_END 857 #define UTF32_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 859 } UPRV_BLOCK_MACRO_END 862 #define UTF32_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 864 } UPRV_BLOCK_MACRO_END 867 #define UTF32_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 868 } UPRV_BLOCK_MACRO_END 871 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 873 if(!UTF32_IS_SAFE(c, strict)) { \ 874 (c)=UTF_ERROR_VALUE; \ 876 } UPRV_BLOCK_MACRO_END 879 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 880 if((uint32_t)(c)<=0x10ffff) { \ 882 } else/* c>0x10ffff, write 0xfffd */ { \ 885 } UPRV_BLOCK_MACRO_END 888 #define UTF32_FWD_1_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ 890 } UPRV_BLOCK_MACRO_END 893 #define UTF32_FWD_N_SAFE(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ 894 if(((i)+=(n))>(length)) { \ 897 } UPRV_BLOCK_MACRO_END 900 #define UTF32_SET_CHAR_START_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ 901 } UPRV_BLOCK_MACRO_END 903 /* definitions with backward iteration -------------------------------------- */ 906 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 908 } UPRV_BLOCK_MACRO_END 911 #define UTF32_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 913 } UPRV_BLOCK_MACRO_END 916 #define UTF32_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 918 } UPRV_BLOCK_MACRO_END 921 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 922 } UPRV_BLOCK_MACRO_END 925 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ 927 if(!UTF32_IS_SAFE(c, strict)) { \ 928 (c)=UTF_ERROR_VALUE; \ 930 } UPRV_BLOCK_MACRO_END 933 #define UTF32_BACK_1_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ 935 } UPRV_BLOCK_MACRO_END 938 #define UTF32_BACK_N_SAFE(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 943 } UPRV_BLOCK_MACRO_END 946 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ 947 } UPRV_BLOCK_MACRO_END 949 /* Formerly utf.h, part 2 --------------------------------------------------- */ 956 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) 959 #define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 962 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 966 #define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 969 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 973 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 976 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 980 #define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 983 #define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 987 #define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 990 #define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 994 #define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 997 #define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) 1001 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 1004 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 1008 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 1011 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 1015 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 1018 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 1022 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 1025 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 1027 /* Define default macros (UTF-16 "safe") ------------------------------------ */ 1034 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) 1041 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) 1048 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) 1055 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) 1062 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c) 1069 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH 1080 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) 1093 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) 1106 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 1117 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) 1128 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) 1144 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) 1157 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) 1170 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) 1183 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) 1199 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 1201 #endif// !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H #define U_CAPI
This is used to declare a function as a public ICU C API.
#define U_CFUNC
This is used in a declaration of a library private ICU C function.
C API: 16-bit Unicode handling macros.
C API: 8-bit Unicode handling macros.
C API: Code point macros.
U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]
Internal array with numbers of trail bytes for any given byte used in lead byte position.