1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 4 ********************************************************************** 5 * Copyright (C) 1997-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 11 * Modification History: 13 * Date Name Description 14 * 04/02/97 aliu Creation. 15 * 03/29/99 helena Updated for C APIs. 16 * 4/15/99 Madhu Updated for C Implementation and Javadoc 17 * 5/20/99 Madhu Added the function u_getVersion() 18 * 8/19/1999 srl Upgraded scripts to Unicode 3.0 19 * 8/27/1999 schererm UCharDirection constants: U_... 20 * 11/11/1999 weiv added u_isalnum(), cleaned comments 21 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). 22 ****************************************************************************** 33 #if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) 52 /*==========================================================================*/ 53 /* Unicode version number */ 54 /*==========================================================================*/ 64 #define U_UNICODE_VERSION "16.0" 158 #define UCHAR_MIN_VALUE 0 168 #define UCHAR_MAX_VALUE 0x10ffff 174 #define U_MASK(x) ((uint32_t)1<<(x)) 198 * Note: UProperty constants are parsed by preparseucd.py. 199 * It matches lines like 200 * UCHAR_<Unicode property name>=<integer>, 203 /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that 204 debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, 205 rather than UCHAR_BINARY_START. 
Likewise for other *_START 555 #ifndef U_HIDE_DRAFT_API 562 #endif// U_HIDE_DRAFT_API 563 #ifndef U_HIDE_DEPRECATED_API 569 #endif// U_HIDE_DEPRECATED_API 685 #ifndef U_HIDE_DRAFT_API 692 #endif// U_HIDE_DRAFT_API 693 #ifndef U_HIDE_DEPRECATED_API 699 #endif// U_HIDE_DEPRECATED_API 712 #ifndef U_HIDE_DEPRECATED_API 718 #endif// U_HIDE_DEPRECATED_API 725 #ifndef U_HIDE_DEPRECATED_API 731 #endif// U_HIDE_DEPRECATED_API 744 #ifndef U_HIDE_DEPRECATED_API 748 #endif/* U_HIDE_DEPRECATED_API */ 770 #ifndef U_HIDE_DEPRECATED_API 776 #endif/* U_HIDE_DEPRECATED_API */ 783 #ifndef U_HIDE_DEPRECATED_API 789 #endif// U_HIDE_DEPRECATED_API 813 #ifndef U_HIDE_DEPRECATED_API 819 #endif// U_HIDE_DEPRECATED_API 833 * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. 834 * It matches pairs of lines like 835 * / ** <Unicode 2-letter General_Category value> comment... * / 836 * U_<[A-Z_]+> = <integer>, 925 #define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) 928 #define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) 930 #define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) 932 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) 934 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) 936 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) 939 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) 941 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) 943 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) 946 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) 948 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) 950 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) 953 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) 955 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) 957 #define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) 960 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) 962 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) 964 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) 966 #define U_GC_CS_MASK U_MASK(U_SURROGATE) 969 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) 971 #define U_GC_PS_MASK 
U_MASK(U_START_PUNCTUATION) 973 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) 975 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) 977 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) 980 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) 982 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) 984 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) 986 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) 989 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) 991 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) 995 #define U_GC_L_MASK \ 996 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) 999 #define U_GC_LC_MASK \ 1000 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) 1003 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) 1006 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) 1009 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) 1012 #define U_GC_C_MASK \ 1013 (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) 1016 #define U_GC_P_MASK \ 1017 (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ 1018 U_GC_PI_MASK|U_GC_PF_MASK) 1021 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) 1029 * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. 1030 * It matches pairs of lines like 1031 * / ** <Unicode 1..3-letter Bidi_Class value> comment... * / 1032 * U_<[A-Z_]+> = <integer>, 1081 #ifndef U_HIDE_DEPRECATED_API 1089 #endif// U_HIDE_DEPRECATED_API 1100 * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. 1101 * It matches lines like 1102 * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name> 1111 #ifndef U_HIDE_DEPRECATED_API 1119 #endif// U_HIDE_DEPRECATED_API 1128 * Note: UBlockCode constants are parsed by preparseucd.py. 
1129 * It matches lines like 1130 * UBLOCK_<Unicode Block value name> = <integer>, 1421 /* New blocks in Unicode 3.1 */ 1442 /* New blocks in Unicode 3.2 */ 1478 /* New blocks in Unicode 4 */ 1511 /* New blocks in Unicode 4.1 */ 1554 /* New blocks in Unicode 5.0 */ 1575 /* New blocks in Unicode 5.1 */ 1612 /* New blocks in Unicode 5.2 */ 1667 /* New blocks in Unicode 6.0 */ 1694 /* New blocks in Unicode 6.1 */ 1719 /* New blocks in Unicode 7.0 */ 1786 /* New blocks in Unicode 8.0 */ 1809 /* New blocks in Unicode 9.0 */ 1834 // New blocks in Unicode 10.0 1851 // New blocks in Unicode 11.0 1876 // New blocks in Unicode 12.0 1897 // New blocks in Unicode 13.0 1916 // New blocks in Unicode 14.0 1943 // New blocks in Unicode 15.0 1960 // New block in Unicode 15.1 1965 // New blocks in Unicode 16.0 1988 #ifndef U_HIDE_DEPRECATED_API 1996 #endif// U_HIDE_DEPRECATED_API 2014 * Note: UEastAsianWidth constants are parsed by preparseucd.py. 2015 * It matches lines like 2016 * U_EA_<Unicode East_Asian_Width value name> 2019 U_EA_NEUTRAL,
/*[N]*/ 2020 U_EA_AMBIGUOUS,
/*[A]*/ 2021 U_EA_HALFWIDTH,
/*[H]*/ 2022 U_EA_FULLWIDTH,
/*[F]*/ 2023 U_EA_NARROW,
/*[Na]*/ 2025 #ifndef U_HIDE_DEPRECATED_API 2033 #endif// U_HIDE_DEPRECATED_API 2050 #ifndef U_HIDE_DEPRECATED_API 2057 #endif/* U_HIDE_DEPRECATED_API */ 2062 #ifndef U_HIDE_DEPRECATED_API 2068 #endif// U_HIDE_DEPRECATED_API 2085 U_SHORT_PROPERTY_NAME,
2086 U_LONG_PROPERTY_NAME,
2087 #ifndef U_HIDE_DEPRECATED_API 2093 #endif// U_HIDE_DEPRECATED_API 2104 * Note: UDecompositionType constants are parsed by preparseucd.py. 2105 * It matches lines like 2106 * U_DT_<Unicode Decomposition_Type value name> 2109 U_DT_NONE,
/*[none]*/ 2110 U_DT_CANONICAL,
/*[can]*/ 2111 U_DT_COMPAT,
/*[com]*/ 2112 U_DT_CIRCLE,
/*[enc]*/ 2113 U_DT_FINAL,
/*[fin]*/ 2114 U_DT_FONT,
/*[font]*/ 2115 U_DT_FRACTION,
/*[fra]*/ 2116 U_DT_INITIAL,
/*[init]*/ 2117 U_DT_ISOLATED,
/*[iso]*/ 2118 U_DT_MEDIAL,
/*[med]*/ 2119 U_DT_NARROW,
/*[nar]*/ 2120 U_DT_NOBREAK,
/*[nb]*/ 2121 U_DT_SMALL,
/*[sml]*/ 2122 U_DT_SQUARE,
/*[sqr]*/ 2124 U_DT_SUPER,
/*[sup]*/ 2125 U_DT_VERTICAL,
/*[vert]*/ 2126 U_DT_WIDE,
/*[wide]*/ 2127 #ifndef U_HIDE_DEPRECATED_API 2135 #endif// U_HIDE_DEPRECATED_API 2146 * Note: UJoiningType constants are parsed by preparseucd.py. 2147 * It matches lines like 2148 * U_JT_<Unicode Joining_Type value name> 2151 U_JT_NON_JOINING,
/*[U]*/ 2152 U_JT_JOIN_CAUSING,
/*[C]*/ 2153 U_JT_DUAL_JOINING,
/*[D]*/ 2154 U_JT_LEFT_JOINING,
/*[L]*/ 2155 U_JT_RIGHT_JOINING,
/*[R]*/ 2156 U_JT_TRANSPARENT,
/*[T]*/ 2157 #ifndef U_HIDE_DEPRECATED_API 2165 #endif// U_HIDE_DEPRECATED_API 2176 * Note: UJoiningGroup constants are parsed by preparseucd.py. 2177 * It matches lines like 2178 * U_JG_<Unicode Joining_Group value name> 2181 U_JG_NO_JOINING_GROUP,
2292 #ifndef U_HIDE_DEPRECATED_API 2300 #endif// U_HIDE_DEPRECATED_API 2311 * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. 2312 * It matches lines like 2313 * U_GCB_<Unicode Grapheme_Cluster_Break value name> 2316 U_GCB_OTHER = 0,
/*[XX]*/ 2317 U_GCB_CONTROL = 1,
/*[CN]*/ 2318 U_GCB_CR = 2,
/*[CR]*/ 2319 U_GCB_EXTEND = 3,
/*[EX]*/ 2321 U_GCB_LF = 5,
/*[LF]*/ 2322 U_GCB_LV = 6,
/*[LV]*/ 2323 U_GCB_LVT = 7,
/*[LVT]*/ 2343 #ifndef U_HIDE_DEPRECATED_API 2351 #endif// U_HIDE_DEPRECATED_API 2363 * Note: UWordBreakValues constants are parsed by preparseucd.py. 2364 * It matches lines like 2365 * U_WB_<Unicode Word_Break value name> 2368 U_WB_OTHER = 0,
/*[XX]*/ 2369 U_WB_ALETTER = 1,
/*[LE]*/ 2370 U_WB_FORMAT = 2,
/*[FO]*/ 2371 U_WB_KATAKANA = 3,
/*[KA]*/ 2372 U_WB_MIDLETTER = 4,
/*[ML]*/ 2373 U_WB_MIDNUM = 5,
/*[MN]*/ 2374 U_WB_NUMERIC = 6,
/*[NU]*/ 2375 U_WB_EXTENDNUMLET = 7,
/*[EX]*/ 2377 U_WB_CR = 8,
/*[CR]*//* from here on: new in Unicode 5.1/ICU 4.0 */ 2395 U_WB_E_BASE = 17,
/*[EB]*//* from here on: new in Unicode 9.0/ICU 58 */ 2407 #ifndef U_HIDE_DEPRECATED_API 2415 #endif// U_HIDE_DEPRECATED_API 2426 * Note: USentenceBreak constants are parsed by preparseucd.py. 2427 * It matches lines like 2428 * U_SB_<Unicode Sentence_Break value name> 2431 U_SB_OTHER = 0,
/*[XX]*/ 2432 U_SB_ATERM = 1,
/*[AT]*/ 2433 U_SB_CLOSE = 2,
/*[CL]*/ 2434 U_SB_FORMAT = 3,
/*[FO]*/ 2435 U_SB_LOWER = 4,
/*[LO]*/ 2436 U_SB_NUMERIC = 5,
/*[NU]*/ 2437 U_SB_OLETTER = 6,
/*[LE]*/ 2438 U_SB_SEP = 7,
/*[SE]*/ 2439 U_SB_SP = 8,
/*[SP]*/ 2440 U_SB_STERM = 9,
/*[ST]*/ 2441 U_SB_UPPER = 10,
/*[UP]*/ 2442 U_SB_CR = 11,
/*[CR]*//* from here on: new in Unicode 5.1/ICU 4.0 */ 2443 U_SB_EXTEND = 12,
/*[EX]*/ 2444 U_SB_LF = 13,
/*[LF]*/ 2445 U_SB_SCONTINUE = 14,
/*[SC]*/ 2446 #ifndef U_HIDE_DEPRECATED_API 2454 #endif// U_HIDE_DEPRECATED_API 2465 * Note: ULineBreak constants are parsed by preparseucd.py. 2466 * It matches lines like 2467 * U_LB_<Unicode Line_Break value name> 2470 U_LB_UNKNOWN = 0,
/*[XX]*/ 2471 U_LB_AMBIGUOUS = 1,
/*[AI]*/ 2472 U_LB_ALPHABETIC = 2,
/*[AL]*/ 2473 U_LB_BREAK_BOTH = 3,
/*[B2]*/ 2474 U_LB_BREAK_AFTER = 4,
/*[BA]*/ 2475 U_LB_BREAK_BEFORE = 5,
/*[BB]*/ 2476 U_LB_MANDATORY_BREAK = 6,
/*[BK]*/ 2477 U_LB_CONTINGENT_BREAK = 7,
/*[CB]*/ 2478 U_LB_CLOSE_PUNCTUATION = 8,
/*[CL]*/ 2479 U_LB_COMBINING_MARK = 9,
/*[CM]*/ 2480 U_LB_CARRIAGE_RETURN = 10,
/*[CR]*/ 2481 U_LB_EXCLAMATION = 11,
/*[EX]*/ 2482 U_LB_GLUE = 12,
/*[GL]*/ 2483 U_LB_HYPHEN = 13,
/*[HY]*/ 2484 U_LB_IDEOGRAPHIC = 14,
/*[ID]*/ 2488 U_LB_INFIX_NUMERIC = 16,
/*[IS]*/ 2489 U_LB_LINE_FEED = 17,
/*[LF]*/ 2490 U_LB_NONSTARTER = 18,
/*[NS]*/ 2491 U_LB_NUMERIC = 19,
/*[NU]*/ 2492 U_LB_OPEN_PUNCTUATION = 20,
/*[OP]*/ 2493 U_LB_POSTFIX_NUMERIC = 21,
/*[PO]*/ 2494 U_LB_PREFIX_NUMERIC = 22,
/*[PR]*/ 2495 U_LB_QUOTATION = 23,
/*[QU]*/ 2496 U_LB_COMPLEX_CONTEXT = 24,
/*[SA]*/ 2497 U_LB_SURROGATE = 25,
/*[SG]*/ 2498 U_LB_SPACE = 26,
/*[SP]*/ 2499 U_LB_BREAK_SYMBOLS = 27,
/*[SY]*/ 2500 U_LB_ZWSPACE = 28,
/*[ZW]*/ 2506 U_LB_H2 = 31,
/*[H2]*//* from here on: new in Unicode 4.1/ICU 3.4 */ 2524 U_LB_E_BASE = 40,
/*[EB]*//* from here on: new in Unicode 9.0/ICU 58 */ 2539 #ifndef U_HIDE_DEPRECATED_API 2547 #endif// U_HIDE_DEPRECATED_API 2558 * Note: UNumericType constants are parsed by preparseucd.py. 2559 * It matches lines like 2560 * U_NT_<Unicode Numeric_Type value name> 2563 U_NT_NONE,
/*[None]*/ 2564 U_NT_DECIMAL,
/*[de]*/ 2566 U_NT_NUMERIC,
/*[nu]*/ 2567 #ifndef U_HIDE_DEPRECATED_API 2575 #endif// U_HIDE_DEPRECATED_API 2586 * Note: UHangulSyllableType constants are parsed by preparseucd.py. 2587 * It matches lines like 2588 * U_HST_<Unicode Hangul_Syllable_Type value name> 2591 U_HST_NOT_APPLICABLE,
/*[NA]*/ 2592 U_HST_LEADING_JAMO,
/*[L]*/ 2593 U_HST_VOWEL_JAMO,
/*[V]*/ 2594 U_HST_TRAILING_JAMO,
/*[T]*/ 2595 U_HST_LV_SYLLABLE,
/*[LV]*/ 2596 U_HST_LVT_SYLLABLE,
/*[LVT]*/ 2597 #ifndef U_HIDE_DEPRECATED_API 2605 #endif// U_HIDE_DEPRECATED_API 2616 * Note: UIndicPositionalCategory constants are parsed by preparseucd.py. 2617 * It matches lines like 2618 * U_INPC_<Unicode Indic_Positional_Category value name> 2663 * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py. 2664 * It matches lines like 2665 * U_INSC_<Unicode Indic_Syllabic_Category value name> 2744 #ifndef U_HIDE_DRAFT_API 2753 * Note: UIndicConjunctBreak constants are parsed by preparseucd.py. 2754 * It matches lines like 2755 * U_INCB_<Unicode Indic_Conjunct_Break value name> 2767 #endif// U_HIDE_DRAFT_API 2777 * Note: UVerticalOrientation constants are parsed by preparseucd.py. 2778 * It matches lines like 2779 * U_VO_<Unicode Vertical_Orientation value name> 2801 * Note: UIdentifierStatus constants are parsed by preparseucd.py. 2802 * It matches lines like 2803 * U_ID_STATUS_<Unicode Identifier_Status value name> 2821 * Note: UIdentifierType constants are parsed by preparseucd.py. 2822 * It matches lines like 2823 * U_ID_TYPE_<Unicode Identifier_Type value name> 3134 #define U_NO_NUMERIC_VALUE ((double)-123456789.) 3644 #define U_GET_GC_MASK(c) U_MASK(u_charType(c)) 3688 #if !UCONFIG_NO_NORMALIZATION 3774 char *buffer, int32_t bufferLength,
3777 #ifndef U_HIDE_DEPRECATED_API 3798 char *dest, int32_t destCapacity,
3800 #endif/* U_HIDE_DEPRECATED_API */ 3909 U_CAPI const char* U_EXPORT2
3982 U_CAPI const char* U_EXPORT2
4373 #if !UCONFIG_NO_NORMALIZATION
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
C API: Bit set option bit constants for various string and character processing functions.
U_CAPI UBool u_isupper(UChar32 c)
Determines whether the specified code point has the general category "Lu" (uppercase letter).
U_CAPI UBool u_isUAlphabetic(UChar32 c)
Check if a code point has the Alphabetic Unicode property.
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
@ UBLOCK_COUNT
One more than the highest normal UBlockCode value.
@ UBLOCK_OPTICAL_CHARACTER_RECOGNITION
@ UBLOCK_LATIN_EXTENDED_G
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
@ UBLOCK_INSCRIPTIONAL_PAHLAVI
@ UBLOCK_YIJING_HEXAGRAM_SYMBOLS
@ UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS
@ UBLOCK_ARABIC_EXTENDED_B
@ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
@ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION
@ UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT
@ UBLOCK_CURRENCY_SYMBOLS
@ UBLOCK_MISCELLANEOUS_SYMBOLS
@ UBLOCK_ANCIENT_GREEK_NUMBERS
@ UBLOCK_SUTTON_SIGNWRITING
@ UBLOCK_ANATOLIAN_HIEROGLYPHS
@ UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING
@ UBLOCK_SUPPLEMENTAL_PUNCTUATION
@ UBLOCK_MEROITIC_HIEROGLYPHS
@ UBLOCK_INDIC_SIYAQ_NUMBERS
@ UBLOCK_KAKTOVIK_NUMERALS
@ UBLOCK_COPTIC_EPACT_NUMBERS
@ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS
@ UBLOCK_TAMIL_SUPPLEMENT
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
@ UBLOCK_NYIAKENG_PUACHUE_HMONG
@ UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT
@ UBLOCK_ETHIOPIC_EXTENDED_B
@ UBLOCK_TRANSPORT_AND_MAP_SYMBOLS
@ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS
@ UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A
@ UBLOCK_PRIVATE_USE_AREA
Same as UBLOCK_PRIVATE_USE.
@ UBLOCK_SINHALA_ARCHAIC_NUMBERS
@ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS
@ UBLOCK_KHITAN_SMALL_SCRIPT
@ UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT
@ UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION
@ UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT
@ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT
@ UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED
@ UBLOCK_EGYPTIAN_HIEROGLYPHS
@ UBLOCK_TANGUT_COMPONENTS
@ UBLOCK_CHEROKEE_SUPPLEMENT
@ UBLOCK_LATIN_EXTENDED_A
@ UBLOCK_SUNDANESE_SUPPLEMENT
@ UBLOCK_CYRILLIC_EXTENDED_A
@ UBLOCK_HANGUL_JAMO_EXTENDED_B
@ UBLOCK_SUPPLEMENTAL_ARROWS_A
@ UBLOCK_DEVANAGARI_EXTENDED_A
@ UBLOCK_INSCRIPTIONAL_PARTHIAN
@ UBLOCK_ARABIC_EXTENDED_A
@ UBLOCK_CYRILLIC_EXTENDED_C
@ UBLOCK_OTTOMAN_SIYAQ_NUMBERS
@ UBLOCK_LINEAR_B_SYLLABARY
@ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS
@ UBLOCK_SPACING_MODIFIER_LETTERS
@ UBLOCK_MEROITIC_CURSIVE
@ UBLOCK_CYRILLIC_EXTENDED_B
@ UBLOCK_GENERAL_PUNCTUATION
@ UBLOCK_MONGOLIAN_SUPPLEMENT
@ UBLOCK_MISCELLANEOUS_TECHNICAL
@ UBLOCK_TAI_XUAN_JING_SYMBOLS
@ UBLOCK_CONTROL_PICTURES
@ UBLOCK_GREEK
Unicode 3.2 renames this block to "Greek and Coptic".
@ UBLOCK_COUNTING_ROD_NUMERALS
@ UBLOCK_LATIN_EXTENDED_E
@ UBLOCK_LINEAR_B_IDEOGRAMS
@ UBLOCK_RUMI_NUMERAL_SYMBOLS
@ UBLOCK_HIGH_PRIVATE_USE_SURROGATES
@ UBLOCK_MEETEI_MAYEK_EXTENSIONS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H
@ UBLOCK_COMMON_INDIC_NUMBER_FORMS
@ UBLOCK_ZANABAZAR_SQUARE
@ UBLOCK_ARABIC_EXTENDED_C
@ UBLOCK_ENCLOSED_ALPHANUMERICS
@ UBLOCK_COMBINING_HALF_MARKS
@ UBLOCK_GLAGOLITIC_SUPPLEMENT
@ UBLOCK_IMPERIAL_ARAMAIC
@ UBLOCK_BRAILLE_PATTERNS
@ UBLOCK_MATHEMATICAL_OPERATORS
@ UBLOCK_NO_BLOCK
New No_Block value in Unicode 4.
@ UBLOCK_TANGUT_SUPPLEMENT
@ UBLOCK_SMALL_FORM_VARIANTS
@ UBLOCK_GEORGIAN_EXTENDED
@ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
@ UBLOCK_LATIN_EXTENDED_D
@ UBLOCK_LATIN_EXTENDED_ADDITIONAL
@ UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
@ UBLOCK_SHORTHAND_FORMAT_CONTROLS
@ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
@ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
@ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS
@ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS
@ UBLOCK_ETHIOPIC_EXTENDED
@ UBLOCK_PRIVATE_USE
Same as UBLOCK_PRIVATE_USE_AREA.
@ UBLOCK_GEORGIAN_SUPPLEMENT
@ UBLOCK_HANGUL_COMPATIBILITY_JAMO
@ UBLOCK_ARABIC_SUPPLEMENT
@ UBLOCK_HANGUL_SYLLABLES
@ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS
@ UBLOCK_CJK_COMPATIBILITY
@ UBLOCK_CJK_RADICALS_SUPPLEMENT
@ UBLOCK_ARABIC_PRESENTATION_FORMS_B
@ UBLOCK_ARABIC_PRESENTATION_FORMS_A
@ UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
@ UBLOCK_BOPOMOFO_EXTENDED
@ UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION
@ UBLOCK_LATIN_EXTENDED_F
@ UBLOCK_CYPRIOT_SYLLABARY
@ UBLOCK_ETHIOPIC_SUPPLEMENT
@ UBLOCK_OLD_SOUTH_ARABIAN
@ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B
@ UBLOCK_BAMUM_SUPPLEMENT
@ UBLOCK_CYRILLIC_SUPPLEMENT
@ UBLOCK_MYANMAR_EXTENDED_B
@ UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION
@ UBLOCK_SYRIAC_SUPPLEMENT
@ UBLOCK_ORNAMENTAL_DINGBATS
@ UBLOCK_OLD_NORTH_ARABIAN
@ UBLOCK_DEVANAGARI_EXTENDED
@ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I
@ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS
@ UBLOCK_SUPPLEMENTAL_ARROWS_C
@ UBLOCK_EGYPTIAN_HIEROGLYPHS_EXTENDED_A
@ UBLOCK_MYANMAR_EXTENDED_C
@ UBLOCK_LATIN_EXTENDED_B
@ UBLOCK_MODIFIER_TONE_LETTERS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS
@ UBLOCK_CJK_COMPATIBILITY_FORMS
@ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
@ UBLOCK_VEDIC_EXTENSIONS
@ UBLOCK_CYRILLIC_EXTENDED_D
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
@ UBLOCK_LATIN_1_SUPPLEMENT
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G
@ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
@ UBLOCK_CAUCASIAN_ALBANIAN
@ UBLOCK_ALCHEMICAL_SYMBOLS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F
@ UBLOCK_MYANMAR_EXTENDED_A
@ UBLOCK_EARLY_DYNASTIC_CUNEIFORM
@ UBLOCK_PHONETIC_EXTENSIONS
@ UBLOCK_GEOMETRIC_SHAPES
@ UBLOCK_LATIN_EXTENDED_C
@ UBLOCK_SUPPLEMENTAL_ARROWS_B
@ UBLOCK_ETHIOPIC_EXTENDED_A
@ UBLOCK_GEOMETRIC_SHAPES_EXTENDED
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A
@ UBLOCK_ALPHABETIC_PRESENTATION_FORMS
@ UBLOCK_LETTERLIKE_SYMBOLS
@ UBLOCK_ZNAMENNY_MUSICAL_NOTATION
@ UBLOCK_CYRILLIC_SUPPLEMENTARY
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
@ UBLOCK_HANGUL_JAMO_EXTENDED_A
@ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS
@ UBLOCK_SMALL_KANA_EXTENSION
@ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
@ UBLOCK_COMBINING_DIACRITICAL_MARKS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E
@ UBLOCK_VARIATION_SELECTORS
U_CAPI UBool u_isblank(UChar32 c)
Determines whether the specified code point is a "blank" or "horizontal space", a character that visi...
U_CAPI UChar32 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
Find a Unicode character by its name and return its code point value.
UCharNameChoice
Selector constants for u_charName().
@ U_CHAR_NAME_CHOICE_COUNT
One more than the highest normal UCharNameChoice value.
@ U_UNICODE_10_CHAR_NAME
The Unicode_1_Name property value which is of little practical value.
@ U_CHAR_NAME_ALIAS
Corrected name from NameAliases.txt.
@ U_EXTENDED_CHAR_NAME
Standard or synthetic character name.
@ U_UNICODE_CHAR_NAME
Unicode character name (Name property).
U_CAPI UBool u_isUWhiteSpace(UChar32 c)
Check if a code point has the White_Space Unicode property.
UIdentifierType
Identifier Type constants.
@ U_ID_TYPE_DEFAULT_IGNORABLE
@ U_ID_TYPE_NOT_CHARACTER
U_CAPI int32_t u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
Retrieve the name of a Unicode character.
U_CAPI int32_t u_getIntPropertyValue(UChar32 c, UProperty which)
Get the property value for an enumerated or integer Unicode property for a code point.
U_CAPI bool u_hasIDType(UChar32 c, UIdentifierType type)
Does the set of Identifier_Type values for code point c contain the given type?
U_CAPI UBlockCode ublock_getCode(UChar32 c)
Returns the Unicode allocation block that contains the character.
U_CAPI UBool u_hasBinaryProperty(UChar32 c, UProperty which)
Check a binary Unicode property for a code point.
U_CAPI double u_getNumericValue(UChar32 c)
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
U_CAPI UChar32 u_toupper(UChar32 c)
The given character is mapped to its uppercase equivalent according to UnicodeData....
U_CAPI UProperty u_getPropertyEnum(const char *alias)
Return the UProperty enum for a given property name, as specified in the Unicode database file Proper...
UJoiningType
Joining Type constants.
@ U_JT_COUNT
One more than the highest normal UJoiningType value.
U_CAPI UBool u_isbase(UChar32 c)
Non-standard: Determines whether the specified code point is a base character.
UIdentifierStatus
Identifier Status constants.
U_CAPI UBool u_isdigit(UChar32 c)
Determines whether the specified code point is a digit character according to Java.
U_CAPI UBool u_isMirrored(UChar32 c)
Determines whether the code point has the Bidi_Mirrored property.
U_CAPI void u_charAge(UChar32 c, UVersionInfo versionArray)
Get the "age" of the code point.
U_CAPI UBool u_isspace(UChar32 c)
Determines if the specified character is a space character or not.
U_CAPI int32_t u_getIntPropertyMaxValue(UProperty which)
Get the maximum value for an enumerated/integer/binary Unicode property.
UIndicPositionalCategory
Indic Positional Category constants.
@ U_INPC_VISUAL_ORDER_LEFT
@ U_INPC_TOP_AND_BOTTOM_AND_RIGHT
@ U_INPC_TOP_AND_BOTTOM_AND_LEFT
@ U_INPC_TOP_AND_LEFT_AND_RIGHT
@ U_INPC_BOTTOM_AND_RIGHT
U_CAPI void u_getUnicodeVersion(UVersionInfo versionArray)
Gets the Unicode version information.
UCharDirection
This specifies the language directional property of a character set.
@ U_EUROPEAN_NUMBER_TERMINATOR
ET.
@ U_RIGHT_TO_LEFT_ARABIC
AL.
@ U_POP_DIRECTIONAL_ISOLATE
PDI.
@ U_COMMON_NUMBER_SEPARATOR
CS.
@ U_DIR_NON_SPACING_MARK
NSM.
@ U_FIRST_STRONG_ISOLATE
FSI.
@ U_POP_DIRECTIONAL_FORMAT
PDF.
@ U_CHAR_DIRECTION_COUNT
One more than the highest UCharDirection value.
@ U_WHITE_SPACE_NEUTRAL
WS.
@ U_RIGHT_TO_LEFT_OVERRIDE
RLO.
@ U_RIGHT_TO_LEFT_EMBEDDING
RLE.
@ U_EUROPEAN_NUMBER_SEPARATOR
ES.
@ U_LEFT_TO_RIGHT_ISOLATE
LRI.
@ U_LEFT_TO_RIGHT_OVERRIDE
LRO.
@ U_LEFT_TO_RIGHT_EMBEDDING
LRE.
@ U_RIGHT_TO_LEFT_ISOLATE
RLI.
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
@ U_PROPERTY_NAME_CHOICE_COUNT
One more than the highest normal UPropertyNameChoice value.
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with th...
U_CAPI UBool u_isUUppercase(UChar32 c)
Check if a code point has the Uppercase Unicode property.
U_CAPI int8_t u_charType(UChar32 c)
Returns the general category value for the code point.
U_CAPI UChar32 u_forDigit(int32_t digit, int8_t radix)
Determines the character representation for a specific digit in the specified radix.
U_CAPI UBool u_isprint(UChar32 c)
Determines whether the specified code point is a printable character.
ULineBreak
Line Break constants.
@ U_LB_CONDITIONAL_JAPANESE_STARTER
@ U_LB_INSEPARABLE
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
@ U_LB_COUNT
One more than the highest normal ULineBreak value.
@ U_LB_REGIONAL_INDICATOR
int32_t u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Returns an empty string.
U_CAPI const UCPMap * u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode)
Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
U_CAPI UBool u_islower(UChar32 c)
Determines whether the specified code point has the general category "Ll" (lowercase letter).
U_CAPI UBool u_isISOControl(UChar32 c)
Determines whether the specified code point is an ISO control code.
UCharCategory
Data for enumerated Unicode general category types.
@ U_GENERAL_OTHER_TYPES
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNE...
@ U_PARAGRAPH_SEPARATOR
Zp.
@ U_INITIAL_PUNCTUATION
Pi.
@ U_UNASSIGNED
Non-category for unassigned and non-character code points.
@ U_COMBINING_SPACING_MARK
Mc.
@ U_CONNECTOR_PUNCTUATION
Pc.
@ U_CHAR_CATEGORY_COUNT
One higher than the last enum UCharCategory constant.
@ U_DECIMAL_DIGIT_NUMBER
Nd.
U_CAPI UBool u_isWhitespace(UChar32 c)
Determines if the specified code point is a whitespace character according to Java/ICU.
UVerticalOrientation
Vertical Orientation constants.
@ U_VO_TRANSFORMED_UPRIGHT
@ U_VO_TRANSFORMED_ROTATED
U_CAPI UBool u_isdefined(UChar32 c)
Determines whether the specified code point is "defined", which usually means that it is assigned a c...
UJoiningGroup
Joining Group constants.
@ U_JG_BURUSHASKI_YEH_BARREE
@ U_JG_HANIFI_ROHINGYA_PA
@ U_JG_MANICHAEAN_THAMEDH
@ U_JG_COUNT
One more than the highest normal UJoiningGroup value.
@ U_JG_MANICHAEAN_DHAMEDH
@ U_JG_MANICHAEAN_HUNDRED
@ U_JG_HANIFI_ROHINGYA_KINNA_YA
UHangulSyllableType
Hangul Syllable Type constants.
@ U_HST_COUNT
One more than the highest normal UHangulSyllableType value.
U_CAPI UChar32 u_getBidiPairedBracket(UChar32 c)
Maps the specified character to its paired bracket character.
U_CAPI UBool u_isIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in an identifier accordin...
U_CAPI const char * u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases....
USentenceBreak
Sentence Break constants.
@ U_SB_COUNT
One more than the highest normal USentenceBreak value.
U_CAPI int32_t u_getPropertyValueEnum(UProperty property, const char *alias)
Return the property value integer for a given value name, as specified in the Unicode database file P...
UBool UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c...
UEastAsianWidth
East Asian Width constants.
@ U_EA_COUNT
One more than the highest normal UEastAsianWidth value.
U_CAPI UChar32 u_totitle(UChar32 c)
The given character is mapped to its titlecase equivalent according to UnicodeData....
U_CAPI UBool u_isULowercase(UChar32 c)
Check if a code point has the Lowercase Unicode property.
U_CAPI UBool u_isJavaIDPart(UChar32 c)
Determines if the specified character is permissible in a Java identifier.
U_CAPI UCharDirection u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirection...
U_CAPI UBool u_istitle(UChar32 c)
Determines whether the specified code point is a titlecase letter.
U_CAPI UBool u_iscntrl(UChar32 c)
Determines whether the specified code point is a control character (as defined by this function).
U_CAPI const char * u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property value, as given in the Unicode database file PropertyVal...
UGraphemeClusterBreak
Grapheme Cluster Break constants.
@ U_GCB_REGIONAL_INDICATOR
@ U_GCB_COUNT
One more than the highest normal UGraphemeClusterBreak value.
U_CAPI const USet * u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode)
Returns a frozen USet for a binary property.
U_CAPI uint8_t u_getCombiningClass(UChar32 c)
Returns the combining class of the code point as specified in UnicodeData.txt.
U_CAPI void u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
Enumerate efficiently all code points with their Unicode general categories.
U_CAPI UBool u_isxdigit(UChar32 c)
Determines whether the specified code point is a hexadecimal digit.
U_CAPI void u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive,...
U_CAPI UChar32 u_charMirror(UChar32 c)
Maps the specified character to a "mirror-image" character.
U_CAPI UBool u_isJavaSpaceChar(UChar32 c)
Determine if the specified code point is a space character according to Java.
U_CAPI UBool u_isIDPart(UChar32 c)
Determines if the specified character is permissible as a non-initial character of an identifier acco...
U_CAPI UChar32 u_foldCase(UChar32 c, uint32_t options)
The given character is mapped to its case folding equivalent according to UnicodeData....
U_CAPI UBool u_isgraph(UChar32 c)
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
U_CAPI int32_t u_charDigitValue(UChar32 c)
Returns the decimal digit value of a decimal digit character.
U_CAPI UBool u_isIDIgnorable(UChar32 c)
Determines if the specified character should be regarded as an ignorable character in an identifier,...
U_CAPI int32_t u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode)
Writes code point c's Identifier_Type as a list of UIdentifierType values to the output types array a...
U_CAPI UBool u_ispunct(UChar32 c)
Determines whether the specified code point is a punctuation character.
UNumericType
Numeric Type constants.
@ U_NT_COUNT
One more than the highest normal UNumericType value.
UIndicConjunctBreak
Indic Conjunct Break constants.
UDecompositionType
Decomposition Type constants.
@ U_DT_COUNT
One more than the highest normal UDecompositionType value.
U_CAPI UBool u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which)
Returns true if the property is true for the string.
U_CAPI int32_t u_digit(UChar32 ch, int8_t radix)
Returns the decimal digit value of the code point in the specified radix.
UProperty
Selection constants for Unicode properties.
@ UCHAR_GRAPHEME_CLUSTER_BREAK
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
@ UCHAR_BIDI_PAIRED_BRACKET
String property Bidi_Paired_Bracket (new in Unicode 6.3).
@ UCHAR_ALPHABETIC
Binary property Alphabetic.
@ UCHAR_POSIX_GRAPH
Binary property graph (a C/POSIX character class).
@ UCHAR_RGI_EMOJI_TAG_SEQUENCE
Binary property of strings RGI_Emoji_Tag_Sequence.
@ UCHAR_RGI_EMOJI
Binary property of strings RGI_Emoji.
@ UCHAR_BLOCK
Enumerated property Block.
@ UCHAR_INVALID_CODE
Represents a nonexistent or invalid property or property value.
@ UCHAR_SEGMENT_STARTER
Binary Property Segment_Starter.
@ UCHAR_PREPENDED_CONCATENATION_MARK
Binary property Prepended_Concatenation_Mark.
@ UCHAR_REGIONAL_INDICATOR
Binary property Regional_Indicator.
@ UCHAR_OTHER_PROPERTY_START
First constant for Unicode properties with unusual value types.
@ UCHAR_S_TERM
Binary property STerm (new in Unicode 4.0.1).
@ UCHAR_WHITE_SPACE
Binary property White_Space.
@ UCHAR_CANONICAL_COMBINING_CLASS
Enumerated property Canonical_Combining_Class.
@ UCHAR_SOFT_DOTTED
Binary property Soft_Dotted (new in Unicode 3.2).
@ UCHAR_GRAPHEME_LINK
Binary property Grapheme_Link (new in Unicode 3.2).
@ UCHAR_PATTERN_SYNTAX
Binary property Pattern_Syntax (new in Unicode 4.1).
@ UCHAR_GRAPHEME_EXTEND
Binary property Grapheme_Extend (new in Unicode 3.2).
@ UCHAR_CASED
Binary property Cased.
@ UCHAR_XID_START
Binary property XID_Start.
@ UCHAR_RGI_EMOJI_ZWJ_SEQUENCE
Binary property of strings RGI_Emoji_ZWJ_Sequence.
@ UCHAR_VERTICAL_ORIENTATION
Enumerated property Vertical_Orientation.
@ UCHAR_NFKC_INERT
Binary property NFKC_Inert.
@ UCHAR_INT_LIMIT
One more than the last constant for enumerated/integer Unicode properties.
@ UCHAR_RGI_EMOJI_FLAG_SEQUENCE
Binary property of strings RGI_Emoji_Flag_Sequence.
@ UCHAR_OTHER_PROPERTY_LIMIT
One more than the last constant for Unicode properties with unusual value types.
@ UCHAR_PATTERN_WHITE_SPACE
Binary property Pattern_White_Space (new in Unicode 4.1).
@ UCHAR_VARIATION_SELECTOR
Binary property Variation_Selector (new in Unicode 4.0.1).
@ UCHAR_NUMERIC_VALUE
Double property Numeric_Value.
@ UCHAR_DOUBLE_START
First constant for double Unicode properties.
@ UCHAR_HEX_DIGIT
Binary property Hex_Digit.
@ UCHAR_SIMPLE_TITLECASE_MAPPING
String property Simple_Titlecase_Mapping.
@ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
Enumerated property Trail_Canonical_Combining_Class.
@ UCHAR_DIACRITIC
Binary property Diacritic.
@ UCHAR_XID_CONTINUE
Binary property XID_Continue.
@ UCHAR_NFKC_QUICK_CHECK
Enumerated property NFKC_Quick_Check.
@ UCHAR_INDIC_CONJUNCT_BREAK
Enumerated property Indic_Conjunct_Break.
@ UCHAR_HYPHEN
Binary property Hyphen.
@ UCHAR_RADICAL
Binary property Radical (new in Unicode 3.2).
@ UCHAR_BIDI_PAIRED_BRACKET_TYPE
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
@ UCHAR_ID_COMPAT_MATH_CONTINUE
Binary property ID_Compat_Math_Continue.
@ UCHAR_DASH
Binary property Dash.
@ UCHAR_ASCII_HEX_DIGIT
Binary property ASCII_Hex_Digit.
@ UCHAR_ID_CONTINUE
Binary property ID_Continue.
@ UCHAR_NFKD_INERT
Binary property NFKD_Inert.
@ UCHAR_BIDI_MIRRORING_GLYPH
String property Bidi_Mirroring_Glyph.
@ UCHAR_LEAD_CANONICAL_COMBINING_CLASS
Enumerated property Lead_Canonical_Combining_Class.
@ UCHAR_UNICODE_1_NAME
String property Unicode_1_Name.
@ UCHAR_BIDI_MIRRORED
Binary property Bidi_Mirrored.
@ UCHAR_CASE_IGNORABLE
Binary property Case_Ignorable.
@ UCHAR_EAST_ASIAN_WIDTH
Enumerated property East_Asian_Width.
@ UCHAR_STRING_LIMIT
One more than the last constant for string Unicode properties.
@ UCHAR_IDENTIFIER_TYPE
Miscellaneous property Identifier_Type.
@ UCHAR_EMOJI_PRESENTATION
Binary property Emoji_Presentation.
@ UCHAR_GRAPHEME_BASE
Binary property Grapheme_Base (new in Unicode 3.2).
@ UCHAR_NFKD_QUICK_CHECK
Enumerated property NFKD_Quick_Check.
@ UCHAR_NAME
String property Name.
@ UCHAR_UPPERCASE
Binary property Uppercase.
@ UCHAR_CASE_FOLDING
String property Case_Folding.
@ UCHAR_INDIC_SYLLABIC_CATEGORY
Enumerated property Indic_Syllabic_Category.
@ UCHAR_MATH
Binary property Math.
@ UCHAR_NUMERIC_TYPE
Enumerated property Numeric_Type.
@ UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE
Binary property of strings RGI_Emoji_Modifier_Sequence.
@ UCHAR_ISO_COMMENT
Deprecated string property ISO_Comment.
@ UCHAR_SCRIPT
Enumerated property Script.
@ UCHAR_CHANGES_WHEN_TITLECASED
Binary property Changes_When_Titlecased.
@ UCHAR_EXTENDED_PICTOGRAPHIC
Binary property Extended_Pictographic.
@ UCHAR_DEPRECATED
Binary property Deprecated (new in Unicode 3.2).
@ UCHAR_MODIFIER_COMBINING_MARK
Binary property Modifier_Combining_Mark.
@ UCHAR_CHANGES_WHEN_UPPERCASED
Binary property Changes_When_Uppercased.
@ UCHAR_CHANGES_WHEN_CASEFOLDED
Binary property Changes_When_Casefolded.
@ UCHAR_INDIC_POSITIONAL_CATEGORY
Enumerated property Indic_Positional_Category.
@ UCHAR_DEFAULT_IGNORABLE_CODE_POINT
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
@ UCHAR_GENERAL_CATEGORY
Enumerated property General_Category.
@ UCHAR_INT_START
First constant for enumerated/integer Unicode properties.
@ UCHAR_EMOJI_MODIFIER
Binary property Emoji_Modifier.
@ UCHAR_NFD_QUICK_CHECK
Enumerated property NFD_Quick_Check.
@ UCHAR_IDS_BINARY_OPERATOR
Binary property IDS_Binary_Operator (new in Unicode 3.2).
@ UCHAR_BINARY_START
First constant for binary Unicode properties.
@ UCHAR_IDS_UNARY_OPERATOR
Binary property IDS_Unary_Operator.
@ UCHAR_TERMINAL_PUNCTUATION
Binary property Terminal_Punctuation.
@ UCHAR_GENERAL_CATEGORY_MASK
Bitmask property General_Category_Mask.
@ UCHAR_MASK_START
First constant for bit-mask Unicode properties.
@ UCHAR_DECOMPOSITION_TYPE
Enumerated property Decomposition_Type.
@ UCHAR_TITLECASE_MAPPING
String property Titlecase_Mapping.
@ UCHAR_HANGUL_SYLLABLE_TYPE
Enumerated property Hangul_Syllable_Type (new in Unicode 4.0).
@ UCHAR_LINE_BREAK
Enumerated property Line_Break.
@ UCHAR_SIMPLE_UPPERCASE_MAPPING
String property Simple_Uppercase_Mapping.
@ UCHAR_POSIX_ALNUM
Binary property alnum (a C/POSIX character class).
@ UCHAR_JOINING_TYPE
Enumerated property Joining_Type.
@ UCHAR_EMOJI_KEYCAP_SEQUENCE
Binary property of strings Emoji_Keycap_Sequence.
@ UCHAR_QUOTATION_MARK
Binary property Quotation_Mark.
@ UCHAR_NFC_INERT
Binary property NFC_Inert.
@ UCHAR_LOWERCASE_MAPPING
String property Lowercase_Mapping.
@ UCHAR_SIMPLE_CASE_FOLDING
String property Simple_Case_Folding.
@ UCHAR_JOIN_CONTROL
Binary property Join_Control.
@ UCHAR_NONCHARACTER_CODE_POINT
Binary property Noncharacter_Code_Point.
@ UCHAR_BIDI_CONTROL
Binary property Bidi_Control.
@ UCHAR_CHANGES_WHEN_LOWERCASED
Binary property Changes_When_Lowercased.
@ UCHAR_BINARY_LIMIT
One more than the last constant for binary Unicode properties.
@ UCHAR_IDS_TRINARY_OPERATOR
Binary property IDS_Trinary_Operator (new in Unicode 3.2).
@ UCHAR_ID_START
Binary property ID_Start.
@ UCHAR_AGE
String property Age.
@ UCHAR_WORD_BREAK
Enumerated property Word_Break (new in Unicode 4.1).
@ UCHAR_DOUBLE_LIMIT
One more than the last constant for double Unicode properties.
@ UCHAR_EMOJI_MODIFIER_BASE
Binary property Emoji_Modifier_Base.
@ UCHAR_EMOJI_COMPONENT
Binary property Emoji_Component.
@ UCHAR_POSIX_BLANK
Binary property blank (a C/POSIX character class).
@ UCHAR_SIMPLE_LOWERCASE_MAPPING
String property Simple_Lowercase_Mapping.
@ UCHAR_NFD_INERT
Binary property NFD_Inert.
@ UCHAR_POSIX_PRINT
Binary property print (a C/POSIX character class).
@ UCHAR_SENTENCE_BREAK
Enumerated property Sentence_Break (new in Unicode 4.1).
@ UCHAR_IDEOGRAPHIC
Binary property Ideographic.
@ UCHAR_ID_COMPAT_MATH_START
Binary property ID_Compat_Math_Start.
@ UCHAR_UNIFIED_IDEOGRAPH
Binary property Unified_Ideograph (new in Unicode 3.2).
@ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
Binary property Changes_When_NFKC_Casefolded.
@ UCHAR_NFC_QUICK_CHECK
Enumerated property NFC_Quick_Check.
@ UCHAR_CASE_SENSITIVE
Binary property Case_Sensitive.
@ UCHAR_UPPERCASE_MAPPING
String property Uppercase_Mapping.
@ UCHAR_BIDI_CLASS
Enumerated property Bidi_Class.
@ UCHAR_BASIC_EMOJI
Binary property of strings Basic_Emoji.
@ UCHAR_MASK_LIMIT
One more than the last constant for bit-mask Unicode properties.
@ UCHAR_IDENTIFIER_STATUS
Enumerated property Identifier_Status.
@ UCHAR_JOINING_GROUP
Enumerated property Joining_Group.
@ UCHAR_LOGICAL_ORDER_EXCEPTION
Binary property Logical_Order_Exception (new in Unicode 3.2).
@ UCHAR_EXTENDER
Binary property Extender.
@ UCHAR_STRING_START
First constant for string Unicode properties.
@ UCHAR_SCRIPT_EXTENSIONS
Miscellaneous property Script_Extensions (new in Unicode 6.0).
@ UCHAR_FULL_COMPOSITION_EXCLUSION
Binary property Full_Composition_Exclusion.
@ UCHAR_EMOJI
Binary property Emoji.
@ UCHAR_LOWERCASE
Binary property Lowercase.
@ UCHAR_CHANGES_WHEN_CASEMAPPED
Binary property Changes_When_Casemapped.
@ UCHAR_POSIX_XDIGIT
Binary property xdigit (a C/POSIX character class).
U_CAPI int32_t u_getIntPropertyMinValue(UProperty which)
Get the minimum value for an enumerated/integer/binary Unicode property.
U_CAPI UBool u_isalnum(UChar32 c)
Determines whether the specified code point is an alphanumeric character (letter or digit) according ...
U_CAPI UBool u_isalpha(UChar32 c)
Determines whether the specified code point is a letter character.
U_CAPI UChar32 u_tolower(UChar32 c)
The given character is mapped to its lowercase equivalent according to UnicodeData.txt...
U_CAPI int32_t u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the FC_NFKC_Closure property string for a character.
UIndicSyllabicCategory
Indic Syllabic Category constants.
@ U_INSC_INVISIBLE_STACKER
@ U_INSC_SYLLABLE_MODIFIER
@ U_INSC_MODIFYING_LETTER
@ U_INSC_REORDERING_KILLER
@ U_INSC_REGISTER_SHIFTER
@ U_INSC_CONSONANT_KILLER
@ U_INSC_CONSONANT_HEAD_LETTER
@ U_INSC_BRAHMI_JOINING_NUMBER
@ U_INSC_CANTILLATION_MARK
@ U_INSC_VOWEL_INDEPENDENT
@ U_INSC_CONSONANT_MEDIAL
@ U_INSC_CONSONANT_WITH_STACKER
@ U_INSC_CONSONANT_PREFIXED
@ U_INSC_CONSONANT_SUCCEEDING_REPHA
@ U_INSC_CONSONANT_PLACEHOLDER
@ U_INSC_CONSONANT_PRECEDING_REPHA
@ U_INSC_CONSONANT_SUBJOINED
@ U_INSC_CONSONANT_INITIAL_POSTFIXED
UWordBreakValues
Word Break constants.
@ U_WB_REGIONAL_INDICATOR
@ U_WB_COUNT
One more than the highest normal UWordBreakValues value.
U_CAPI UBool u_isJavaIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in a Java identifier.
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
@ U_BPT_CLOSE
Close paired bracket.
@ U_BPT_COUNT
One more than the highest normal UBidiPairedBracketType value.
@ U_BPT_NONE
Not a paired bracket.
@ U_BPT_OPEN
Open paired bracket.
C API: This file defines an abstract map from Unicode code points to integer values.
struct UCPMap UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define U_DEPRECATED
This is used to declare a function as a deprecated public ICU C API.
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
int8_t UBool
The ICU boolean type, a signed-byte integer.
#define U_CAPI
This is used to declare a function as a public ICU C API.
char16_t UChar
The base type for UTF-16 code units and pointers.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.