|
9 | 9 | // except according to those terms. |
10 | 10 |
|
11 | 11 | //! Determine displayed width of `char` and `str` types according to |
12 | | -//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
13 | | -//! rules. |
| 12 | +//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/), |
| 13 | +//! other portions of the Unicode standard, and common implementations of |
| 14 | +//! POSIX [`wcwidth()`](https://pubs.opengroup.org/onlinepubs/9699919799/). |
| 15 | +//! See the [Rules for determining width](#rules-for-determining-width) section |
| 16 | +//! for the exact rules. |
14 | 17 | //! |
15 | | -//! ```rust |
16 | | -//! extern crate unicode_width; |
| 18 | +//! This crate is `#![no_std]`. |
17 | 19 | //! |
| 20 | +//! ```rust |
18 | 21 | //! use unicode_width::UnicodeWidthStr; |
19 | 22 | //! |
20 | | -//! fn main() { |
21 | | -//! let teststr = "Hello, world!"; |
22 | | -//! let width = UnicodeWidthStr::width(teststr); |
23 | | -//! println!("{}", teststr); |
24 | | -//! println!("The above string is {} columns wide.", width); |
25 | | -//! let width = teststr.width_cjk(); |
26 | | -//! println!("The above string is {} columns wide (CJK).", width); |
27 | | -//! } |
| 23 | +//! let teststr = "Hello, world!"; |
| 24 | +//! let width = UnicodeWidthStr::width(teststr); |
| 25 | +//! println!("{}", teststr); |
| 26 | +//! println!("The above string is {} columns wide.", width); |
| 27 | +//! let width = teststr.width_cjk(); |
| 28 | +//! println!("The above string is {} columns wide (CJK).", width); |
28 | 29 | //! ``` |
29 | 30 | //! |
30 | | -//! # features
| 31 | +//! # Rules for determining width
31 | 32 | //! |
32 | | -//! unicode-width does not depend on `std`, so it can be used in crates
33 | | -//! with the `#![no_std]` attribute.
| 33 | +//! This crate currently uses the following rules to determine the width of a
| 34 | +//! character or string, in order of decreasing precedence. These may be tweaked in the future.
34 | 35 | //! |
35 | | -//! # crates.io |
| 36 | +//! 1. [`'\u{00AD}'` SOFT HYPHEN](https://util.unicode.org/UnicodeJsps/character.jsp?a=00AD) has width 1. |
| 37 | +//! 2. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2. |
| 38 | +//! 3. The following have width 0: |
| 39 | +//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BDefault_Ignorable_Code_Point%7D) |
| 40 | +//! with the [`Default_Ignorable_Code_Point`](https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G40095) property. |
| 41 | +//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGrapheme_Extend%7D) |
| 42 | +//! with the [`Grapheme_Extend`] property. |
| 43 | +//! - The following 8 characters, all of which have NFD decompositions consisting of two [`Grapheme_Extend`] characters:
| 44 | +//! - [`'\u{0CC0}'` KANNADA VOWEL SIGN II](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC0), |
| 45 | +//! - [`'\u{0CC7}'` KANNADA VOWEL SIGN EE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC7), |
| 46 | +//! - [`'\u{0CC8}'` KANNADA VOWEL SIGN AI](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CC8), |
| 47 | +//! - [`'\u{0CCA}'` KANNADA VOWEL SIGN O](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CCA), |
| 48 | +//! - [`'\u{0CCB}'` KANNADA VOWEL SIGN OO](https://util.unicode.org/UnicodeJsps/character.jsp?a=0CCB), |
| 49 | +//! - [`'\u{1B3B}'` BALINESE VOWEL SIGN RA REPA TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B3B), |
| 50 | +//! - [`'\u{1B3D}'` BALINESE VOWEL SIGN LA LENGA TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B3D), and |
| 51 | +//! - [`'\u{1B43}'` BALINESE VOWEL SIGN PEPET TEDUNG](https://util.unicode.org/UnicodeJsps/character.jsp?a=1B43). |
| 52 | +//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BHangul_Syllable_Type%3DV%7D%5Cp%7BHangul_Syllable_Type%3DT%7D) |
| 53 | +//! with a [`Hangul_Syllable_Type`](https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593) |
| 54 | +//! of `Vowel_Jamo` (`V`) or `Trailing_Jamo` (`T`). |
| 55 | +//! - [`'\0'` NUL](https://util.unicode.org/UnicodeJsps/character.jsp?a=0000). |
| 56 | +//! 4. The [control characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BCc%7D) |
| 57 | +//! have no defined width, and are ignored when determining the width of a string. |
| 58 | +//! 5. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D) |
| 59 | +//! with an [`East_Asian_Width`] of [`Fullwidth` (`F`)](https://www.unicode.org/reports/tr11/#ED2) |
| 60 | +//! or [`Wide` (`W`)](https://www.unicode.org/reports/tr11/#ED4) have width 2. |
| 61 | +//! 6. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D) |
| 62 | +//! with an [`East_Asian_Width`] of [`Ambiguous` (`A`)](https://www.unicode.org/reports/tr11/#ED6) |
| 63 | +//! have width 2 in an East Asian context, and width 1 otherwise. |
| 64 | +//! 7. All other characters have width 1. |
36 | 65 | //! |
37 | | -//! You can use this package in your project by adding the following |
38 | | -//! to your `Cargo.toml`: |
| 66 | +//! [`East_Asian_Width`]: https://www.unicode.org/reports/tr11/#ED1 |
| 67 | +//! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443 |
| 68 | +
|
39 | 69 | //! |
40 | | -//! ```toml
41 | | -//! [dependencies]
42 | | -//! unicode-width = "0.1.5"
43 | | -//! ```
| 70 | +//! ## Canonical equivalence
| 71 | +//! |
| 72 | +//! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width.
| 73 | +//! However, this guarantee does not currently hold for the CJK width variants.
44 | 74 |
|
45 | | -#![deny(missing_docs, unsafe_code)] |
| 75 | +#![forbid(unsafe_code)] |
| 76 | +#![deny(missing_docs)] |
46 | 77 | #![doc( |
47 | 78 | html_logo_url ="https://unicode-rs.github.io/unicode-rs_sm.png", |
48 | 79 | html_favicon_url ="https://unicode-rs.github.io/unicode-rs_sm.png" |
|