4040//! 3. The sequence `"\r\n"` has width 1.
4141//! 4. [Lisu tone letter] combinations consisting of a character in the range `'\u{A4F8}'..='\u{A4FB}'`
4242//! followed by a character in the range `'\u{A4FC}'..='\u{A4FD}'` have width 1.
43- //! 5. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2.
44- //! 6. The following have width 0:
43+ //! 5. In an East Asian context only, `<`, `=`, or `>` have width 2 when followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY].
44+ //! 6. [`'\u{115F}'` HANGUL CHOSEONG FILLER](https://util.unicode.org/UnicodeJsps/character.jsp?a=115F) has width 2.
45+ //! 7. The following have width 0:
4546//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BDefault_Ignorable_Code_Point%7D)
4647//! with the [`Default_Ignorable_Code_Point`] property.
4748//! - [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGrapheme_Extend%7D)
6465//! - [`'\u{0891}'` PIASTRE MARK ABOVE](https://util.unicode.org/UnicodeJsps/character.jsp?a=0891), and
6566//! - [`'\u{08E2}'` DISPUTED END OF AYAH](https://util.unicode.org/UnicodeJsps/character.jsp?a=08E2).
6667//! - [`'\u{A8FA}'` DEVANAGARI CARET](https://util.unicode.org/UnicodeJsps/character.jsp?a=A8FA).
67- //! 7. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
68+ //! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DF%7D%5Cp%7BEast_Asian_Width%3DW%7D)
6869//! with an [`East_Asian_Width`] of [`Fullwidth`] or [`Wide`] have width 2.
69- //! 8. [Characters](https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BEast_Asian_Width%3DA%7D)
70- //! with an [`East_Asian_Width`] of [`Ambiguous`] have width 2 in an East Asian context, and width 1 otherwise.
71- //! 9. All other characters have width 1.
70+ //! 9. Characters fulfilling all of the following conditions have width 2 in an East Asian context, and width 1 otherwise:
71+ //! - Has an [`East_Asian_Width`] of [`Ambiguous`], or
72+ //! has a canonical decomposition to an [`Ambiguous`] character followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY], or
73+ //! is [`'\u{0387}'` GREEK ANO TELEIA](https://util.unicode.org/UnicodeJsps/character.jsp?a=0387), and
74+ //! - Does not have a [`General_Category`] of `Modifier_Symbol`, and
75+ //! - Does not have a [`Script`] of `Latin`, `Greek`, or `Cyrillic`, or is a Roman numeral in the range `'\u{2160}'..='\u{217F}'`.
76+ //! 10. All other characters have width 1.
77+ //!
78+ //! [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY]: https://util.unicode.org/UnicodeJsps/character.jsp?a=0338
7279//!
7380//! [`Default_Ignorable_Code_Point`]: https://www.unicode.org/versions/Unicode15.0.0/ch05.pdf#G40095
7481//! [`East_Asian_Width`]: https://www.unicode.org/reports/tr11/#ED1
7582//! [`Emoji_Presentation`]: https://unicode.org/reports/tr51/#def_emoji_presentation
83+ //! [`General_Category`]: https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G124142
7684//! [`Grapheme_Extend`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G52443
7785//! [`Hangul_Syllable_Type`]: https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G45593
7886//! [`Prepended_Concatenation_Mark`]: https://www.unicode.org/versions/Unicode15.0.0/ch23.pdf#G37908
87+ //! [`Script`]: https://www.unicode.org/reports/tr24/#Script
7988//!
8089//! [`Fullwidth`]: https://www.unicode.org/reports/tr11/#ED2
8190//! [`Wide`]: https://www.unicode.org/reports/tr11/#ED4
8493//! [Emoji presentation sequences]: https://unicode.org/reports/tr51/#def_emoji_presentation_sequence
8594//! [text presentation sequences]: https://unicode.org/reports/tr51/#def_text_presentation_sequence
8695//!
87- //! [Enclosed Ideographic Supplement]: https://unicode.org/charts/PDF/U1F200.pdf
96+ //! [Enclosed Ideographic Supplement]: https://unicode.org/charts/nameslist/n_1F200.html
8897//!
8998//! [Lisu tone letter]: https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078
9099//!
91100//! ## Canonical equivalence
92101//!
93- //! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width.
94- //! However, this guarantee does not currently hold for the CJK width variants.
102+ //! Canonically equivalent strings are assigned the same width by both the CJK and non-CJK width methods.
95103
96104#![ forbid( unsafe_code) ]
97105#![ deny( missing_docs) ]
@@ -198,14 +206,17 @@ enum NextCharInfo {
198206#[ default]
199207Default ,
200208/// `'\n'`
201- LineFeed =0x0A ,
209+ LineFeed ,
210+ /// `'\u{0338}'` (COMBINING LONG SOLIDUS OVERLAY).
211+ /// Tracked to preserve canonical equivalence in the CJK width variants.
212+ CombiningLongSolidusOverlay ,
202213/// `'\u{A4FC}'..='\u{A4FD}'`
203214/// <https://www.unicode.org/versions/Unicode15.0.0/ch18.pdf#G42078>
204215TrailingLisuToneLetter ,
205216/// `'\u{FE0E}'`
206- Vs15 = 0x0E ,
217+ Vs15 ,
207218/// `'\u{FE0F}'`
208- Vs16 = 0x0F ,
219+ Vs16 ,
209220}
210221
211222fn str_width ( s : & str , is_cjk : bool ) ->usize {
@@ -222,7 +233,11 @@ fn str_width(s: &str, is_cjk: bool) -> usize {
222233/// they're treated as single width.
223234#[ inline]
224235fn width_in_str ( c : char , is_cjk : bool , next_info : NextCharInfo ) ->( usize , NextCharInfo ) {
225- if next_info ==NextCharInfo :: Vs16 && cw:: starts_emoji_presentation_seq ( c) {
236+ if ( is_cjk
237+ && next_info ==NextCharInfo :: CombiningLongSolidusOverlay
238+ &&matches ! ( c, '<' |'=' |'>' ) )
239+ ||( next_info ==NextCharInfo :: Vs16 && cw:: starts_emoji_presentation_seq ( c) )
240+ {
226241( 2 , NextCharInfo :: Default )
227242} else if c <='\u{A0}' {
228243match c{
@@ -235,6 +250,7 @@ fn width_in_str(c: char, is_cjk: bool, next_info: NextCharInfo) -> (usize, NextC
235250( '\u{A4F8}' ..='\u{A4FB}' , NextCharInfo :: TrailingLisuToneLetter ) =>{
236251( 0 , NextCharInfo :: Default )
237252}
253+ ( '\u{0338}' , _) =>( 0 , NextCharInfo :: CombiningLongSolidusOverlay ) ,
238254( '\u{A4FC}' ..='\u{A4FD}' , _) =>( 1 , NextCharInfo :: TrailingLisuToneLetter ) ,
239255( '\u{FE0E}' , _) =>( 0 , NextCharInfo :: Vs15 ) ,
240256( '\u{FE0F}' , _) =>( 0 , NextCharInfo :: Vs16 ) ,