Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2517d68

Browse files
Treat ambiguous Modifier_Letters as narrow (#63)
* Treat ambiguous `Modifier_Letter`s as narrow
  This matches the behavior of common fonts.
  Affects 6 characters: https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%5B%3AEast_Asian_Width%3DAmbiguous%3A%5D-%5B%5B%3AScript%3D%2FLatin%7CGreek%7CCyrillic%2F%3A%5D-%5B%5B%3ABlock%3DNumber+Forms%3A%5D%26%5B%3Asubhead%3DRoman+numerals%3A%5D%5D%5D%5D%26%5B%3AModifier_Letter%3A%5D
* Simplify derivation of ambiguous
  Use `Letter` general category instead of script and block.
  Changes `ℓ` to narrow, matching common fonts.
1 parent 8e40640, commit 2517d68

File tree

4 files changed

+20
-32
lines changed

4 files changed

+20
-32
lines changed

‎scripts/unicode.py‎

Lines changed: 2 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
# - NormalizationTest.txt (for tests only)
1919
# - PropList.txt
2020
# - ReadMe.txt
21-
# - Scripts.txt
2221
# - UnicodeData.txt
2322
# - auxiliary/GraphemeBreakProperty.txt
2423
# - emoji/emoji-data.txt
@@ -430,22 +429,10 @@ def load_east_asian_widths() -> list[EastAsianWidth]:
430429
# Catch any leftover codepoints and assign them implicit Neutral/narrow width.
431430
width_map.append(EastAsianWidth.NARROW)
432431

433-
# Characters from alphabetic scripts are narrow
434-
load_property(
435-
"Scripts.txt",
436-
r"(?:Latin|Greek|Cyrillic)",
437-
lambdacp: (
438-
operator.setitem(width_map,cp,EastAsianWidth.NARROW)
439-
ifwidth_map[cp]==EastAsianWidth.AMBIGUOUS
440-
andnot (0x2160<=cp<=0x217F)# Roman numerals remain ambiguous
441-
elseNone
442-
),
443-
)
444-
445-
# Ambiguous `Modifier_Symbol`s are narrow
432+
# Ambiguous `Letter`s and `Modifier_Symbol`s are narrow
446433
load_property(
447434
"extracted/DerivedGeneralCategory.txt",
448-
"Sk",
435+
r"(:?Lu|Ll|Lt|Lm|Lo|Sk)",
449436
lambdacp: (
450437
operator.setitem(width_map,cp,EastAsianWidth.NARROW)
451438
ifwidth_map[cp]==EastAsianWidth.AMBIGUOUS

‎src/lib.rs‎

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,8 +122,7 @@
122122
//! - Has an [`East_Asian_Width`] of [`Ambiguous`], or
123123
//! has a canonical decomposition to an [`Ambiguous`] character followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY], or
124124
//! is [`'\u{0387}'` GREEK ANO TELEIA](https://util.unicode.org/UnicodeJsps/character.jsp?a=0387), and
125-
//! - Does not have a [`General_Category`] of `Modifier_Symbol`, and
126-
//! - Does not have a [`Script`] of `Latin`, `Greek`, or `Cyrillic`, or is a Roman numeral in the range `'\u{2160}'..='\u{217F}'`.
125+
//! - Does not have a [`General_Category`] of `Letter` or `Modifier_Symbol`.
127126
//! 7. All other characters have width 1.
128127
//!
129128
//! [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY]: https://util.unicode.org/UnicodeJsps/character.jsp?a=0338

‎src/tables.rs‎

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1022,17 +1022,17 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([
10221022
],
10231023
#[cfg(feature = "cjk")]
10241024
[
1025-
0x00, 0x9D, 0x02, 0x02, 0x02,0x9E, 0x9F, 0xA0, 0x02, 0x04, 0x02, 0x05, 0x06, 0x07, 0x08,
1025+
0x00, 0x9D, 0x02, 0x02, 0x02,0x02, 0x9E, 0x9F, 0x02, 0x04, 0x02, 0x05, 0x06, 0x07, 0x08,
10261026
0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
10271027
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x02, 0x02, 0x1E, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
10281028
0x02, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x02, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x02, 0x2A,
10291029
0x02, 0x02, 0x02, 0x02,
10301030
],
10311031
#[cfg(feature = "cjk")]
10321032
[
1033-
0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,0xA7, 0x2E, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE,
1034-
0x33, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,0xAF, 0x02, 0x02, 0x35, 0x36, 0x37, 0x02, 0x38,
1035-
0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,0xB0, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
1033+
0xA0,0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,0x2E, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1034+
0x33, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,0xAE, 0x02, 0x02, 0x35, 0x36, 0x37, 0x02, 0x38,
1035+
0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,0xAF, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10361036
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10371037
0x39, 0x39, 0x39, 0x39,
10381038
],
@@ -1042,23 +1042,23 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([
10421042
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10431043
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10441044
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x4C, 0x02, 0x02, 0x02, 0x02, 0x02,
1045-
0xB1, 0x4E, 0x4F,0xB2,
1045+
0xB0, 0x4E, 0x4F,0xB1,
10461046
],
10471047
#[cfg(feature = "cjk")]
10481048
[
10491049
0x85, 0x86, 0x75, 0x02, 0x02, 0x87, 0x02, 0x02, 0x02, 0x88, 0x02, 0x02, 0x02, 0x02, 0x02,
10501050
0x02, 0x02, 0x89, 0x8A, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
1051-
0x02, 0x02, 0x8B, 0x8C,0xB3, 0xB4, 0x8E, 0x02, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
1051+
0x02, 0x02, 0x8B, 0x8C,0xB2, 0xB3, 0x8E, 0x02, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
10521052
0x96, 0x02, 0x97, 0x02, 0x02, 0x98, 0x99, 0x9A, 0x9B, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
10531053
0x02, 0x02, 0x02, 0x02,
10541054
],
10551055
]);
10561056

10571057
#[cfg(feature = "cjk")]
1058-
const WIDTH_LEAVES_LEN: usize =181;
1058+
const WIDTH_LEAVES_LEN: usize =180;
10591059
#[cfg(not(feature = "cjk"))]
10601060
const WIDTH_LEAVES_LEN: usize = 157;
1061-
/// Autogenerated.181 sub-table(s). Consult [`lookup_width`] for layout info.
1061+
/// Autogenerated.180 sub-table(s). Consult [`lookup_width`] for layout info.
10621062
static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([
10631063
[
10641064
0x55, 0x55, 0x75, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
@@ -1852,12 +1852,6 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([
18521852
0x55, 0x55,
18531853
],
18541854
#[cfg(feature = "cjk")]
1855-
[
1856-
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
1857-
0x55, 0x55, 0x95, 0xA9, 0x59, 0x56, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
1858-
0x55, 0x55,
1859-
],
1860-
#[cfg(feature = "cjk")]
18611855
[
18621856
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
18631857
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55,
@@ -1883,7 +1877,7 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([
18831877
],
18841878
#[cfg(feature = "cjk")]
18851879
[
1886-
0x95, 0x59, 0x59, 0x55,0x95, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
1880+
0x95, 0x59, 0x59, 0x55,0x55, 0x65, 0x55, 0x55, 0x69, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
18871881
0x55, 0x55, 0x55, 0x55, 0x55, 0x95, 0x56, 0x95, 0x6A, 0xAA, 0xAA, 0xAA, 0x55, 0xAA, 0xAA,
18881882
0x5A, 0x55,
18891883
],

‎tests/tests.rs‎

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,14 @@ fn test_default_ignorable() {
7878
assert_width!('\u{E0000}',Some(0),Some(0));
7979
}
8080

81+
#[test]
82+
fntest_ambiguous(){
83+
assert_width!("\u{B7}",1,2);
84+
assert_width!("\u{0387}",1,2);
85+
assert_width!("\u{A8}",1,1);
86+
assert_width!("\u{02C9}",1,1);
87+
}
88+
8189
#[test]
8290
fntest_jamo(){
8391
assert_width!('\u{1100}',Some(2),Some(2));

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp