Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 558890f

Browse files
Support Unicode 17
Add support for Unicode 17, including VS3 variation sequences for Mongolian quotation marks (https://www.unicode.org/L2/L2025/25028-vs3-sibe-quotation-marks.pdf).
1 parent 7a7fcdc, commit 558890f

File tree

4 files changed

+203
-91
lines changed

4 files changed

+203
-91
lines changed

‎scripts/unicode.py

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@
4343
fromitertoolsimportbatched
4444
fromtypingimportCallable,Iterable
4545

46-
UNICODE_VERSION="16.0.0"
46+
UNICODE_VERSION="17.0.0"
4747
"""The version of the Unicode data files to download."""
4848

4949
NUM_CODEPOINTS=0x110000
@@ -178,7 +178,9 @@ class WidthState(enum.IntEnum):
178178
(if set, should also set 3rd and 4th)
179179
- 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state
180180
where no ZWJ has been encountered yet; encountering one flips this on
181-
- Seventh bit: is VS1 (if CJK) or is VS2 (not CJK)
181+
- Seventh bit:
182+
- CJK mode: is VS1 or VS3
183+
- Not CJK: is VS2
182184
"""
183185

184186
# BASIC WIDTHS
@@ -275,8 +277,8 @@ class WidthState(enum.IntEnum):
275277

276278
# VARIATION SELECTORS
277279

278-
VARIATION_SELECTOR_1_OR_2=0b0000_0010_0000_0000
279-
"\\uFE00 if CJK, or\\uFE01 otherwise"
280+
VARIATION_SELECTOR_1_2_OR_3=0b0000_0010_0000_0000
281+
"\\uFE00or\\uFE02if CJK, or\\uFE01 otherwise"
280282

281283
# Text presentation sequences (not CJK)
282284
VARIATION_SELECTOR_15=0b0100_0000_0000_0000
@@ -373,7 +375,7 @@ def width_alone(self) -> int:
373375
|WidthState.COMBINING_LONG_SOLIDUS_OVERLAY
374376
|WidthState.VARIATION_SELECTOR_15
375377
|WidthState.VARIATION_SELECTOR_16
376-
|WidthState.VARIATION_SELECTOR_1_OR_2
378+
|WidthState.VARIATION_SELECTOR_1_2_OR_3
377379
):
378380
return0
379381
case (
@@ -657,11 +659,12 @@ def load_width_maps() -> tuple[list[WidthState], list[WidthState]]:
657659
ea[cp]=width
658660

659661
# East-Asian only
660-
ea[0xFE00]=WidthState.VARIATION_SELECTOR_1_OR_2
661662
ea[0x0338]=WidthState.COMBINING_LONG_SOLIDUS_OVERLAY
663+
ea[0xFE00]=WidthState.VARIATION_SELECTOR_1_2_OR_3
664+
ea[0xFE02]=WidthState.VARIATION_SELECTOR_1_2_OR_3
662665

663666
# Not East Asian only
664-
not_ea[0xFE01]=WidthState.VARIATION_SELECTOR_1_OR_2
667+
not_ea[0xFE01]=WidthState.VARIATION_SELECTOR_1_2_OR_3
665668
not_ea[0xFE0E]=WidthState.VARIATION_SELECTOR_15
666669

667670
return (not_ea,ea)
@@ -759,7 +762,7 @@ def load_solidus_transparent(
759762
num_chars=len(ccc_above_1)
760763

761764
forcpinccc_above_1:
762-
ifcpnotin [0xFE00,0xFE0F]:
765+
ifcpnotin [0xFE00,0xFE02,0xFE0F]:
763766
assert (
764767
cjk_width_map[cp].table_width()!=CharWidthInTable.SPECIAL
765768
),f"U+{cp:X}"
@@ -1317,14 +1320,14 @@ def lookup_fns(
13171320

13181321
ifis_cjk:
13191322
s+="""
1320-
ifc =='\\u{FE00}' {
1321-
return (0, next_info.set_vs1_2());
1323+
ifmatches!(c,'\\u{FE00}' | '\\u{FE02}') {
1324+
return (0, next_info.set_vs1_2_3());
13221325
}
13231326
"""
13241327
else:
13251328
s+="""
13261329
if c == '\\u{FE01}' {
1327-
return (0, next_info.set_vs1_2());
1330+
return (0, next_info.set_vs1_2_3());
13281331
}
13291332
if c == '\\u{FE0E}' {
13301333
return (0, next_info.set_text_presentation());
@@ -1337,15 +1340,15 @@ def lookup_fns(
13371340
}
13381341
} else """
13391342

1340-
s+="""if next_info.is_vs1_2() {
1343+
s+="""if next_info.is_vs1_2_3() {
13411344
if matches!(c, '\\u{2018}' | '\\u{2019}' | '\\u{201C}' | '\\u{201D}') {
13421345
return ("""
13431346

13441347
s+=str(2-is_cjk)
13451348

13461349
s+=""", WidthInfo::DEFAULT);
13471350
} else {
1348-
next_info = next_info.unset_vs1_2();
1351+
next_info = next_info.unset_vs1_2_3();
13491352
}
13501353
}
13511354
if next_info.is_ligature_transparent() {
@@ -1655,7 +1658,7 @@ def emit_module(
16551658
self.0
16561659
| WidthInfo::VARIATION_SELECTOR_16.0
16571660
& !WidthInfo::VARIATION_SELECTOR_15.0
1658-
& !WidthInfo::VARIATION_SELECTOR_1_OR_2.0,
1661+
& !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0,
16591662
)
16601663
}} else {{
16611664
Self::VARIATION_SELECTOR_16
@@ -1683,7 +1686,7 @@ def emit_module(
16831686
self.0
16841687
| WidthInfo::VARIATION_SELECTOR_15.0
16851688
& !WidthInfo::VARIATION_SELECTOR_16.0
1686-
& !WidthInfo::VARIATION_SELECTOR_1_OR_2.0,
1689+
& !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0,
16871690
)
16881691
}} else {{
16891692
Self(WidthInfo::VARIATION_SELECTOR_15.0)
@@ -1696,27 +1699,28 @@ def emit_module(
16961699
}}
16971700
16981701
/// Has 7th bit set
1699-
fn is_vs1_2(self) -> bool {{
1700-
(self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2.0) == WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1702+
fn is_vs1_2_3(self) -> bool {{
1703+
(self.0 & WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0)
1704+
== WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0
17011705
}}
17021706
17031707
/// Set 7th bit
1704-
fnset_vs1_2(self) -> Self {{
1708+
fnset_vs1_2_3(self) -> Self {{
17051709
if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
17061710
Self(
17071711
self.0
1708-
| WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1712+
| WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0
17091713
& !WidthInfo::VARIATION_SELECTOR_15.0
17101714
& !WidthInfo::VARIATION_SELECTOR_16.0,
17111715
)
17121716
}} else {{
1713-
Self(WidthInfo::VARIATION_SELECTOR_1_OR_2.0)
1717+
Self(WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0)
17141718
}}
17151719
}}
17161720
17171721
/// Clear 7th bit
1718-
fnunset_vs1_2(self) -> Self {{
1719-
Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0)
1722+
fnunset_vs1_2_3(self) -> Self {{
1723+
Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0)
17201724
}}
17211725
}}
17221726

‎src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@
6262
//! - Outside of an East Asian context, [text presentation sequences] have width 1 if their base character:
6363
//! - Has the [`Emoji_Presentation`] property, and
6464
//! - Is not in the [Enclosed Ideographic Supplement] block.
65-
//! - [`'\u{2018}'`, `'\u{2019}'`, `'\u{201C}'`, and `'\u{201D}'`][General Punctuation] always have width 1 when followed by '\u{FE00}',
66-
//! and width 2 when followed by '\u{FE01}'.
65+
//! - [`'\u{2018}'`, `'\u{2019}'`, `'\u{201C}'`, and `'\u{201D}'`][General Punctuation] always have width 1
66+
//!   when followed by '\u{FE00}' or '\u{FE02}', and width 2 when followed by '\u{FE01}'.
6767
//! - Script-specific ligatures:
6868
//! - For all the following ligatures, the insertion of any number of [default-ignorable][`Default_Ignorable_Code_Point`]
6969
//! [combining marks] anywhere in the sequence will not change the total width. In addition, for all non-Arabic

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp