4343from itertools import batched
4444from typing import Callable ,Iterable
4545
46- UNICODE_VERSION = "16 .0.0"
46+ UNICODE_VERSION = "17 .0.0"
4747"""The version of the Unicode data files to download."""
4848
4949NUM_CODEPOINTS = 0x110000
@@ -84,11 +84,11 @@ def fetch_open(filename: str, local_prefix: str = "", emoji: bool = False):
8484localname = os .path .join (local_prefix ,basename )
8585if not os .path .exists (localname ):
8686if emoji :
87- prefix = f "emoji/ { UNICODE_VERSION [: - 2 ] } "
87+ prefix = "emoji"
8888else :
89- prefix = f" { UNICODE_VERSION } / ucd"
89+ prefix = " ucd"
9090urllib .request .urlretrieve (
91- f"https://www.unicode.org/Public/{ prefix } /{ filename } " ,
91+ f"https://www.unicode.org/Public/{ UNICODE_VERSION } / { prefix } /{ filename } " ,
9292localname ,
9393 )
9494try :
@@ -178,7 +178,9 @@ class WidthState(enum.IntEnum):
178178 (if set, should also set 3rd and 4th)
179179 - 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state
180180 where no ZWJ has been encountered yet; encountering one flips this on
181- - Seventh bit: is VS1 (if CJK) or is VS2 (not CJK)
181+ - Seventh bit:
182+ - CJK mode: is VS1 or VS3
183+ - Not CJK: is VS2
182184 """
183185
184186# BASIC WIDTHS
@@ -275,8 +277,8 @@ class WidthState(enum.IntEnum):
275277
276278# VARIATION SELECTORS
277279
278- VARIATION_SELECTOR_1_OR_2 = 0b0000_0010_0000_0000
279- "\\uFE00 if CJK, or \\uFE01 otherwise"
280+ VARIATION_SELECTOR_1_2_OR_3 = 0b0000_0010_0000_0000
281+ "\\uFE00 or \\uFE02 if CJK, or \\uFE01 otherwise"
280282
281283# Text presentation sequences (not CJK)
282284VARIATION_SELECTOR_15 = 0b0100_0000_0000_0000
@@ -373,7 +375,7 @@ def width_alone(self) -> int:
373375| WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
374376| WidthState .VARIATION_SELECTOR_15
375377| WidthState .VARIATION_SELECTOR_16
376- | WidthState .VARIATION_SELECTOR_1_OR_2
378+ | WidthState .VARIATION_SELECTOR_1_2_OR_3
377379 ):
378380return 0
379381case (
@@ -657,11 +659,12 @@ def load_width_maps() -> tuple[list[WidthState], list[WidthState]]:
657659ea [cp ]= width
658660
659661# East-Asian only
660- ea [0xFE00 ]= WidthState .VARIATION_SELECTOR_1_OR_2
661662ea [0x0338 ]= WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
663+ ea [0xFE00 ]= WidthState .VARIATION_SELECTOR_1_2_OR_3
664+ ea [0xFE02 ]= WidthState .VARIATION_SELECTOR_1_2_OR_3
662665
663666# Not East Asian only
664- not_ea [0xFE01 ]= WidthState .VARIATION_SELECTOR_1_OR_2
667+ not_ea [0xFE01 ]= WidthState .VARIATION_SELECTOR_1_2_OR_3
665668not_ea [0xFE0E ]= WidthState .VARIATION_SELECTOR_15
666669
667670return (not_ea ,ea )
@@ -759,7 +762,7 @@ def load_solidus_transparent(
759762num_chars = len (ccc_above_1 )
760763
761764for cp in ccc_above_1 :
762- if cp not in [0xFE00 ,0xFE0F ]:
765+ if cp not in [0xFE00 ,0xFE02 , 0xFE0F ]:
763766assert (
764767cjk_width_map [cp ].table_width ()!= CharWidthInTable .SPECIAL
765768 ),f"U+{ cp :X} "
@@ -1317,14 +1320,14 @@ def lookup_fns(
13171320
13181321if is_cjk :
13191322s += """
1320- ifc == '\\ u{FE00}' {
1321- return (0, next_info.set_vs1_2 ());
1323+ ifmatches!(c, '\\ u{FE00}' | ' \\ u{FE02}') {
1324+ return (0, next_info.set_vs1_2_3 ());
13221325 }
13231326 """
13241327else :
13251328s += """
13261329 if c == '\\ u{FE01}' {
1327- return (0, next_info.set_vs1_2 ());
1330+ return (0, next_info.set_vs1_2_3 ());
13281331 }
13291332 if c == '\\ u{FE0E}' {
13301333 return (0, next_info.set_text_presentation());
@@ -1337,15 +1340,15 @@ def lookup_fns(
13371340 }
13381341 } else """
13391342
1340- s += """if next_info.is_vs1_2 () {
1343+ s += """if next_info.is_vs1_2_3 () {
13411344 if matches!(c, '\\ u{2018}' | '\\ u{2019}' | '\\ u{201C}' | '\\ u{201D}') {
13421345 return ("""
13431346
13441347s += str (2 - is_cjk )
13451348
13461349s += """, WidthInfo::DEFAULT);
13471350 } else {
1348- next_info = next_info.unset_vs1_2 ();
1351+ next_info = next_info.unset_vs1_2_3 ();
13491352 }
13501353 }
13511354 if next_info.is_ligature_transparent() {
@@ -1655,7 +1658,7 @@ def emit_module(
16551658 self.0
16561659 | WidthInfo::VARIATION_SELECTOR_16.0
16571660 & !WidthInfo::VARIATION_SELECTOR_15.0
1658- & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0,
1661+ & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0,
16591662 )
16601663 }} else {{
16611664 Self::VARIATION_SELECTOR_16
@@ -1683,7 +1686,7 @@ def emit_module(
16831686 self.0
16841687 | WidthInfo::VARIATION_SELECTOR_15.0
16851688 & !WidthInfo::VARIATION_SELECTOR_16.0
1686- & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0,
1689+ & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0,
16871690 )
16881691 }} else {{
16891692 Self(WidthInfo::VARIATION_SELECTOR_15.0)
@@ -1696,27 +1699,28 @@ def emit_module(
16961699 }}
16971700
16981701 /// Has 7th bit set
1699- fn is_vs1_2(self) -> bool {{
1700- (self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2.0) == WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1702+ fn is_vs1_2_3(self) -> bool {{
1703+ (self.0 & WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0)
1704+ == WidthInfo::VARIATION_SELECTOR_1_2_OR_3.0
17011705 }}
17021706
17031707 /// Set 7th bit
1704- fnset_vs1_2 (self) -> Self {{
1708+ fnset_vs1_2_3 (self) -> Self {{
17051709 if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
17061710 Self(
17071711 self.0
1708- | WidthInfo::VARIATION_SELECTOR_1_OR_2 .0
1712+ | WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0
17091713 & !WidthInfo::VARIATION_SELECTOR_15.0
17101714 & !WidthInfo::VARIATION_SELECTOR_16.0,
17111715 )
17121716 }} else {{
1713- Self(WidthInfo::VARIATION_SELECTOR_1_OR_2 .0)
1717+ Self(WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0)
17141718 }}
17151719 }}
17161720
17171721 /// Clear 7th bit
1718- fnunset_vs1_2 (self) -> Self {{
1719- Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2 .0)
1722+ fnunset_vs1_2_3 (self) -> Self {{
1723+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_2_OR_3 .0)
17201724 }}
17211725}}
17221726