@@ -175,8 +175,11 @@ class WidthState(enum.IntEnum):
175175 - 4th bit: whether to set top bit on emoji presentation.
176176 If this is set but 3rd is not, the width mode is related to zwj sequences
177177 - 5th from top: whether this is unaffected by ligature-transparent
178+ (if set, should also set 3rd and 4th)
178179 - 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state
179- where no ZWJ has been encountered yet; encountering one flips this on"""
180+ where no ZWJ has been encountered yet; encountering one flips this on
181+ - 7th bit: whether this is VS1 (U+FE00, CJK mode) or VS2 (U+FE01, non-CJK mode)
182+ """
180183
181184# BASIC WIDTHS
182185
@@ -272,6 +275,9 @@ class WidthState(enum.IntEnum):
272275
273276# VARIATION SELECTORS
274277
278+ VARIATION_SELECTOR_1_OR_2 = 0b0000_0010_0000_0000
279+ "\\ uFE00 if CJK, or\\ uFE01 otherwise"
280+
275281# Text presentation sequences (not CJK)
276282VARIATION_SELECTOR_15 = 0b0100_0000_0000_0000
277283"\\ uFE0E (text presentation sequences)"
@@ -367,6 +373,7 @@ def width_alone(self) -> int:
367373| WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
368374| WidthState .VARIATION_SELECTOR_15
369375| WidthState .VARIATION_SELECTOR_16
376+ | WidthState .VARIATION_SELECTOR_1_OR_2
370377 ):
371378return 0
372379case (
@@ -656,9 +663,11 @@ def load_width_maps() -> tuple[list[WidthState], list[WidthState]]:
656663ea [cp ]= width
657664
658665# East-Asian only
666+ ea [0xFE00 ]= WidthState .VARIATION_SELECTOR_1_OR_2
659667ea [0x0338 ]= WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
660668
661669# Not East Asian only
670+ not_ea [0xFE01 ]= WidthState .VARIATION_SELECTOR_1_OR_2
662671not_ea [0xFE0E ]= WidthState .VARIATION_SELECTOR_15
663672
664673return (not_ea ,ea )
@@ -724,7 +733,7 @@ def load_solidus_transparent(
724733cjk_width_map :list [WidthState ],
725734)-> list [tuple [Codepoint ,Codepoint ]]:
726735"""Characters expanding to a canonical combining class above 1, plus `ligature_transparent`s from above.
727- Ranges matching ones in `ligature_transparent` exactly are excluded (for compression), so it needs tobechecked also.
736+ Ranges matching ones in `ligature_transparent` exactly are excluded (for compression), so it needs to be checked also.
728737 """
729738
730739ccc_above_1 = set ()
@@ -756,7 +765,7 @@ def load_solidus_transparent(
756765num_chars = len (ccc_above_1 )
757766
758767for cp in ccc_above_1 :
759- if cp != 0xFE0F :
768+ if cp not in [ 0xFE00 , 0xFE0F ] :
760769assert (
761770cjk_width_map [cp ].table_width ()!= CharWidthInTable .SPECIAL
762771 ),f"U+{ cp :X} "
@@ -1312,8 +1321,17 @@ def lookup_fns(
13121321 return (0, next_info.set_emoji_presentation());
13131322 }"""
13141323
1315- if not is_cjk :
1324+ if is_cjk :
1325+ s += """
1326+ if c == '\\ u{FE00}' {
1327+ return (0, next_info.set_vs1_2());
1328+ }
1329+ """
1330+ else :
13161331s += """
1332+ if c == '\\ u{FE01}' {
1333+ return (0, next_info.set_vs1_2());
1334+ }
13171335 if c == '\\ u{FE0E}' {
13181336 return (0, next_info.set_text_presentation());
13191337 }
@@ -1323,9 +1341,19 @@ def lookup_fns(
13231341 } else {
13241342 next_info = next_info.unset_text_presentation();
13251343 }
1326- }"""
1344+ } else """
13271345
1328- s += """
1346+ s += """if next_info.is_vs1_2() {
1347+ if matches!(c, '\\ u{2018}' | '\\ u{2019}' | '\\ u{201C}' | '\\ u{201D}') {
1348+ return ("""
1349+
1350+ s += str (2 - is_cjk )
1351+
1352+ s += """, WidthInfo::DEFAULT);
1353+ } else {
1354+ next_info = next_info.unset_vs1_2();
1355+ }
1356+ }
13291357 if next_info.is_ligature_transparent() {
13301358 if c == '\\ u{200D}' {
13311359 return (0, next_info.set_zwj_bit());
@@ -1586,6 +1614,8 @@ def emit_module(
15861614#[derive(Clone, Copy, Debug, PartialEq, Eq)]
15871615struct WidthInfo(u16);
15881616
1617+ const LIGATURE_TRANSPARENT_MASK: u16 = 0b0010_0000_0000_0000;
1618+
15891619impl WidthInfo {
15901620 /// No special handling necessary
15911621 const DEFAULT: Self = Self(0);
@@ -1615,51 +1645,84 @@ def emit_module(
16151645
16161646 /// Has top bit set
16171647 fn is_emoji_presentation(self) -> bool {{
1618- (self.0 &0b1000_0000_0000_0000 ) ==0b1000_0000_0000_0000
1648+ (self.0 &WidthInfo::VARIATION_SELECTOR_16.0 ) ==WidthInfo::VARIATION_SELECTOR_16.0
16191649 }}
16201650
1621- /// Has top bit set
16221651 fn is_zwj_emoji_presentation(self) -> bool {{
16231652 (self.0 & 0b1011_0000_0000_0000) == 0b1001_0000_0000_0000
16241653 }}
16251654
16261655 /// Set top bit
16271656 fn set_emoji_presentation(self) -> Self {{
1628- if (self.0 &0b0010_0000_0000_0000 ) ==0b0010_0000_0000_0000
1657+ if (self.0 &LIGATURE_TRANSPARENT_MASK ) ==LIGATURE_TRANSPARENT_MASK
16291658 || (self.0 & 0b1001_0000_0000_0000) == 0b0001_0000_0000_0000
16301659 {{
1631- Self(self.0 | 0b1000_0000_0000_0000)
1660+ Self(
1661+ self.0
1662+ | WidthInfo::VARIATION_SELECTOR_16.0
1663+ & !WidthInfo::VARIATION_SELECTOR_15.0
1664+ & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0,
1665+ )
16321666 }} else {{
16331667 Self::VARIATION_SELECTOR_16
16341668 }}
16351669 }}
16361670
16371671 /// Clear top bit
16381672 fn unset_emoji_presentation(self) -> Self {{
1639- if (self.0 &0b0010_0000_0000_0000 ) ==0b0010_0000_0000_0000 {{
1640- Self(self.0 &0b0111_1111_1111_1111 )
1673+ if (self.0 &LIGATURE_TRANSPARENT_MASK ) ==LIGATURE_TRANSPARENT_MASK {{
1674+ Self(self.0 &!WidthInfo::VARIATION_SELECTOR_16.0 )
16411675 }} else {{
16421676 Self::DEFAULT
16431677 }}
16441678 }}
16451679
16461680 /// Has 2nd bit set
16471681 fn is_text_presentation(self) -> bool {{
1648- (self.0 &0b0100_0000_0000_0000 ) ==0b0100_0000_0000_0000
1682+ (self.0 &WidthInfo::VARIATION_SELECTOR_15.0 ) ==WidthInfo::VARIATION_SELECTOR_15.0
16491683 }}
16501684
16511685 /// Set 2nd bit
16521686 fn set_text_presentation(self) -> Self {{
1653- if (self.0 & 0b0010_0000_0000_0000) == 0b0010_0000_0000_0000 {{
1654- Self(self.0 | 0b0100_0000_0000_0000)
1687+ if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
1688+ Self(
1689+ self.0
1690+ | WidthInfo::VARIATION_SELECTOR_15.0
1691+ & !WidthInfo::VARIATION_SELECTOR_16.0
1692+ & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0,
1693+ )
16551694 }} else {{
1656- Self(0b0100_0000_0000_0000 )
1695+ Self(WidthInfo::VARIATION_SELECTOR_15.0 )
16571696 }}
16581697 }}
16591698
16601699 /// Clear 2nd bit
16611700 fn unset_text_presentation(self) -> Self {{
1662- Self(self.0 & 0b1011_1111_1111_1111)
1701+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_15.0)
1702+ }}
1703+
1704+ /// Has 7th bit set
1705+ fn is_vs1_2(self) -> bool {{
1706+ (self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2.0) == WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1707+ }}
1708+
1709+ /// Set 7th bit
1710+ fn set_vs1_2(self) -> Self {{
1711+ if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
1712+ Self(
1713+ self.0
1714+ | WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1715+ & !WidthInfo::VARIATION_SELECTOR_15.0
1716+ & !WidthInfo::VARIATION_SELECTOR_16.0,
1717+ )
1718+ }} else {{
1719+ Self(WidthInfo::VARIATION_SELECTOR_1_OR_2.0)
1720+ }}
1721+ }}
1722+
1723+ /// Clear 7th bit
1724+ fn unset_vs1_2(self) -> Self {{
1725+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0)
16631726 }}
16641727}}
16651728