Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5da0090

Browse files
Give U+115F HANGUL CHOSEONG FILLER width 2
1 parent a6b5a52 commit 5da0090

File tree

3 files changed

+15
-5
lines changed

3 files changed

+15
-5
lines changed

‎scripts/unicode.py‎

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,8 @@ def load_zero_widths() -> "list[bool]":
210210
forcpinrange(low,high+1):
211211
zw_map[cp]=False
212212

213-
# `Default_Ignorable_Code_Point`s also have 0 width
213+
# `Default_Ignorable_Code_Point`s also have 0 width:
214+
# https://www.unicode.org/faq/unsup_char.html#3
214215
withfetch_open("DerivedCoreProperties.txt")asproperties:
215216
single=re.compile(r"^([0-9A-F]+)\s+;\s+Default_Ignorable_Code_Point\s+")
216217
multiple=re.compile(
@@ -238,6 +239,8 @@ def load_zero_widths() -> "list[bool]":
238239
# into a single wide grapheme. So we treat vowel and trailing jamo as
239240
# 0-width, such that only the width of the leading jamo is counted
240241
# and the resulting grapheme has width 2.
242+
#
243+
# (See the Unicode Standard sections 3.12 and 18.6 for more on Hangul)
241244
withfetch_open("HangulSyllableType.txt")ascategories:
242245
single=re.compile(r"^([0-9A-F]+)\s+;\s+(V|T)\s+")
243246
multiple=re.compile(r"^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(V|T)\s+")
@@ -255,6 +258,12 @@ def load_zero_widths() -> "list[bool]":
255258
forcpinrange(low,high+1):
256259
zw_map[cp]=True
257260

261+
# Special case: U+115F HANGUL CHOSEONG FILLER.
262+
# U+115F is a `Default_Ignorable_Code_Point`, and therefore would normally have
263+
# zero width. However, the expected usage is to combine it with vowel or trailing jamo
264+
# (which are considered 0-width on their own) to form a composed Hangul syllable with
265+
# width 2. Therefore, we treat it as having width 2.
266+
zw_map[0x115F]=False
258267
returnzw_map
259268

260269

@@ -541,8 +550,8 @@ def main(module_filename: str):
541550
542551
We obey the following rules in decreasing order of importance:
543552
- The soft hyphen (`U+00AD`) is single-width. (https://archive.is/fCT3c)
544-
- Hangul Jamo medial vowels & final consonants are zero-width.
545-
- All `Default_Ignorable_Code_Point`s are zero-width.
553+
- Hangul jamo medial vowels & final consonants are zero-width.
554+
- All `Default_Ignorable_Code_Point`s are zero-width, except for U+115F HANGUL CHOSEONG FILLER.
546555
- All codepoints in general categories `Cc`, `Cf`, `Mn`, or `Me` are zero-width,
547556
except for `Prepended_Concatenation_Mark`s.
548557
- All codepoints with an East Asian Width of `Ambiguous` are ambiguous-width.

‎src/tables.rs‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ pub mod charwidth {
340340
0x55,0x55,0x55,0x50,0x05,0x54,0x55,0x55,0x55,0x01,0x54,0x55,0x55,0x45,0x41,
341341
0x55,0x51,0x55,0x55,0x55,0x51,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0xAA,
342342
0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,
343-
0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0x2A,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343+
0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0xAA,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344344
0x00,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x01,0x55,0x55,0x55,0x55,0x55,0x55,
345345
0x55,0x55,0x55,0x55,0x55,0x55,0x05,0x54,0x55,0x55,0x55,0x55,0x55,0x55,0x05,
346346
0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x05,0x55,0x55,0x55,0x55,0x55,0x55,0x55,

‎src/tests.rs‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,6 @@ fn test_default_ignorable() {
201201

202202
assert_eq!(UnicodeWidthChar::width('\u{E0000}'),Some(0));
203203

204-
assert_eq!(UnicodeWidthChar::width('\u{115F}'),Some(0));
205204
assert_eq!(UnicodeWidthChar::width('\u{1160}'),Some(0));
206205
assert_eq!(UnicodeWidthChar::width('\u{3164}'),Some(0));
207206
assert_eq!(UnicodeWidthChar::width('\u{FFA0}'),Some(0));
@@ -215,6 +214,8 @@ fn test_jamo() {
215214

216215
assert_eq!(UnicodeWidthChar::width('\u{1100}'),Some(2));
217216
assert_eq!(UnicodeWidthChar::width('\u{A97C}'),Some(2));
217+
// Special case: U+115F HANGUL CHOSEONG FILLER
218+
assert_eq!(UnicodeWidthChar::width('\u{115F}'),Some(2));
218219
assert_eq!(UnicodeWidthChar::width('\u{1160}'),Some(0));
219220
assert_eq!(UnicodeWidthChar::width('\u{D7C6}'),Some(0));
220221
assert_eq!(UnicodeWidthChar::width('\u{11A8}'),Some(0));

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp