Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9d0c1e0

Browse files
committed
Move update script over to Unicode 11; make it handle emoji data
1 parent 666eeed commit 9d0c1e0

File tree

1 file changed

+21
-5
lines changed

1 file changed

+21
-5
lines changed

‎scripts/unicode.py

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@
5454
# these are the surrogate codepoints, which are not valid rust characters
5555
surrogate_codepoints= (0xd800,0xdfff)
5656

57-
UNICODE_VERSION= (10,0,0)
57+
UNICODE_VERSION= (11,0,0)
5858

5959
UNICODE_VERSION_NUMBER="%s.%s.%s"%UNICODE_VERSION
6060

@@ -63,8 +63,12 @@ def is_surrogate(n):
6363

6464
deffetch(f):
6565
ifnotos.path.exists(os.path.basename(f)):
66-
os.system("curl -O http://www.unicode.org/Public/%s/ucd/%s"
67-
% (UNICODE_VERSION_NUMBER,f))
66+
if"emoji"inf:
67+
os.system("curl -O https://www.unicode.org/Public/emoji/%s.%s/%s"
68+
% (UNICODE_VERSION[0],UNICODE_VERSION[1],f))
69+
else:
70+
os.system("curl -O http://www.unicode.org/Public/%s/ucd/%s"
71+
% (UNICODE_VERSION_NUMBER,f))
6872

6973
ifnotos.path.exists(os.path.basename(f)):
7074
sys.stderr.write("cannot load %s"%f)
@@ -266,7 +270,7 @@ def emit_break_module(f, break_table, break_cats, name):
266270
pub use self::%sCat::*;
267271
268272
#[allow(non_camel_case_types)]
269-
#[derive(Clone, Copy, PartialEq, Eq)]
273+
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
270274
pub enum %sCat {
271275
"""% (name,Name,Name))
272276

@@ -340,8 +344,15 @@ def emit_break_module(f, break_table, break_cats, name):
340344
grapheme_table= []
341345
forcatingrapheme_cats:
342346
grapheme_table.extend([(x,y,cat)for (x,y)ingrapheme_cats[cat]])
347+
emoji_props=load_properties("emoji-data.txt", ["Extended_Pictographic"])
348+
grapheme_table.extend([(x,y,"Extended_Pictographic")for (x,y)inemoji_props["Extended_Pictographic"]])
343349
grapheme_table.sort(key=lambdaw:w[0])
344-
emit_break_module(rf,grapheme_table,list(grapheme_cats.keys()),"grapheme")
350+
last=-1
351+
forcharsingrapheme_table:
352+
ifchars[0]<=last:
353+
raise"Grapheme tables and Extended_Pictographic values overlap; need to store these separately!"
354+
last=chars[1]
355+
emit_break_module(rf,grapheme_table,list(grapheme_cats.keys())+ ["Extended_Pictographic"],"grapheme")
345356
rf.write("\n")
346357

347358
word_cats=load_properties("auxiliary/WordBreakProperty.txt", [])
@@ -351,6 +362,11 @@ def emit_break_module(f, break_table, break_cats, name):
351362
word_table.sort(key=lambdaw:w[0])
352363
emit_break_module(rf,word_table,list(word_cats.keys()),"word")
353364

365+
# There are some emoji which are also ALetter, so this needs to be stored separately
366+
# For efficiency, we could still merge the two tables and produce an ALetterEP state
367+
emoji_table= [(x,y,"Extended_Pictographic")for (x,y)inemoji_props["Extended_Pictographic"]]
368+
emit_break_module(rf,emoji_table, ["Extended_Pictographic"],"emoji")
369+
354370
sentence_cats=load_properties("auxiliary/SentenceBreakProperty.txt", [])
355371
sentence_table= []
356372
forcatinsentence_cats:

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp