Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8a26b3e

Browse files
Fix #125
1 parent 596e886 commit 8a26b3e

File tree

6 files changed

+352
-188
lines changed

6 files changed

+352
-188
lines changed

‎scripts/unicode.py

Lines changed: 48 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -155,11 +155,11 @@ def format_table_content(f, content, indent):
155155
line=" "*indent+chunk
156156
f.write(line)
157157

158-
defload_properties(f,interestingprops):
158+
defload_properties(f,interestingprops:"list[str | tuple[str, str]] | None"=None):
159159
fetch(f)
160160
props= {}
161-
re1=re.compile(r"^*([0-9A-F]+) *;*(\w+)")
162-
re2=re.compile(r"^*([0-9A-F]+)\.\.([0-9A-F]+) *;*(\w+)")
161+
re1=re.compile(r"^\s*([0-9A-F]+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
162+
re2=re.compile(r"^\s*([0-9A-F]+)\.\.([0-9A-F]+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
163163

164164
forlineinfileinput.input(os.path.basename(f)):
165165
prop=None
@@ -168,17 +168,21 @@ def load_properties(f, interestingprops):
168168
m=re1.match(line)
169169
ifm:
170170
d_lo=m.group(1)
171-
d_hi=m.group(1)
171+
d_hi=d_lo
172172
prop=m.group(2)
173+
value=m.group(3)
173174
else:
174175
m=re2.match(line)
175176
ifm:
176177
d_lo=m.group(1)
177178
d_hi=m.group(2)
178179
prop=m.group(3)
180+
value=m.group(4)
179181
else:
180182
continue
181-
ifinterestingpropsandpropnotininterestingprops:
183+
ifvalueisnotNone:
184+
prop= (prop,value)
185+
ifinterestingpropsisnotNoneandpropnotininterestingprops:
182186
continue
183187
d_lo=int(d_lo,16)
184188
d_hi=int(d_hi,16)
@@ -195,7 +199,7 @@ def load_properties(f, interestingprops):
195199
defescape_char(c):
196200
return"'\\u{%x}'"%c
197201

198-
defemit_table(f,name,t_data,t_type="&'static[(char, char)]",is_pub=True,
202+
defemit_table(f,name,t_data,t_type="&[(char, char)]",is_pub=True,
199203
pfun=lambdax:"(%s,%s)"% (escape_char(x[0]),escape_char(x[1])),is_const=True):
200204
pub_string="const"
201205
ifnotis_const:
@@ -217,7 +221,7 @@ def emit_util_mod(f):
217221
f.write("""
218222
pub mod util {
219223
#[inline]
220-
pub fn bsearch_range_table(c: char, r: &'static[(char,char)]) -> bool {
224+
pub fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool {
221225
use core::cmp::Ordering::{Equal, Less, Greater};
222226
r.binary_search_by(|&(lo,hi)| {
223227
if lo <= c && c <= hi { Equal }
@@ -252,13 +256,22 @@ def emit_util_mod(f):
252256
253257
""")
254258

255-
defemit_property_module(f,mod,tbl,emit):
256-
f.write("mod %s {\n"%mod)
257-
forcatinsorted(emit):
258-
emit_table(f,"%s_table"%cat,tbl[cat],is_pub=False)
259+
defemit_property_module(f,mod,tbl,emit:"list[str | tuple[str, str]]"):
260+
f.write("pub mod %s {\n"%mod)
261+
262+
cats= []
263+
forcatinemit:
264+
iftype(cat)istuple:
265+
cats.append((f"{cat[0]}_{cat[1]}",cat))
266+
else:
267+
cats.append((cat,cat))
268+
cats.sort(key=lambdax:x[0])
269+
270+
forcat_str,catincats:
271+
emit_table(f,"%s_table"%cat_str,tbl[cat],is_pub=False)
259272
f.write(" #[inline]\n")
260-
f.write(" pub fn %s(c: char) -> bool {\n"%cat)
261-
f.write(" super::util::bsearch_range_table(c, %s_table)\n"%cat)
273+
f.write(" pub fn %s(c: char) -> bool {\n"%cat_str)
274+
f.write(" super::util::bsearch_range_table(c, %s_table)\n"%cat_str)
262275
f.write(" }\n\n")
263276
f.write("}\n\n")
264277

@@ -303,7 +316,7 @@ def emit_break_module(f, break_table, break_cats, name):
303316
f.write((" %sC_"%Name[0])+cat+",\n")
304317
f.write(""" }
305318
306-
fn bsearch_range_value_table(c: char, r: &'static[(char, char, %sCat)], default_lower: u32, default_upper: u32) -> (u32, u32, %sCat) {
319+
fn bsearch_range_value_table(c: char, r: &[(char, char, %sCat)], default_lower: u32, default_upper: u32) -> (u32, u32, %sCat) {
307320
use core::cmp::Ordering::{Equal, Less, Greater};
308321
match r.binary_search_by(|&(lo, hi, _)| {
309322
if lo <= c && c <= hi { Equal }
@@ -355,11 +368,11 @@ def emit_break_module(f, break_table, break_cats, name):
355368
else:
356369
lookup_type="u32"
357370

358-
emit_table(f,"%s_cat_lookup"%name,lookup_table,"&'static[%s]"%lookup_type,
371+
emit_table(f,"%s_cat_lookup"%name,lookup_table,"&[%s]"%lookup_type,
359372
pfun=lambdax:"%d"%x,
360373
is_pub=False,is_const=True)
361374

362-
emit_table(f,"%s_cat_table"%name,break_table,"&'static[(char, char, %sCat)]"%Name,
375+
emit_table(f,"%s_cat_table"%name,break_table,"&[(char, char, %sCat)]"%Name,
363376
pfun=lambdax:"(%s,%s,%sC_%s)"% (escape_char(x[0]),escape_char(x[1]),Name[0],x[2]),
364377
is_pub=False,is_const=True)
365378
f.write("}\n")
@@ -379,17 +392,26 @@ def emit_break_module(f, break_table, break_cats, name):
379392

380393
# download and parse all the data
381394
gencats=load_gencats("UnicodeData.txt")
382-
derived=load_properties("DerivedCoreProperties.txt", ["Alphabetic"])
395+
derived=load_properties("DerivedCoreProperties.txt", ["Alphabetic", ("InCB","Consonant"), ("InCB","Extend"), ("InCB","Linker")])
383396

384397
emit_util_mod(rf)
385398
for (name,cat,pfuns)in ("general_category",gencats, ["N"]), \
386-
("derived_property",derived, ["Alphabetic"]):
399+
("derived_property",derived, ["Alphabetic", ("InCB","Extend")]):
387400
emit_property_module(rf,name,cat,pfuns)
388401

402+
rf.write("""pub fn is_incb_linker(c: char) -> bool {
403+
matches!(c,""")
404+
405+
for (lo,hi)inderived[("InCB","Linker")]:
406+
rf.write(f" | '\\u{{{lo:X}}}'")
407+
iflo!=hi:
408+
rf.write(f"..'\\u{{{lo:X}}}'")
409+
410+
rf.write(")\n}\n\n")
411+
389412
### grapheme cluster module
390413
# from http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Break_Property_Values
391-
grapheme_cats=load_properties("auxiliary/GraphemeBreakProperty.txt", [])
392-
414+
grapheme_cats=load_properties("auxiliary/GraphemeBreakProperty.txt")
393415
# Control
394416
# Note:
395417
# This category also includes Cs (surrogate codepoints), but Rust's `char`s are
@@ -398,22 +420,22 @@ def emit_break_module(f, break_table, break_cats, name):
398420
grapheme_cats["Control"]=group_cat(list(
399421
set(ungroup_cat(grapheme_cats["Control"]))
400422
-set(ungroup_cat([surrogate_codepoints]))))
401-
423+
grapheme_cats["InCB_Consonant"]=derived[("InCB","Consonant")]
424+
emoji_props=load_properties("emoji-data.txt", ["Extended_Pictographic"])
425+
grapheme_cats["Extended_Pictographic"]=emoji_props["Extended_Pictographic"]
402426
grapheme_table= []
403427
forcatingrapheme_cats:
404428
grapheme_table.extend([(x,y,cat)for (x,y)ingrapheme_cats[cat]])
405-
emoji_props=load_properties("emoji-data.txt", ["Extended_Pictographic"])
406-
grapheme_table.extend([(x,y,"Extended_Pictographic")for (x,y)inemoji_props["Extended_Pictographic"]])
407429
grapheme_table.sort(key=lambdaw:w[0])
408430
last=-1
409431
forcharsingrapheme_table:
410432
ifchars[0]<=last:
411433
raise"Grapheme tables and Extended_Pictographic values overlap; need to store these separately!"
412434
last=chars[1]
413-
emit_break_module(rf,grapheme_table,list(grapheme_cats.keys())+ ["Extended_Pictographic"],"grapheme")
435+
emit_break_module(rf,grapheme_table,list(grapheme_cats.keys()),"grapheme")
414436
rf.write("\n")
415437

416-
word_cats=load_properties("auxiliary/WordBreakProperty.txt", [])
438+
word_cats=load_properties("auxiliary/WordBreakProperty.txt")
417439
word_table= []
418440
forcatinword_cats:
419441
word_table.extend([(x,y,cat)for (x,y)inword_cats[cat]])
@@ -425,7 +447,7 @@ def emit_break_module(f, break_table, break_cats, name):
425447
emoji_table= [(x,y,"Extended_Pictographic")for (x,y)inemoji_props["Extended_Pictographic"]]
426448
emit_break_module(rf,emoji_table, ["Extended_Pictographic"],"emoji")
427449

428-
sentence_cats=load_properties("auxiliary/SentenceBreakProperty.txt", [])
450+
sentence_cats=load_properties("auxiliary/SentenceBreakProperty.txt")
429451
sentence_table= []
430452
forcatinsentence_cats:
431453
sentence_table.extend([(x,y,cat)for (x,y)insentence_cats[cat]])

‎scripts/unicode_gen_breaktests.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -140,8 +140,8 @@ def showfun(x):
140140
returnoutstr
141141

142142
defcreate_grapheme_data(f):
143-
# rules 9.1 and 9.2 are for extended graphemes only
144-
optsplits= ['9.1','9.2']
143+
# rules 9.1, 9.2, and 9.3 are for extended graphemes only
144+
optsplits= ['9.1','9.2','9.3']
145145
d=load_test_data("auxiliary/GraphemeBreakTest.txt",optsplits)
146146

147147
test_same= []

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp