Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 949e39c

Browse files
committed
use is_alphanumeric from std
use is_alphanumeric from std and update unicode.py to remove obsolete parts, closes #148
1 parent 9b1b7f9 commit 949e39c

File tree

4 files changed

+3159
-405
lines changed

4 files changed

+3159
-405
lines changed

‎scripts/unicode.py

Lines changed: 2 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -74,42 +74,6 @@ def fetch(f):
7474
sys.stderr.write("cannot load %s"%f)
7575
exit(1)
7676

77-
defload_gencats(f):
78-
fetch(f)
79-
gencats= {}
80-
81-
udict= {};
82-
range_start=-1;
83-
forlineinfileinput.input(f):
84-
data=line.split(';');
85-
iflen(data)!=15:
86-
continue
87-
cp=int(data[0],16);
88-
ifis_surrogate(cp):
89-
continue
90-
ifrange_start>=0:
91-
foriinrange(range_start,cp):
92-
udict[i]=data;
93-
range_start=-1;
94-
ifdata[1].endswith(", First>"):
95-
range_start=cp;
96-
continue;
97-
udict[cp]=data;
98-
99-
forcodeinudict:
100-
[code_org,name,gencat,combine,bidi,
101-
decomp,deci,digit,num,mirror,
102-
old,iso,upcase,lowcase,titlecase ]=udict[code];
103-
104-
# place letter in categories as appropriate
105-
forcatin [gencat,"Assigned"]+expanded_categories.get(gencat, []):
106-
ifcatnotingencats:
107-
gencats[cat]= []
108-
gencats[cat].append(code)
109-
110-
gencats=group_cats(gencats)
111-
returngencats
112-
11377
defgroup_cats(cats):
11478
cats_out= {}
11579
forcatincats:
@@ -230,36 +194,6 @@ def emit_util_mod(f):
230194
}).is_ok()
231195
}
232196
233-
#[inline]
234-
fn is_alphabetic(c: char) -> bool {
235-
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
236-
c.is_alphabetic()
237-
} else {
238-
match c {
239-
'a' ..= 'z' | 'A' ..= 'Z' => true,
240-
c if c > '\\x7f' => super::derived_property::Alphabetic(c),
241-
_ => false,
242-
}
243-
}
244-
}
245-
246-
#[inline]
247-
fn is_numeric(c: char) -> bool {
248-
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
249-
c.is_numeric()
250-
} else {
251-
match c {
252-
'0' ..= '9' => true,
253-
c if c > '\\x7f' => super::general_category::N(c),
254-
_ => false,
255-
}
256-
}
257-
}
258-
259-
#[inline]
260-
pub fn is_alphanumeric(c: char) -> bool {
261-
is_alphabetic(c) || is_numeric(c)
262-
}
263197
}
264198
265199
""")
@@ -396,20 +330,13 @@ def emit_break_module(f, break_table, break_cats, name):
396330
/// The version of [Unicode](http://www.unicode.org/)
397331
/// that this version of unicode-segmentation is based on.
398332
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
399-
"""%UNICODE_VERSION)
400-
401-
rf.write("""
402-
const UNICODE_VERSION_U8: (u8, u8, u8) = (%s, %s, %s);
403333
"""%UNICODE_VERSION)
404334

405335
# download and parse all the data
406-
gencats=load_gencats("UnicodeData.txt")
407-
derived=load_properties("DerivedCoreProperties.txt", ["Alphabetic", ("InCB","Consonant"), ("InCB","Extend"), ("InCB","Linker")])
336+
derived=load_properties("DerivedCoreProperties.txt", [("InCB","Consonant"), ("InCB","Extend"), ("InCB","Linker")])
408337

409338
emit_util_mod(rf)
410-
for (name,cat,pfuns)in ("general_category",gencats, ["N"]), \
411-
("derived_property",derived, ["Alphabetic", ("InCB","Extend")]):
412-
emit_property_module(rf,name,cat,pfuns)
339+
emit_property_module(rf,"derived_property",derived, [("InCB","Extend")])
413340

414341
rf.write("""pub fn is_incb_linker(c: char) -> bool {
415342
matches!(c,""")

‎src/sentence.rs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -345,10 +345,9 @@ pub fn new_sentence_bound_indices(source: &str) -> USentenceBoundIndices<'_> {
345345
#[inline]
346346
pubfnnew_unicode_sentences(s:&str) ->UnicodeSentences<'_>{
347347
usesuper::UnicodeSegmentation;
348-
usecrate::tables::util::is_alphanumeric;
349348

350349
fnhas_alphanumeric(s:&&str) ->bool{
351-
s.chars().any(is_alphanumeric)
350+
s.chars().any(|c| c.is_alphanumeric())
352351
}
353352
let has_alphanumeric:fn(&&str) ->bool = has_alphanumeric;// coerce to fn pointer
354353

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp