Commit 949e39c

committed

use is_alphanumeric from std

use is_alphanumeric from std and update unicode.py to remove obsolete parts; closes #148

1 parent 9b1b7f9 · commit 949e39c · Copy full SHA for 949e39c

File tree

4 files changed

+3159

-405

lines changed

4 files changed

+3159

-405

lines changed

`‎scripts/unicode.py`

Lines changed: 2 additions & 75 deletions

Original file line number	Diff line number	Diff line change
`@@ -74,42 +74,6 @@ def fetch(f):`
`74`	`74`	`sys.stderr.write("cannot load %s"%f)`
`75`	`75`	`exit(1)`
`76`	`76`
`77`		`-defload_gencats(f):`
`78`		`-fetch(f)`
`79`		`-gencats= {}`
`80`		`-`
`81`		`-udict= {};`
`82`		`-range_start=-1;`
`83`		`-forlineinfileinput.input(f):`
`84`		`-data=line.split(';');`
`85`		`-iflen(data)!=15:`
`86`		`-continue`
`87`		`-cp=int(data[0],16);`
`88`		`-ifis_surrogate(cp):`
`89`		`-continue`
`90`		`-ifrange_start>=0:`
`91`		`-foriinrange(range_start,cp):`
`92`		`-udict[i]=data;`
`93`		`-range_start=-1;`
`94`		`-ifdata[1].endswith(", First>"):`
`95`		`-range_start=cp;`
`96`		`-continue;`
`97`		`-udict[cp]=data;`
`98`		`-`
`99`		`-forcodeinudict:`
`100`		`- [code_org,name,gencat,combine,bidi,`
`101`		`-decomp,deci,digit,num,mirror,`
`102`		`-old,iso,upcase,lowcase,titlecase ]=udict[code];`
`103`		`-`
`104`		`-# place letter in categories as appropriate`
`105`		`-forcatin [gencat,"Assigned"]+expanded_categories.get(gencat, []):`
`106`		`-ifcatnotingencats:`
`107`		`-gencats[cat]= []`
`108`		`-gencats[cat].append(code)`
`109`		`-`
`110`		`-gencats=group_cats(gencats)`
`111`		`-returngencats`
`112`		`-`
`113`	`77`	`defgroup_cats(cats):`
`114`	`78`	`cats_out= {}`
`115`	`79`	`forcatincats:`
`@@ -230,36 +194,6 @@ def emit_util_mod(f):`
`230`	`194`	`}).is_ok()`
`231`	`195`	`}`
`232`	`196`
`233`		`- #[inline]`
`234`		`- fn is_alphabetic(c: char) -> bool {`
`235`		`- if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {`
`236`		`- c.is_alphabetic()`
`237`		`- } else {`
`238`		`- match c {`
`239`		`- 'a' ..= 'z' \| 'A' ..= 'Z' => true,`
`240`		`- c if c > '\\x7f' => super::derived_property::Alphabetic(c),`
`241`		`- _ => false,`
`242`		`- }`
`243`		`- }`
`244`		`- }`
`245`		`-`
`246`		`- #[inline]`
`247`		`- fn is_numeric(c: char) -> bool {`
`248`		`- if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {`
`249`		`- c.is_numeric()`
`250`		`- } else {`
`251`		`- match c {`
`252`		`- '0' ..= '9' => true,`
`253`		`- c if c > '\\x7f' => super::general_category::N(c),`
`254`		`- _ => false,`
`255`		`- }`
`256`		`- }`
`257`		`- }`
`258`		`-`
`259`		`- #[inline]`
`260`		`- pub fn is_alphanumeric(c: char) -> bool {`
`261`		`- is_alphabetic(c) \|\| is_numeric(c)`
`262`		`- }`
`263`	`197`	`}`
`264`	`198`
`265`	`199`	`""")`
`@@ -396,20 +330,13 @@ def emit_break_module(f, break_table, break_cats, name):`
`396`	`330`	`/// The version of [Unicode](http://www.unicode.org/)`
`397`	`331`	`/// that this version of unicode-segmentation is based on.`
`398`	`332`	`pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);`
`399`		`-"""%UNICODE_VERSION)`
`400`		`-`
`401`		`-rf.write("""`
`402`		`-const UNICODE_VERSION_U8: (u8, u8, u8) = (%s, %s, %s);`
`403`	`333`	`"""%UNICODE_VERSION)`
`404`	`334`
`405`	`335`	`# download and parse all the data`
`406`		`-gencats=load_gencats("UnicodeData.txt")`
`407`		`-derived=load_properties("DerivedCoreProperties.txt", ["Alphabetic", ("InCB","Consonant"), ("InCB","Extend"), ("InCB","Linker")])`
	`336`	`+derived=load_properties("DerivedCoreProperties.txt", [("InCB","Consonant"), ("InCB","Extend"), ("InCB","Linker")])`
`408`	`337`
`409`	`338`	`emit_util_mod(rf)`
`410`		`-for (name,cat,pfuns)in ("general_category",gencats, ["N"]), \`
`411`		`- ("derived_property",derived, ["Alphabetic", ("InCB","Extend")]):`
`412`		`-emit_property_module(rf,name,cat,pfuns)`
	`339`	`+emit_property_module(rf,"derived_property",derived, [("InCB","Extend")])`
`413`	`340`
`414`	`341`	`rf.write("""pub fn is_incb_linker(c: char) -> bool {`
`415`	`342`	`matches!(c,""")`

`‎src/sentence.rs`

Lines changed: 1 addition & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -345,10 +345,9 @@ pub fn new_sentence_bound_indices(source: &str) -> USentenceBoundIndices<'_> {`
`345`	`345`	`#[inline]`
`346`	`346`	`pubfnnew_unicode_sentences(s:&str) ->UnicodeSentences<'_>{`
`347`	`347`	`usesuper::UnicodeSegmentation;`
`348`		`-usecrate::tables::util::is_alphanumeric;`
`349`	`348`
`350`	`349`	`fnhas_alphanumeric(s:&&str) ->bool{`
`351`		`- s.chars().any(is_alphanumeric)`
	`350`	`+ s.chars().any(\|c\| c.is_alphanumeric())`
`352`	`351`	`}`
`353`	`352`	`let has_alphanumeric:fn(&&str) ->bool = has_alphanumeric;// coerce to fn pointer`
`354`	`353`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 949e39c

File tree

4 files changed

4 files changed

`‎scripts/unicode.py`

`‎src/sentence.rs`

0 commit comments