Commit 7cb6dca

rth

authored and

Manishearth

committed

MAINT Fixes for Python scripts (#54)

* Fixes to python scripts
* Update src/testdata.rs

1 parent c7a6b6f, commit 7cb6dca (Copy full SHA for 7cb6dca)

File tree

3 files changed

+23

-23

lines changed

scripts
- unicode.py
- unicode_gen_breaktests.py
src
- testdata.rs

3 files changed

+23

-23

lines changed

`‎scripts/unicode.py‎`

Lines changed: 9 additions & 9 deletions

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@`
`20`	`20`	`# Since this should not require frequent updates, we just store this`
`21`	`21`	`# out-of-line and check the unicode.rs file into git.`
`22`	`22`
`23`		`-importfileinput,re,os,sys,operator`
	`23`	`+importfileinput,re,os,sys`
`24`	`24`
`25`	`25`	`preamble='''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT`
`26`	`26`	`// file at the top-level directory of this distribution and at`
`@@ -59,7 +59,7 @@ def is_surrogate(n):`
`59`	`59`
`60`	`60`	`deffetch(f):`
`61`	`61`	`ifnotos.path.exists(os.path.basename(f)):`
`62`		`-os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"`
	`62`	`+os.system("curl -O http://www.unicode.org/Public/9.0.0/ucd/%s"`
`63`	`63`	`%f)`
`64`	`64`
`65`	`65`	`ifnotos.path.exists(os.path.basename(f)):`
`@@ -80,7 +80,7 @@ def load_gencats(f):`
`80`	`80`	`ifis_surrogate(cp):`
`81`	`81`	`continue`
`82`	`82`	`ifrange_start>=0:`
`83`		`-foriinxrange(range_start,cp):`
	`83`	`+foriinrange(range_start,cp):`
`84`	`84`	`udict[i]=data;`
`85`	`85`	`range_start=-1;`
`86`	`86`	`ifdata[1].endswith(", First>"):`
`@@ -150,8 +150,8 @@ def format_table_content(f, content, indent):`
`150`	`150`	`defload_properties(f,interestingprops):`
`151`	`151`	`fetch(f)`
`152`	`152`	`props= {}`
`153`		`-re1=re.compile("^ ([0-9A-F]+) ; *(\w+)")`
`154`		`-re2=re.compile("^ ([0-9A-F]+)\.\.([0-9A-F]+) ; *(\w+)")`
	`153`	`+re1=re.compile(r"^ ([0-9A-F]+) ; *(\w+)")`
	`154`	`+re2=re.compile(r"^ ([0-9A-F]+)\.\.([0-9A-F]+) ; *(\w+)")`
`155`	`155`
`156`	`156`	`forlineinfileinput.input(os.path.basename(f)):`
`157`	`157`	`prop=None`
`@@ -309,7 +309,7 @@ def emit_break_module(f, break_table, break_cats, name):`
`309`	`309`	`# download and parse all the data`
`310`	`310`	`fetch("ReadMe.txt")`
`311`	`311`	`withopen("ReadMe.txt")asreadme:`
`312`		`-pattern="for Version (\d+)\.(\d+)\.(\d+) of the Unicode"`
	`312`	`+pattern=r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"`
`313`	`313`	`unicode_version=re.search(pattern,readme.read()).groups()`
`314`	`314`	`rf.write("""`
`315`	`315`	`/// The version of [Unicode](http://www.unicode.org/)`
`@@ -342,19 +342,19 @@ def emit_break_module(f, break_table, break_cats, name):`
`342`	`342`	`forcatingrapheme_cats:`
`343`	`343`	`grapheme_table.extend([(x,y,cat)for (x,y)ingrapheme_cats[cat]])`
`344`	`344`	`grapheme_table.sort(key=lambdaw:w[0])`
`345`		`-emit_break_module(rf,grapheme_table,grapheme_cats.keys(),"grapheme")`
	`345`	`+emit_break_module(rf,grapheme_table,list(grapheme_cats.keys()),"grapheme")`
`346`	`346`	`rf.write("\n")`
`347`	`347`
`348`	`348`	`word_cats=load_properties("auxiliary/WordBreakProperty.txt", [])`
`349`	`349`	`word_table= []`
`350`	`350`	`forcatinword_cats:`
`351`	`351`	`word_table.extend([(x,y,cat)for (x,y)inword_cats[cat]])`
`352`	`352`	`word_table.sort(key=lambdaw:w[0])`
`353`		`-emit_break_module(rf,word_table,word_cats.keys(),"word")`
	`353`	`+emit_break_module(rf,word_table,list(word_cats.keys()),"word")`
`354`	`354`
`355`	`355`	`sentence_cats=load_properties("auxiliary/SentenceBreakProperty.txt", [])`
`356`	`356`	`sentence_table= []`
`357`	`357`	`forcatinsentence_cats:`
`358`	`358`	`sentence_table.extend([(x,y,cat)for (x,y)insentence_cats[cat]])`
`359`	`359`	`sentence_table.sort(key=lambdaw:w[0])`
`360`		`-emit_break_module(rf,sentence_table,sentence_cats.keys(),"sentence")`
	`360`	`+emit_break_module(rf,sentence_table,list(sentence_cats.keys()),"sentence")`

`‎scripts/unicode_gen_breaktests.py‎`

Lines changed: 11 additions & 11 deletions

Original file line number	Diff line number	Diff line change
`@@ -17,23 +17,23 @@`
`17`	`17`	`#`
`18`	`18`	`# Since this should not require frequent updates, we just store this`
`19`	`19`	`# out-of-line and check the unicode.rs file into git.`
	`20`	`+from __future__importprint_function`
`20`	`21`
`21`	`22`	`importunicode,re,os,fileinput`
`22`	`23`
`23`	`24`	`defload_test_data(f,optsplit=[]):`
`24`		`-outls= []`
`25`		`-testRe1=re.compile("^÷\s+([^\s].[^\s])\s+÷\s+#\s+÷\s+\[0.2\].?([÷×].)\s+÷\s+\[0.3\]\s$")`
	`25`	`+testRe1=re.compile(r"^÷\s+([^\s].[^\s])\s+÷\s+#\s+÷\s+\[0.2\].?([÷×].)\s+÷\s+\[0.3\]\s$")`
`26`	`26`
`27`	`27`	`unicode.fetch(f)`
`28`	`28`	`data= []`
`29`	`29`	`forlineinfileinput.input(os.path.basename(f)):`
`30`	`30`	`# lines that include a test start with the ÷ character`
`31`		`-iflen(line)<2orline[0:2]!='÷':`
	`31`	`+iflen(line)<2ornotline.startswith('÷'):`
`32`	`32`	`continue`
`33`	`33`
`34`	`34`	`m=testRe1.match(line)`
`35`	`35`	`ifnotm:`
`36`		`-print"error: no match on line where test was expected: %s"%line`
	`36`	`+print("error: no match on line where test was expected: %s"%line)`
`37`	`37`	`continue`
`38`	`38`
`39`	`39`	`# process the characters in this test case`
`@@ -48,9 +48,9 @@ def load_test_data(f, optsplit=[]):`
`48`	`48`	`# make sure that we have break info for each break!`
`49`	`49`	`assertlen(chars)-1==len(info)`
`50`	`50`
`51`		`-outls.append((chars,info))`
	`51`	`+data.append((chars,info))`
`52`	`52`
`53`		`-returnoutls`
	`53`	`+returndata`
`54`	`54`
`55`	`55`	`defprocess_split_info(s,c,o):`
`56`	`56`	`outcs= []`
`@@ -59,7 +59,7 @@ def process_split_info(s, c, o):`
`59`	`59`
`60`	`60`	`# are we on a × or a ÷?`
`61`	`61`	`isX=False`
`62`		`-ifs[0:2]=='×':`
	`62`	`+ifs.startswith('×'):`
`63`	`63`	`isX=True`
`64`	`64`
`65`	`65`	`# find each instance of '(÷\|×) [x.y] '`
`@@ -81,10 +81,10 @@ def process_split_info(s, c, o):`
`81`	`81`
`82`	`82`	`idx=1`
`83`	`83`	`whileidx<len(s):`
`84`		`-ifs[idx:idx+2]=='×':`
	`84`	`+ifs[idx:].startswith('×'):`
`85`	`85`	`isX=True`
`86`	`86`	`break`
`87`		`-ifs[idx:idx+2]=='÷':`
	`87`	`+ifs[idx:].startswith('÷'):`
`88`	`88`	`isX=False`
`89`	`89`	`break`
`90`	`90`	`idx+=1`
`@@ -172,7 +172,7 @@ def create_grapheme_data(f):`
`172`	`172`	`stype="&'static [(&'static str, &'static [&'static str])]"`
`173`	`173`	`dtype="&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"`
`174`	`174`	`f.write(" // official Unicode test data\n")`
`175`		`-f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt\n")`
	`175`	`+f.write(" // http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.txt\n")`
`176`	`176`	`unicode.emit_table(f,"TEST_SAME",test_same,stype,True,showfun,True)`
`177`	`177`	`unicode.emit_table(f,"TEST_DIFF",test_diff,dtype,True,showfun,True)`
`178`	`178`
`@@ -187,7 +187,7 @@ def create_words_data(f):`
`187`	`187`
`188`	`188`	`wtype="&'static [(&'static str, &'static [&'static str])]"`
`189`	`189`	`f.write(" // official Unicode test data\n")`
`190`		`-f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt\n")`
	`190`	`+f.write(" // http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakTest.txt\n")`
`191`	`191`	`unicode.emit_table(f,"TEST_WORD",test,wtype,True,showfun,True)`
`192`	`192`
`193`	`193`	`defcreate_sentence_data(f):`

`‎src/testdata.rs‎`

Lines changed: 3 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT`
	`1`	`+// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT`
`2`	`2`	`// file at the top-level directory of this distribution and at`
`3`	`3`	`// http://rust-lang.org/COPYRIGHT.`
`4`	`4`	`//`
`@@ -12,7 +12,7 @@`
`12`	`12`
`13`	`13`	`#![allow(missing_docs, non_upper_case_globals, non_snake_case)]`
`14`	`14`	`// official Unicode test data`
`15`		`-// http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt`
	`15`	`+// http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.txt`
`16`	`16`	`pubconstTEST_SAME:&'static[(&'staticstr,&'static[&'staticstr])] =&[`
`17`	`17`	`("\u{20}\u{20}",&["\u{20}","\u{20}"]),("\u{20}\u{308}\u{20}",&["\u{20}\u{308}",`
`18`	`18`	`"\u{20}"]),("\u{20}\u{d}",&["\u{20}","\u{d}"]),("\u{20}\u{308}\u{d}",&["\u{20}\u{308}",`
`@@ -516,7 +516,7 @@`
`516`	`516`	`];`
`517`	`517`
`518`	`518`	`// official Unicode test data`
`519`		`-// http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt`
	`519`	`+// http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakTest.txt`
`520`	`520`	`pubconstTEST_WORD:&'static[(&'staticstr,&'static[&'staticstr])] =&[`
`521`	`521`	`("\u{1}\u{1}",&["\u{1}","\u{1}"]),("\u{1}\u{308}\u{1}",&["\u{1}\u{308}","\u{1}"]),`
`522`	`522`	`("\u{1}\u{d}",&["\u{1}","\u{d}"]),("\u{1}\u{308}\u{d}",&["\u{1}\u{308}","\u{d}"]),`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 7cb6dca

File tree

3 files changed

3 files changed

`‎scripts/unicode.py‎`

`‎scripts/unicode_gen_breaktests.py‎`

`‎src/testdata.rs‎`

0 commit comments