2020# Since this should not require frequent updates, we just store this
2121# out-of-line and check the unicode.rs file into git.
2222
23- import fileinput ,re ,os ,sys , operator
23+ import fileinput ,re ,os ,sys
2424
2525preamble = '''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
2626// file at the top-level directory of this distribution and at
@@ -59,7 +59,7 @@ def is_surrogate(n):
5959
6060def fetch (f ):
6161if not os .path .exists (os .path .basename (f )):
62- os .system ("curl -O http://www.unicode.org/Public/UNIDATA /%s"
62+ os .system ("curl -O http://www.unicode.org/Public/9.0.0/ucd /%s"
6363% f )
6464
6565if not os .path .exists (os .path .basename (f )):
@@ -80,7 +80,7 @@ def load_gencats(f):
8080if is_surrogate (cp ):
8181continue
8282if range_start >= 0 :
83- for i in xrange (range_start ,cp ):
83+ for i in range (range_start ,cp ):
8484udict [i ]= data ;
8585range_start = - 1 ;
8686if data [1 ].endswith (", First>" ):
@@ -150,8 +150,8 @@ def format_table_content(f, content, indent):
150150def load_properties (f ,interestingprops ):
151151fetch (f )
152152props = {}
153- re1 = re .compile ("^ *([0-9A-F]+) *; *(\w+)" )
154- re2 = re .compile ("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)" )
153+ re1 = re .compile (r "^ *([0-9A-F]+) *; *(\w+)" )
154+ re2 = re .compile (r "^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)" )
155155
156156for line in fileinput .input (os .path .basename (f )):
157157prop = None
@@ -309,7 +309,7 @@ def emit_break_module(f, break_table, break_cats, name):
309309# download and parse all the data
310310fetch ("ReadMe.txt" )
311311with open ("ReadMe.txt" )as readme :
312- pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
312+ pattern = r "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
313313unicode_version = re .search (pattern ,readme .read ()).groups ()
314314rf .write ("""
315315/// The version of [Unicode](http://www.unicode.org/)
@@ -342,19 +342,19 @@ def emit_break_module(f, break_table, break_cats, name):
342342for cat in grapheme_cats :
343343grapheme_table .extend ([(x ,y ,cat )for (x ,y )in grapheme_cats [cat ]])
344344grapheme_table .sort (key = lambda w :w [0 ])
345- emit_break_module (rf ,grapheme_table ,grapheme_cats .keys (),"grapheme" )
345+ emit_break_module (rf ,grapheme_table ,list ( grapheme_cats .keys () ),"grapheme" )
346346rf .write ("\n " )
347347
348348word_cats = load_properties ("auxiliary/WordBreakProperty.txt" , [])
349349word_table = []
350350for cat in word_cats :
351351word_table .extend ([(x ,y ,cat )for (x ,y )in word_cats [cat ]])
352352word_table .sort (key = lambda w :w [0 ])
353- emit_break_module (rf ,word_table ,word_cats .keys (),"word" )
353+ emit_break_module (rf ,word_table ,list ( word_cats .keys () ),"word" )
354354
355355sentence_cats = load_properties ("auxiliary/SentenceBreakProperty.txt" , [])
356356sentence_table = []
357357for cat in sentence_cats :
358358sentence_table .extend ([(x ,y ,cat )for (x ,y )in sentence_cats [cat ]])
359359sentence_table .sort (key = lambda w :w [0 ])
360- emit_break_module (rf ,sentence_table ,sentence_cats .keys (),"sentence" )
360+ emit_break_module (rf ,sentence_table ,list ( sentence_cats .keys () ),"sentence" )