|
26 | 26 | # [1] https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/UnicodeData.txt
|
27 | 27 | # [2] https://raw.githubusercontent.com/unicode-org/cldr/${TAG}/common/transforms/Latin-ASCII.xml
|
28 | 28 |
|
29 |
| -# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
30 |
| -# The approach is to be Python3 compatible with Python2 "backports". |
31 |
| -from __future__importprint_function |
32 |
| -from __future__importunicode_literals |
33 |
| -# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
34 |
| - |
35 | 29 | importargparse
|
36 | 30 | importcodecs
|
37 | 31 | importre
|
38 | 32 | importsys
|
39 | 33 | importxml.etree.ElementTreeasET
|
40 | 34 |
|
41 |
| -# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
42 |
| -ifsys.version_info[0]<=2: |
43 |
| -# Encode stdout as UTF-8, so we can just print to it |
44 |
| -sys.stdout=codecs.getwriter('utf8')(sys.stdout) |
45 |
| - |
46 |
| -# Map Python 2's chr to unichr |
47 |
| -chr=unichr |
48 |
| - |
49 |
| -# Python 2 and 3 compatible bytes call |
50 |
| -defbytes(source,encoding='ascii',errors='strict'): |
51 |
| -returnsource.encode(encoding=encoding,errors=errors) |
52 |
| -else: |
53 |
| -# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
54 |
| -sys.stdout=codecs.getwriter('utf8')(sys.stdout.buffer) |
| 35 | +sys.stdout=codecs.getwriter('utf8')(sys.stdout.buffer) |
55 | 36 |
|
56 | 37 | # The ranges of Unicode characters that we consider to be "plain letters".
|
57 | 38 | # For now we are being conservative by including only Latin and Greek. This
|
@@ -213,12 +194,12 @@ def special_cases():
|
213 | 194 | charactersSet=set()
|
214 | 195 |
|
215 | 196 | # Cyrillic
|
216 |
| -charactersSet.add((0x0401,u"\u0415"))# CYRILLIC CAPITAL LETTER IO |
217 |
| -charactersSet.add((0x0451,u"\u0435"))# CYRILLIC SMALL LETTER IO |
| 197 | +charactersSet.add((0x0401,"\u0415"))# CYRILLIC CAPITAL LETTER IO |
| 198 | +charactersSet.add((0x0451,"\u0435"))# CYRILLIC SMALL LETTER IO |
218 | 199 |
|
219 | 200 | # Symbols of "Letterlike Symbols" Unicode Block (U+2100 to U+214F)
|
220 |
| -charactersSet.add((0x2103,u"\xb0C"))# DEGREE CELSIUS |
221 |
| -charactersSet.add((0x2109,u"\xb0F"))# DEGREE FAHRENHEIT |
| 201 | +charactersSet.add((0x2103,"\xb0C"))# DEGREE CELSIUS |
| 202 | +charactersSet.add((0x2109,"\xb0F"))# DEGREE FAHRENHEIT |
222 | 203 | charactersSet.add((0x2117,"(P)"))# SOUND RECORDING COPYRIGHT
|
223 | 204 |
|
224 | 205 | returncharactersSet
|
|