Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3d59da9

Browse files
committed
unaccent: Make generate_unaccent_rules.py Python 3 compatible
Python 2 is still supported.

Author: Hugh Ranalli <hugh@whtc.ca>
Discussion: https://www.postgresql.org/message-id/CAAhbUMNyZ+PhNr_mQ=G161K0-hvbq13Tz2is9M3WK+yX9cQOCw@mail.gmail.com
1 parent d33faa2, commit 3d59da9

File tree

1 file changed

+24
-6
lines changed

1 file changed

+24
-6
lines changed

‎contrib/unaccent/generate_unaccent_rules.py

Lines changed: 24 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/python2
1+
#!/usr/bin/python
22
# -*- coding: utf-8 -*-
33
#
44
# This script builds unaccent.rules on standard output when given the
@@ -23,6 +23,24 @@
2323
# [1] http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt
2424
# [2] http://unicode.org/cldr/trac/export/12304/tags/release-28/common/transforms/Latin-ASCII.xml
2525

26+
# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped
27+
# The approach is to be Python3 compatible with Python2 "backports".
28+
from __future__importprint_function
29+
from __future__importunicode_literals
30+
importcodecs
31+
importsys
32+
33+
ifsys.version_info[0]<=2:
34+
# Encode stdout as UTF-8, so we can just print to it
35+
sys.stdout=codecs.getwriter('utf8')(sys.stdout)
36+
37+
# Map Python 2's chr to unichr
38+
chr=unichr
39+
40+
# Python 2 and 3 compatible bytes call
41+
defbytes(source,encoding='ascii',errors='strict'):
42+
returnsource.encode(encoding=encoding,errors=errors)
43+
# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped
2644

2745
importre
2846
importargparse
@@ -39,7 +57,7 @@
3957
(0x0391,0x03a9))# GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA
4058

4159
defprint_record(codepoint,letter):
42-
print (unichr(codepoint)+"\t"+letter).encode("UTF-8")
60+
print (chr(codepoint)+"\t"+letter)
4361

4462
classCodepoint:
4563
def__init__(self,id,general_category,combining_ids):
@@ -116,7 +134,7 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
116134
charactersSet=set()
117135

118136
# RegEx to parse rules
119-
rulePattern=re.compile(ur'^(?:(.)|(\\u[0-9a-fA-F]{4})) \u2192 (?:\'(.+)\'|(.+)) ;')
137+
rulePattern=re.compile(r'^(?:(.)|(\\u[0-9a-fA-F]{4})) \u2192 (?:\'(.+)\'|(.+)) ;')
120138

121139
# construct tree from XML
122140
transliterationTree=ET.parse(latinAsciiFilePath)
@@ -134,7 +152,7 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
134152
# Group 3: plain "trg" char. Empty if group 4 is not.
135153
# Group 4: plain "trg" char between quotes. Empty if group 3 is not.
136154
ifmatchesisnotNone:
137-
src=matches.group(1)ifmatches.group(1)isnotNoneelsematches.group(2).decode('unicode-escape')
155+
src=matches.group(1)ifmatches.group(1)isnotNoneelsebytes(matches.group(2),'UTF-8').decode('unicode-escape')
138156
trg=matches.group(3)ifmatches.group(3)isnotNoneelsematches.group(4)
139157

140158
# "'" and """ are escaped
@@ -195,10 +213,10 @@ def main(args):
195213
len(codepoint.combining_ids)>1:
196214
ifis_letter_with_marks(codepoint,table):
197215
charactersSet.add((codepoint.id,
198-
unichr(get_plain_letter(codepoint,table).id)))
216+
chr(get_plain_letter(codepoint,table).id)))
199217
elifargs.noLigaturesExpansionisFalseandis_ligature(codepoint,table):
200218
charactersSet.add((codepoint.id,
201-
"".join(unichr(combining_codepoint.id)
219+
"".join(chr(combining_codepoint.id)
202220
forcombining_codepoint \
203221
inget_plain_letters(codepoint,table))))
204222

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp