NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit3d59da9

committed

unaccent: Make generate_unaccent_rules.py Python 3 compatible

Python 2 is still supported.

Author: Hugh Ranalli <hugh@whtc.ca>
Discussion: https://www.postgresql.org/message-id/CAAhbUMNyZ+PhNr_mQ=G161K0-hvbq13Tz2is9M3WK+yX9cQOCw@mail.gmail.com

1 parent d33faa2, commit 3d59da9 (Copy full SHA for 3d59da9)

File tree

1 file changed

+24

-6

lines changed

contrib/unaccent
- generate_unaccent_rules.py

1 file changed

+24

-6

lines changed

`‎contrib/unaccent/generate_unaccent_rules.py`

Lines changed: 24 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-#!/usr/bin/python2`
	`1`	`+#!/usr/bin/python`
`2`	`2`	`# -- coding: utf-8 --`
`3`	`3`	`#`
`4`	`4`	`# This script builds unaccent.rules on standard output when given the`
`@@ -23,6 +23,24 @@`
`23`	`23`	`# [1] http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt`
`24`	`24`	`# [2] http://unicode.org/cldr/trac/export/12304/tags/release-28/common/transforms/Latin-ASCII.xml`
`25`	`25`
	`26`	`+# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped`
	`27`	`+# The approach is to be Python3 compatible with Python2 "backports".`
	`28`	`+from __future__ import print_function`
	`29`	`+from __future__ import unicode_literals`
	`30`	`+import codecs`
	`31`	`+import sys`
	`32`	`+`
	`33`	`+if sys.version_info[0] <= 2:`
	`34`	`+    # Encode stdout as UTF-8, so we can just print to it`
	`35`	`+    sys.stdout = codecs.getwriter('utf8')(sys.stdout)`
	`36`	`+`
	`37`	`+    # Map Python 2's chr to unichr`
	`38`	`+    chr = unichr`
	`39`	`+`
	`40`	`+    # Python 2 and 3 compatible bytes call`
	`41`	`+    def bytes(source, encoding='ascii', errors='strict'):`
	`42`	`+        return source.encode(encoding=encoding, errors=errors)`
	`43`	`+# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped`
`26`	`44`
`27`	`45`	`importre`
`28`	`46`	`importargparse`
`@@ -39,7 +57,7 @@`
`39`	`57`	`(0x0391,0x03a9))# GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA`
`40`	`58`
`41`	`59`	`defprint_record(codepoint,letter):`
`42`		`-print (unichr(codepoint)+"\t"+letter).encode("UTF-8")`
	`60`	`+print (chr(codepoint)+"\t"+letter)`
`43`	`61`
`44`	`62`	`classCodepoint:`
`45`	`63`	`def__init__(self,id,general_category,combining_ids):`
`@@ -116,7 +134,7 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):`
`116`	`134`	`charactersSet=set()`
`117`	`135`
`118`	`136`	`# RegEx to parse rules`
`119`		`-rulePattern=re.compile(ur'^(?:(.)\|(\\u[0-9a-fA-F]{4})) \u2192 (?:\'(.+)\'\|(.+)) ;')`
	`137`	`+rulePattern=re.compile(r'^(?:(.)\|(\\u[0-9a-fA-F]{4})) \u2192 (?:\'(.+)\'\|(.+)) ;')`
`120`	`138`
`121`	`139`	`# construct tree from XML`
`122`	`140`	`transliterationTree=ET.parse(latinAsciiFilePath)`
`@@ -134,7 +152,7 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):`
`134`	`152`	`# Group 3: plain "trg" char. Empty if group 4 is not.`
`135`	`153`	`# Group 4: plain "trg" char between quotes. Empty if group 3 is not.`
`136`	`154`	`ifmatchesisnotNone:`
`137`		`-src=matches.group(1)ifmatches.group(1)isnotNoneelsematches.group(2).decode('unicode-escape')`
	`155`	`+src=matches.group(1)ifmatches.group(1)isnotNoneelsebytes(matches.group(2),'UTF-8').decode('unicode-escape')`
`138`	`156`	`trg=matches.group(3)ifmatches.group(3)isnotNoneelsematches.group(4)`
`139`	`157`
`140`	`158`	`# "'" and """ are escaped`
`@@ -195,10 +213,10 @@ def main(args):`
`195`	`213`	`len(codepoint.combining_ids)>1:`
`196`	`214`	`ifis_letter_with_marks(codepoint,table):`
`197`	`215`	`charactersSet.add((codepoint.id,`
`198`		`-unichr(get_plain_letter(codepoint,table).id)))`
	`216`	`+chr(get_plain_letter(codepoint,table).id)))`
`199`	`217`	`elifargs.noLigaturesExpansionisFalseandis_ligature(codepoint,table):`
`200`	`218`	`charactersSet.add((codepoint.id,`
`201`		`-"".join(unichr(combining_codepoint.id)`
	`219`	`+"".join(chr(combining_codepoint.id)`
`202`	`220`	`forcombining_codepoint \`
`203`	`221`	`inget_plain_letters(codepoint,table))))`
`204`	`222`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit3d59da9

File tree

1 file changed

1 file changed

`‎contrib/unaccent/generate_unaccent_rules.py`

0 commit comments