Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

gh-53203: Fix strptime() for %c, %x and %X formats on many locales#125406

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
NextNext commit
gh-53203: Fix strptime() for %c, %x and %X formats on many locales
Fixed most locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan.
  • Loading branch information
@serhiy-storchaka
serhiy-storchaka committed Oct 13, 2024
commit 8e79673e65fd8edbf8888a15f45badca8a6ab4fd
64 changes: 41 additions & 23 deletions Lib/_strptime.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -15,6 +15,7 @@
import locale
import calendar
from re import compile as re_compile
from re import sub as re_sub
from re import IGNORECASE
from re import escape as re_escape
from datetime import (date as datetime_date,
Expand DownExpand Up@@ -129,11 +130,23 @@ def __calc_date_time(self):
time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0))
replacement_pairs = [
('1999', '%Y'), ('99', '%y'), ('22', '%H'),
('44', '%M'), ('55', '%S'), ('76', '%j'),
('17', '%d'), ('03', '%m'), ('3', '%m'),
# '3' needed for when no leading zero.
('2', '%w'), ('10', '%I')]
('1999', '%Y'), ('99', '%y'), ('22', '%H'),
('44', '%M'), ('55', '%S'), ('76', '%j'),
('17', '%d'), ('03', '%m'), ('3', '%m'),
# '3' needed for when no leading zero.
('2', '%w'), ('10', '%I'),
# Non-ASCII digits
('\u0661\u0669\u0669\u0669', '%Y'),
('\u0669\u0669', '%Oy'),
('\u0662\u0662', '%OH'),
('\u0664\u0664', '%OM'),
('\u0665\u0665', '%OS'),
('\u0661\u0667', '%Od'),
('\u0660\u0663', '%Om'),
('\u0663', '%Om'),
('\u0662', '%Ow'),
('\u0661\u0660', '%OI'),
]
date_time = []
for directive in ('%c', '%x', '%X'):
current_format = time.strftime(directive, time_tuple).lower()
Expand All@@ -158,6 +171,10 @@ def __calc_date_time(self):
for tz in tz_values:
if tz:
current_format = current_format.replace(tz, "%Z")
# Transform all non-ASCII digits to digits in range U+0660 to U+0669.
current_format = re_sub(r'\d(?<![0-9])',
lambda m: chr(0x0660 + int(m[0])),
current_format)
for old, new in replacement_pairs:
current_format = current_format.replace(old, new)
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
Expand DownExpand Up@@ -267,7 +284,7 @@ def __init__(self, locale_time=None):
else:
self.locale_time = LocaleTime()
base = super()
base.__init__({
mapping ={
# The " [1-9]" part of the regex is to make %c from ANSI C work
'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
'f': r"(?P<f>[0-9]{1,6})",
Expand DownExpand Up@@ -296,11 +313,15 @@ def __init__(self, locale_time=None):
'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
for tz in tz_names),
'Z'),
'%': '%'})
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
'%': '%'}
for d in 'dmyHIMS':
mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d
mapping['Ow'] = r'(?P<w>\d)'
mapping['W'] = mapping['U'].replace('U', 'W')
base.__init__(mapping)
base.__setitem__('X', self.pattern(self.locale_time.LC_time))
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))

def __seqToRE(self, to_convert, directive):
"""Convert a list to a regex string for matching a directive.
Expand DownExpand Up@@ -328,28 +349,25 @@ def pattern(self, format):
regex syntax are escaped.

"""
processed_format = ''
# The sub() call escapes all characters that might be misconstrued
# as regex syntax. Cannot use re.escape since we have to deal with
# format directives (%m, etc.).
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
format = regex_chars.sub(r"\\\1", format)
whitespace_replacement = re_compile(r'\s+')
format = whitespace_replacement.sub(r'\\s+', format)
format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format)
format = re_sub(r'\s+', r'\\s+', format)
format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR
year_in_format = False
day_of_month_in_format = False
while '%' in format:
directive_index = format.index('%')+1
format_char = format[directive_index]
processed_format = "%s%s%s" % (processed_format,
format[:directive_index-1],
self[format_char])
format = format[directive_index+1:]
def repl(m):
format_char = m[1]
match format_char:
case 'Y' | 'y' | 'G':
nonlocal year_in_format
year_in_format = True
case 'd':
nonlocal day_of_month_in_format
day_of_month_in_format = True
return self[format_char]
format = re_sub(r'%(O?.)', repl, format)
if day_of_month_in_format and not year_in_format:
import warnings
warnings.warn("""\
Expand All@@ -360,7 +378,7 @@ def pattern(self, format):
See https://github.com/python/cpython/issues/70647.""",
DeprecationWarning,
skip_file_prefixes=(os.path.dirname(__file__),))
return"%s%s" % (processed_format,format)
return format

def compile(self, format):
"""Return a compiled re object for the format string."""
Expand Down
31 changes: 18 additions & 13 deletions Lib/test/test_strptime.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -292,7 +292,7 @@ def test_strptime_exception_context(self):
# additional check for IndexError branch (issue #19545)
with self.assertRaises(ValueError) as e:
_strptime._strptime_time('19', '%Y %')
self.assertIs(e.exception.__suppress_context__, True)
self.assertIsNone(e.exception.__context__)

def test_unconverteddata(self):
# Check ValueError is raised when there is unconverted data
Expand DownExpand Up@@ -487,10 +487,11 @@ def test_bad_timezone(self):
# * Use non-Gregorian calendar: lo_LA, thai, th_TH.
#
# BUG: Generates regexp that does not match the current date and time
# for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM.
# for lzh_TW.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG',
'csb_PL', 'br_FR', 'gez_ET', 'brx_IN')
'csb_PL', 'br_FR', 'gez_ET', 'brx_IN',
'fa_IR', 'my_MM', 'or_IN', 'shn_MM', 'az_IR')
def test_date_time_locale(self):
# Test %c directive
loc = locale.getlocale(locale.LC_TIME)[0]
Expand All@@ -512,20 +513,23 @@ def test_date_time_locale(self):
self.roundtrip('%c', slice(0, 6), time.localtime(now - 366*24*3600))

# NB: Dates before 1969 do not roundtrip on some locales:
# bo_CN, bo_IN, dz_BT, eu_ES, eu_FR.
# az_IR, bo_CN, bo_IN, dz_BT, eu_ES, eu_FR, fa_IR, or_IN.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG',
'csb_PL', 'br_FR', 'gez_ET', 'brx_IN')
'csb_PL', 'br_FR', 'gez_ET', 'brx_IN',
'my_MM', 'shn_MM')
def test_date_time_locale2(self):
# Test %c directive
self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0))
self.roundtrip('%c', slice(0, 6), (1800, 1, 1, 0, 0, 0, 0, 1, 0))

# NB: Does not roundtrip because use non-Gregorian calendar:
# lo_LA, thai, th_TH.
# BUG: Generates regexp that does not match the current date
# for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM.
# for lzh_TW.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'he_IL', 'eu_ES', 'ar_AE')
'he_IL', 'eu_ES', 'ar_AE',
'az_IR', 'fa_IR', 'my_MM', 'or_IN', 'shn_MM')
def test_date_locale(self):
# Test %x directive
now = time.time()
Expand All@@ -545,22 +549,23 @@ def test_date_locale(self):
"musl libc issue on Emscripten, bpo-46390"
)
@run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'eu_ES', 'ar_AE')
'eu_ES', 'ar_AE', 'my_MM', 'shn_MM')
def test_date_locale2(self):
# Test %x directive
self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0))
self.roundtrip('%x', slice(0, 3), (1800, 1, 1, 0, 0, 0, 0, 1, 0))

# NB: Does not roundtrip in some locales due to the ambiguity of
# the time representation (bugs in locales?):
# * Seconds are not included: bokmal, ff_SN, nb_NO, nn_NO, no_NO,
# norwegian, nynorsk.
# * Hours are in 12-hour notation without AM/PM indication: hy_AM,
# ms_MY, sm_WS.
# BUG: Generates regexp that does not match the current time for
# aa_DJ, aa_ER, aa_ET, am_ET, az_IR, byn_ER, fa_IR, gez_ER, gez_ET,
# lzh_TW, my_MM, om_ET, om_KE, or_IN, shn_MM, sid_ET, so_DJ, so_ET,
# so_SO, ti_ER, ti_ET, tig_ER, wal_ET.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP')
# BUG: Generates regexp that does not match the current time for lzh_TW.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET',
'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO',
'ti_ET', 'tig_ER', 'wal_ET')
def test_time_locale(self):
# Test %X directive
now = time.time()
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
Fix :func:`time.strptime` for ``%c``, ``%x`` and ``%X`` formats in many
locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan.
Loading

[8]ページ先頭

©2009-2025 Movatter.jp