Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5f4e5b5

Browse files
gh-53203: Fix strptime() for %c, %x and %X formats on many locales (GH-125406)
Fixed most locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan.
1 parent 06ca330, commit 5f4e5b5

File tree

4 files changed

+66
-40
lines changed

4 files changed

+66
-40
lines changed

‎Lib/_strptime.py

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
importlocale
1616
importcalendar
1717
fromreimportcompileasre_compile
18+
fromreimportsubasre_sub
1819
fromreimportIGNORECASE
1920
fromreimportescapeasre_escape
2021
fromdatetimeimport (dateasdatetime_date,
@@ -129,11 +130,23 @@ def __calc_date_time(self):
129130
time_tuple=time.struct_time((1999,3,17,22,44,55,2,76,0))
130131
time_tuple2=time.struct_time((1999,1,3,1,1,1,6,3,0))
131132
replacement_pairs= [
132-
('1999','%Y'), ('99','%y'), ('22','%H'),
133-
('44','%M'), ('55','%S'), ('76','%j'),
134-
('17','%d'), ('03','%m'), ('3','%m'),
135-
# '3' needed for when no leading zero.
136-
('2','%w'), ('10','%I')]
133+
('1999','%Y'), ('99','%y'), ('22','%H'),
134+
('44','%M'), ('55','%S'), ('76','%j'),
135+
('17','%d'), ('03','%m'), ('3','%m'),
136+
# '3' needed for when no leading zero.
137+
('2','%w'), ('10','%I'),
138+
# Non-ASCII digits
139+
('\u0661\u0669\u0669\u0669','%Y'),
140+
('\u0669\u0669','%Oy'),
141+
('\u0662\u0662','%OH'),
142+
('\u0664\u0664','%OM'),
143+
('\u0665\u0665','%OS'),
144+
('\u0661\u0667','%Od'),
145+
('\u0660\u0663','%Om'),
146+
('\u0663','%Om'),
147+
('\u0662','%Ow'),
148+
('\u0661\u0660','%OI'),
149+
]
137150
date_time= []
138151
fordirectivein ('%c','%x','%X'):
139152
current_format=time.strftime(directive,time_tuple).lower()
@@ -158,6 +171,10 @@ def __calc_date_time(self):
158171
fortzintz_values:
159172
iftz:
160173
current_format=current_format.replace(tz,"%Z")
174+
# Transform all non-ASCII digits to digits in range U+0660 to U+0669.
175+
current_format=re_sub(r'\d(?<![0-9])',
176+
lambdam:chr(0x0660+int(m[0])),
177+
current_format)
161178
forold,newinreplacement_pairs:
162179
current_format=current_format.replace(old,new)
163180
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
@@ -267,7 +284,7 @@ def __init__(self, locale_time=None):
267284
else:
268285
self.locale_time=LocaleTime()
269286
base=super()
270-
base.__init__({
287+
mapping={
271288
# The " [1-9]" part of the regex is to make %c from ANSI C work
272289
'd':r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
273290
'f':r"(?P<f>[0-9]{1,6})",
@@ -296,11 +313,15 @@ def __init__(self, locale_time=None):
296313
'Z':self.__seqToRE((tzfortz_namesinself.locale_time.timezone
297314
fortzintz_names),
298315
'Z'),
299-
'%':'%'})
300-
base.__setitem__('W',base.__getitem__('U').replace('U','W'))
301-
base.__setitem__('c',self.pattern(self.locale_time.LC_date_time))
302-
base.__setitem__('x',self.pattern(self.locale_time.LC_date))
316+
'%':'%'}
317+
fordin'dmyHIMS':
318+
mapping['O'+d]=r'(?P<%s>\d\d|\d| \d)'%d
319+
mapping['Ow']=r'(?P<w>\d)'
320+
mapping['W']=mapping['U'].replace('U','W')
321+
base.__init__(mapping)
303322
base.__setitem__('X',self.pattern(self.locale_time.LC_time))
323+
base.__setitem__('x',self.pattern(self.locale_time.LC_date))
324+
base.__setitem__('c',self.pattern(self.locale_time.LC_date_time))
304325

305326
def__seqToRE(self,to_convert,directive):
306327
"""Convert a list to a regex string for matching a directive.
@@ -328,28 +349,25 @@ def pattern(self, format):
328349
regex syntax are escaped.
329350
330351
"""
331-
processed_format=''
332352
# The sub() call escapes all characters that might be misconstrued
333353
# as regex syntax. Cannot use re.escape since we have to deal with
334354
# format directives (%m, etc.).
335-
regex_chars=re_compile(r"([\\.^$*+?\(\){}\[\]|])")
336-
format=regex_chars.sub(r"\\\1",format)
337-
whitespace_replacement=re_compile(r'\s+')
338-
format=whitespace_replacement.sub(r'\\s+',format)
355+
format=re_sub(r"([\\.^$*+?\(\){}\[\]|])",r"\\\1",format)
356+
format=re_sub(r'\s+',r'\\s+',format)
357+
format=re_sub(r"'","['\u02bc]",format)# needed for br_FR
339358
year_in_format=False
340359
day_of_month_in_format=False
341-
while'%'informat:
342-
directive_index=format.index('%')+1
343-
format_char=format[directive_index]
344-
processed_format="%s%s%s"% (processed_format,
345-
format[:directive_index-1],
346-
self[format_char])
347-
format=format[directive_index+1:]
360+
defrepl(m):
361+
format_char=m[1]
348362
matchformat_char:
349363
case'Y'|'y'|'G':
364+
nonlocalyear_in_format
350365
year_in_format=True
351366
case'd':
367+
nonlocalday_of_month_in_format
352368
day_of_month_in_format=True
369+
returnself[format_char]
370+
format=re_sub(r'%(O?.)',repl,format)
353371
ifday_of_month_in_formatandnotyear_in_format:
354372
importwarnings
355373
warnings.warn("""\
@@ -360,7 +378,7 @@ def pattern(self, format):
360378
See https://github.com/python/cpython/issues/70647.""",
361379
DeprecationWarning,
362380
skip_file_prefixes=(os.path.dirname(__file__),))
363-
return"%s%s"% (processed_format,format)
381+
returnformat
364382

365383
defcompile(self,format):
366384
"""Return a compiled re object for the format string."""
@@ -434,8 +452,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
434452
_regex_cache[format]=format_regex
435453
found=format_regex.match(data_string)
436454
ifnotfound:
437-
raiseValueError("time data %r does not match format %r :: /%s/"%
438-
(data_string,format,format_regex.pattern))
455+
raiseValueError("time data %r does not match format %r"%
456+
(data_string,format))
439457
iflen(data_string)!=found.end():
440458
raiseValueError("unconverted data remains: %s"%
441459
data_string[found.end():])

‎Lib/test/test_strptime.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def test_strptime_exception_context(self):
292292
# additional check for IndexError branch (issue #19545)
293293
withself.assertRaises(ValueError)ase:
294294
_strptime._strptime_time('19','%Y %')
295-
self.assertIs(e.exception.__suppress_context__,True)
295+
self.assertIsNone(e.exception.__context__)
296296

297297
deftest_unconverteddata(self):
298298
# Check ValueError is raised when there is unconverted data
@@ -485,12 +485,14 @@ def test_bad_timezone(self):
485485
# id_ID, ms_MY.
486486
# * Year is not included: ha_NG.
487487
# * Use non-Gregorian calendar: lo_LA, thai, th_TH.
488+
# On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
488489
#
489490
# BUG: Generates regexp that does not match the current date and time
490-
# for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM.
491+
# for lzh_TW.
491492
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
492493
'he_IL','eu_ES','ar_AE','mfe_MU','yo_NG',
493-
'csb_PL','br_FR','gez_ET','brx_IN')
494+
'csb_PL','br_FR','gez_ET','brx_IN',
495+
'my_MM','or_IN','shn_MM','az_IR')
494496
deftest_date_time_locale(self):
495497
# Test %c directive
496498
loc=locale.getlocale(locale.LC_TIME)[0]
@@ -512,20 +514,23 @@ def test_date_time_locale(self):
512514
self.roundtrip('%c',slice(0,6),time.localtime(now-366*24*3600))
513515

514516
# NB: Dates before 1969 do not roundtrip on some locales:
515-
# bo_CN, bo_IN, dz_BT, eu_ES, eu_FR.
517+
# az_IR, bo_CN, bo_IN, dz_BT, eu_ES, eu_FR, fa_IR, or_IN.
516518
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
517519
'he_IL','ar_AE','mfe_MU','yo_NG',
518-
'csb_PL','br_FR','gez_ET','brx_IN')
520+
'csb_PL','br_FR','gez_ET','brx_IN',
521+
'my_MM','shn_MM')
519522
deftest_date_time_locale2(self):
520523
# Test %c directive
521524
self.roundtrip('%c',slice(0,6), (1900,1,1,0,0,0,0,1,0))
525+
self.roundtrip('%c',slice(0,6), (1800,1,1,0,0,0,0,1,0))
522526

523527
# NB: Does not roundtrip because use non-Gregorian calendar:
524-
# lo_LA, thai, th_TH.
528+
# lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
525529
# BUG: Generates regexp that does not match the current date
526-
# for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM.
530+
# for lzh_TW.
527531
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
528-
'he_IL','eu_ES','ar_AE')
532+
'he_IL','eu_ES','ar_AE',
533+
'az_IR','my_MM','or_IN','shn_MM')
529534
deftest_date_locale(self):
530535
# Test %x directive
531536
now=time.time()
@@ -545,22 +550,23 @@ def test_date_locale(self):
545550
"musl libc issue on Emscripten, bpo-46390"
546551
)
547552
@run_with_locales('LC_TIME','en_US','fr_FR','de_DE','ja_JP',
548-
'eu_ES','ar_AE')
553+
'eu_ES','ar_AE','my_MM','shn_MM')
549554
deftest_date_locale2(self):
550555
# Test %x directive
551556
self.roundtrip('%x',slice(0,3), (1900,1,1,0,0,0,0,1,0))
557+
self.roundtrip('%x',slice(0,3), (1800,1,1,0,0,0,0,1,0))
552558

553559
# NB: Does not roundtrip in some locales due to the ambiguity of
554560
# the time representation (bugs in locales?):
555561
# * Seconds are not included: bokmal, ff_SN, nb_NO, nn_NO, no_NO,
556562
# norwegian, nynorsk.
557563
# * Hours are in 12-hour notation without AM/PM indication: hy_AM,
558564
# ms_MY, sm_WS.
559-
# BUG: Generates regexp that does not match the current time for
560-
# aa_DJ, aa_ER, aa_ET, am_ET, az_IR, byn_ER, fa_IR, gez_ER, gez_ET,
561-
# lzh_TW, my_MM, om_ET, om_KE, or_IN, shn_MM, sid_ET, so_DJ, so_ET,
562-
# so_SO, ti_ER, ti_ET, tig_ER, wal_ET.
563-
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP')
565+
# BUG: Generates regexp that does not match the current time for lzh_TW.
566+
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
567+
'aa_ET','am_ET','az_IR','byn_ER','fa_IR','gez_ET',
568+
'my_MM','om_ET','or_IN','shn_MM','sid_ET','so_SO',
569+
'ti_ET','tig_ER','wal_ET')
564570
deftest_time_locale(self):
565571
# Test %X directive
566572
now=time.time()

‎Lib/test/test_time.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ def test_strptime_exception_context(self):
298298
# additional check for IndexError branch (issue #19545)
299299
withself.assertRaises(ValueError)ase:
300300
time.strptime('19','%Y %')
301-
self.assertIs(e.exception.__suppress_context__,True)
301+
self.assertIsNone(e.exception.__context__)
302302

303303
deftest_strptime_leap_year(self):
304304
# GH-70647: warns if parsing a format with a day and no year.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix :func:`time.strptime` for ``%c``, ``%x`` and ``%X`` formats in many
2+
locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp