Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9290868

Browse files
[3.12] gh-53203: Fix strptime() for %c, %x and %X formats on many locales (GH-125406) (GH-125454) (GH-125483)
Fixed most locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan. (cherry picked from commit 5f4e5b5) (cherry picked from commit cbcdf34) Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
1 parent f1a6f68 commit 9290868

File tree

4 files changed

+74
-42
lines changed

4 files changed

+74
-42
lines changed

‎Lib/_strptime.py

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
importlocale
1515
importcalendar
1616
fromreimportcompileasre_compile
17+
fromreimportsubasre_sub
1718
fromreimportIGNORECASE
1819
fromreimportescapeasre_escape
1920
fromdatetimeimport (dateasdatetime_date,
@@ -128,11 +129,23 @@ def __calc_date_time(self):
128129
time_tuple=time.struct_time((1999,3,17,22,44,55,2,76,0))
129130
time_tuple2=time.struct_time((1999,1,3,1,1,1,6,3,0))
130131
replacement_pairs= [
131-
('1999','%Y'), ('99','%y'), ('22','%H'),
132-
('44','%M'), ('55','%S'), ('76','%j'),
133-
('17','%d'), ('03','%m'), ('3','%m'),
134-
# '3' needed for when no leading zero.
135-
('2','%w'), ('10','%I')]
132+
('1999','%Y'), ('99','%y'), ('22','%H'),
133+
('44','%M'), ('55','%S'), ('76','%j'),
134+
('17','%d'), ('03','%m'), ('3','%m'),
135+
# '3' needed for when no leading zero.
136+
('2','%w'), ('10','%I'),
137+
# Non-ASCII digits
138+
('\u0661\u0669\u0669\u0669','%Y'),
139+
('\u0669\u0669','%Oy'),
140+
('\u0662\u0662','%OH'),
141+
('\u0664\u0664','%OM'),
142+
('\u0665\u0665','%OS'),
143+
('\u0661\u0667','%Od'),
144+
('\u0660\u0663','%Om'),
145+
('\u0663','%Om'),
146+
('\u0662','%Ow'),
147+
('\u0661\u0660','%OI'),
148+
]
136149
date_time= []
137150
fordirectivein ('%c','%x','%X'):
138151
current_format=time.strftime(directive,time_tuple).lower()
@@ -157,6 +170,10 @@ def __calc_date_time(self):
157170
fortzintz_values:
158171
iftz:
159172
current_format=current_format.replace(tz,"%Z")
173+
# Transform all non-ASCII digits to digits in range U+0660 to U+0669.
174+
current_format=re_sub(r'\d(?<![0-9])',
175+
lambdam:chr(0x0660+int(m[0])),
176+
current_format)
160177
forold,newinreplacement_pairs:
161178
current_format=current_format.replace(old,new)
162179
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
@@ -266,7 +283,7 @@ def __init__(self, locale_time=None):
266283
else:
267284
self.locale_time=LocaleTime()
268285
base=super()
269-
base.__init__({
286+
mapping={
270287
# The " [1-9]" part of the regex is to make %c from ANSI C work
271288
'd':r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
272289
'f':r"(?P<f>[0-9]{1,6})",
@@ -295,11 +312,15 @@ def __init__(self, locale_time=None):
295312
'Z':self.__seqToRE((tzfortz_namesinself.locale_time.timezone
296313
fortzintz_names),
297314
'Z'),
298-
'%':'%'})
299-
base.__setitem__('W',base.__getitem__('U').replace('U','W'))
300-
base.__setitem__('c',self.pattern(self.locale_time.LC_date_time))
301-
base.__setitem__('x',self.pattern(self.locale_time.LC_date))
315+
'%':'%'}
316+
fordin'dmyHIMS':
317+
mapping['O'+d]=r'(?P<%s>\d\d|\d| \d)'%d
318+
mapping['Ow']=r'(?P<w>\d)'
319+
mapping['W']=mapping['U'].replace('U','W')
320+
base.__init__(mapping)
302321
base.__setitem__('X',self.pattern(self.locale_time.LC_time))
322+
base.__setitem__('x',self.pattern(self.locale_time.LC_date))
323+
base.__setitem__('c',self.pattern(self.locale_time.LC_date_time))
303324

304325
def__seqToRE(self,to_convert,directive):
305326
"""Convert a list to a regex string for matching a directive.
@@ -327,21 +348,16 @@ def pattern(self, format):
327348
regex syntax are escaped.
328349
329350
"""
330-
processed_format=''
331351
# The sub() call escapes all characters that might be misconstrued
332352
# as regex syntax. Cannot use re.escape since we have to deal with
333353
# format directives (%m, etc.).
334-
regex_chars=re_compile(r"([\\.^$*+?\(\){}\[\]|])")
335-
format=regex_chars.sub(r"\\\1",format)
336-
whitespace_replacement=re_compile(r'\s+')
337-
format=whitespace_replacement.sub(r'\\s+',format)
338-
while'%'informat:
339-
directive_index=format.index('%')+1
340-
processed_format="%s%s%s"% (processed_format,
341-
format[:directive_index-1],
342-
self[format[directive_index]])
343-
format=format[directive_index+1:]
344-
return"%s%s"% (processed_format,format)
354+
format=re_sub(r"([\\.^$*+?\(\){}\[\]|])",r"\\\1",format)
355+
format=re_sub(r'\s+',r'\\s+',format)
356+
format=re_sub(r"'","['\u02bc]",format)# needed for br_FR
357+
defrepl(m):
358+
returnself[m[1]]
359+
format=re_sub(r'%(O?.)',repl,format)
360+
returnformat
345361

346362
defcompile(self,format):
347363
"""Return a compiled re object for the format string."""
@@ -415,8 +431,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
415431
_regex_cache[format]=format_regex
416432
found=format_regex.match(data_string)
417433
ifnotfound:
418-
raiseValueError("time data %r does not match format %r :: /%s/"%
419-
(data_string,format,format_regex.pattern))
434+
raiseValueError("time data %r does not match format %r"%
435+
(data_string,format))
420436
iflen(data_string)!=found.end():
421437
raiseValueError("unconverted data remains: %s"%
422438
data_string[found.end():])

‎Lib/test/test_strptime.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ def test_strptime_exception_context(self):
290290
# additional check for IndexError branch (issue #19545)
291291
withself.assertRaises(ValueError)ase:
292292
_strptime._strptime_time('19','%Y %')
293-
self.assertIs(e.exception.__suppress_context__,True)
293+
self.assertIsNone(e.exception.__context__)
294294

295295
deftest_unconverteddata(self):
296296
# Check ValueError is raised when there is unconverted data
@@ -483,12 +483,14 @@ def test_bad_timezone(self):
483483
# id_ID, ms_MY.
484484
# * Year is not included: ha_NG.
485485
# * Use non-Gregorian calendar: lo_LA, thai, th_TH.
486+
# On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
486487
#
487488
# BUG: Generates regexp that does not match the current date and time
488-
# foraz_IR, fa_IR,lzh_TW, my_MM, or_IN, shn_MM.
489+
# for lzh_TW.
489490
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
490491
'he_IL','eu_ES','ar_AE','mfe_MU','yo_NG',
491-
'csb_PL','br_FR','gez_ET','brx_IN')
492+
'csb_PL','br_FR','gez_ET','brx_IN',
493+
'my_MM','or_IN','shn_MM','az_IR')
492494
deftest_date_time_locale(self):
493495
# Test %c directive
494496
loc=locale.getlocale(locale.LC_TIME)[0]
@@ -510,20 +512,23 @@ def test_date_time_locale(self):
510512
self.roundtrip('%c',slice(0,6),time.localtime(now-366*24*3600))
511513

512514
# NB: Dates before 1969 do not roundtrip on some locales:
513-
# bo_CN, bo_IN, dz_BT, eu_ES, eu_FR.
515+
#az_IR,bo_CN, bo_IN, dz_BT, eu_ES, eu_FR, fa_IR, or_IN.
514516
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
515517
'he_IL','ar_AE','mfe_MU','yo_NG',
516-
'csb_PL','br_FR','gez_ET','brx_IN')
518+
'csb_PL','br_FR','gez_ET','brx_IN',
519+
'my_MM','shn_MM')
517520
deftest_date_time_locale2(self):
518521
# Test %c directive
519522
self.roundtrip('%c',slice(0,6), (1900,1,1,0,0,0,0,1,0))
523+
self.roundtrip('%c',slice(0,6), (1800,1,1,0,0,0,0,1,0))
520524

521525
# NB: Does not roundtrip because use non-Gregorian calendar:
522-
# lo_LA, thai, th_TH.
526+
# lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
523527
# BUG: Generates regexp that does not match the current date
524-
# foraz_IR, fa_IR,lzh_TW, my_MM, or_IN, shn_MM.
528+
# for lzh_TW.
525529
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
526-
'he_IL','eu_ES','ar_AE')
530+
'he_IL','eu_ES','ar_AE',
531+
'az_IR','my_MM','or_IN','shn_MM')
527532
deftest_date_locale(self):
528533
# Test %x directive
529534
now=time.time()
@@ -543,30 +548,39 @@ def test_date_locale(self):
543548
"musl libc issue on Emscripten, bpo-46390"
544549
)
545550
@run_with_locales('LC_TIME','en_US','fr_FR','de_DE','ja_JP',
546-
'eu_ES','ar_AE')
551+
'eu_ES','ar_AE','my_MM','shn_MM')
547552
deftest_date_locale2(self):
548553
# Test %x directive
549554
self.roundtrip('%x',slice(0,3), (1900,1,1,0,0,0,0,1,0))
555+
self.roundtrip('%x',slice(0,3), (1800,1,1,0,0,0,0,1,0))
550556

551557
# NB: Does not roundtrip in some locales due to the ambiguity of
552558
# the time representation (bugs in locales?):
553559
# * Seconds are not included: bokmal, ff_SN, nb_NO, nn_NO, no_NO,
554560
# norwegian, nynorsk.
555561
# * Hours are in 12-hour notation without AM/PM indication: hy_AM,
556562
# ms_MY, sm_WS.
557-
# BUG: Generates regexp that does not match the current time for
558-
# aa_DJ, aa_ER, aa_ET, am_ET, az_IR, byn_ER, fa_IR, gez_ER, gez_ET,
559-
# lzh_TW, my_MM, om_ET, om_KE, or_IN, shn_MM, sid_ET, so_DJ, so_ET,
560-
# so_SO, ti_ER, ti_ET, tig_ER, wal_ET.
561-
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP')
563+
# BUG: Generates regexp that does not match the current time for lzh_TW.
564+
@run_with_locales('LC_TIME','C','en_US','fr_FR','de_DE','ja_JP',
565+
'aa_ET','am_ET','az_IR','byn_ER','fa_IR','gez_ET',
566+
'my_MM','om_ET','or_IN','shn_MM','sid_ET','so_SO',
567+
'ti_ET','tig_ER','wal_ET')
562568
deftest_time_locale(self):
563569
# Test %X directive
570+
loc=locale.getlocale(locale.LC_TIME)[0]
571+
pos=slice(3,6)
572+
ifglibc_verandglibc_ver< (2,29)andlocin {
573+
'aa_ET','am_ET','byn_ER','gez_ET','om_ET',
574+
'sid_ET','so_SO','ti_ET','tig_ER','wal_ET'}:
575+
# Hours are in 12-hour notation without AM/PM indication.
576+
# Ignore hours.
577+
pos=slice(4,6)
564578
now=time.time()
565-
self.roundtrip('%X',slice(3,6),time.localtime(now))
579+
self.roundtrip('%X',pos,time.localtime(now))
566580
# 1 hour 20 minutes 30 seconds ago
567-
self.roundtrip('%X',slice(3,6),time.localtime(now-4830))
581+
self.roundtrip('%X',pos,time.localtime(now-4830))
568582
# 12 hours ago
569-
self.roundtrip('%X',slice(3,6),time.localtime(now-12*3600))
583+
self.roundtrip('%X',pos,time.localtime(now-12*3600))
570584

571585
deftest_percent(self):
572586
# Make sure % signs are handled properly

‎Lib/test/test_time.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def test_strptime_exception_context(self):
292292
# additional check for IndexError branch (issue #19545)
293293
withself.assertRaises(ValueError)ase:
294294
time.strptime('19','%Y %')
295-
self.assertIs(e.exception.__suppress_context__,True)
295+
self.assertIsNone(e.exception.__context__)
296296

297297
deftest_asctime(self):
298298
time.asctime(time.gmtime(self.t))
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix :func:`time.strptime` for ``%c``, ``%x`` and ``%X`` formats in many
2+
locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp