Source code for dateutil.parser._parser

# -*- coding: utf-8 -*-"""This module offers a generic date/time string parser which is able to parsemost known formats to represent a date and/or time.This module attempts to be forgiving with regards to unlikely input formats,returning a datetime object even for dates which are ambiguous. If an elementof a date/time stamp is omitted, the following rules are applied:- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour  on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is  specified.- If a time zone is omitted, a timezone-naive datetime is returned.If any other elements are missing, they are taken from the:class:`datetime.datetime` object passed to the parameter ``default``. If thisresults in a day number exceeding the valid number of days per month, thevalue falls back to the end of the month.Additional resources about date/time string formats can be found below:- `A summary of the international standard date and time notation  <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_- `CPAN ParseDate module  <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_- `Java SimpleDateFormat Class  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_"""from__future__importunicode_literalsimportdatetimeimportreimportstringimporttimeimportwarningsfromcalendarimportmonthrangefromioimportStringIOimportsixfromsiximportinteger_types,text_typefromdecimalimportDecimalfromwarningsimportwarnfrom..importrelativedeltafrom..importtz__all__=["parse","parserinfo","ParserError"]# TODO: pandas.core.tools.datetimes imports this explicitly.  
Might be worth# making public and/or figuring out if there is something we can# take off their plate.class_timelex(object):# Fractional seconds are sometimes split by a comma_split_decimal=re.compile("([.,])")def__init__(self,instream):ifisinstance(instream,(bytes,bytearray)):instream=instream.decode()ifisinstance(instream,text_type):instream=StringIO(instream)elifgetattr(instream,'read',None)isNone:raiseTypeError('Parser must be a string or character stream, not ''{itype}'.format(itype=instream.__class__.__name__))self.instream=instreamself.charstack=[]self.tokenstack=[]self.eof=Falsedefget_token(self):"""        This function breaks the time string into lexical units (tokens), which        can be parsed by the parser. Lexical units are demarcated by changes in        the character set, so any continuous string of letters is considered        one unit, any continuous string of numbers is considered one unit.        The main complication arises from the fact that dots ('.') can be used        both as separators (e.g. "Sep.20.2009") or decimal points (e.g.        "4:30:21.447"). As such, it is necessary to read the full context of        any dot-separated strings before breaking it into tokens; as such, this        function maintains a "token stack", for when the ambiguous context        demands that multiple tokens be parsed at once.        
"""ifself.tokenstack:returnself.tokenstack.pop(0)seenletters=Falsetoken=Nonestate=Nonewhilenotself.eof:# We only realize that we've reached the end of a token when we# find a character that's not part of the current token - since# that character may be part of the next token, it's stored in the# charstack.ifself.charstack:nextchar=self.charstack.pop(0)else:nextchar=self.instream.read(1)whilenextchar=='\x00':nextchar=self.instream.read(1)ifnotnextchar:self.eof=Truebreakelifnotstate:# First character of the token - determines if we're starting# to parse a word, a number or something else.token=nextcharifself.isword(nextchar):state='a'elifself.isnum(nextchar):state='0'elifself.isspace(nextchar):token=' 'break# emit tokenelse:break# emit tokenelifstate=='a':# If we've already started reading a word, we keep reading# letters until we find something that's not part of a word.seenletters=Trueifself.isword(nextchar):token+=nextcharelifnextchar=='.':token+=nextcharstate='a.'else:self.charstack.append(nextchar)break# emit tokenelifstate=='0':# If we've already started reading a number, we keep reading# numbers until we find something that doesn't fit.ifself.isnum(nextchar):token+=nextcharelifnextchar=='.'or(nextchar==','andlen(token)>=2):token+=nextcharstate='0.'else:self.charstack.append(nextchar)break# emit tokenelifstate=='a.':# If we've seen some letters and a dot separator, continue# parsing, and the tokens will be broken up later.seenletters=Trueifnextchar=='.'orself.isword(nextchar):token+=nextcharelifself.isnum(nextchar)andtoken[-1]=='.':token+=nextcharstate='0.'else:self.charstack.append(nextchar)break# emit tokenelifstate=='0.':# If we've seen at least one dot separator, keep going, we'll# break up the tokens later.ifnextchar=='.'orself.isnum(nextchar):token+=nextcharelifself.isword(nextchar)andtoken[-1]=='.':token+=nextcharstate='a.'else:self.charstack.append(nextchar)break# emit 
tokenif(statein('a.','0.')and(seenlettersortoken.count('.')>1ortoken[-1]in'.,')):l=self._split_decimal.split(token)token=l[0]fortokinl[1:]:iftok:self.tokenstack.append(tok)ifstate=='0.'andtoken.count('.')==0:token=token.replace(',','.')returntokendef__iter__(self):returnselfdef__next__(self):token=self.get_token()iftokenisNone:raiseStopIterationreturntokendefnext(self):returnself.__next__()# Python 2.x support@classmethoddefsplit(cls,s):returnlist(cls(s))@classmethoddefisword(cls,nextchar):""" Whether or not the next character is part of a word """returnnextchar.isalpha()@classmethoddefisnum(cls,nextchar):""" Whether the next character is part of a number """returnnextchar.isdigit()@classmethoddefisspace(cls,nextchar):""" Whether the next character is whitespace """returnnextchar.isspace()class_resultbase(object):def__init__(self):forattrinself.__slots__:setattr(self,attr,None)def_repr(self,classname):l=[]forattrinself.__slots__:value=getattr(self,attr)ifvalueisnotNone:l.append("%s=%s"%(attr,repr(value)))return"%s(%s)"%(classname,", ".join(l))def__len__(self):return(sum(getattr(self,attr)isnotNoneforattrinself.__slots__))def__repr__(self):returnself._repr(self.__class__.__name__)[docs]classparserinfo(object):"""    Class which handles what inputs are accepted. Subclass this to customize    the language and acceptable values for each parameter.    :param dayfirst:        Whether to interpret the first value in an ambiguous 3-integer date        (e.g. 01/05/09) as the day (``True``) or month (``False``). If        ``yearfirst`` is set to ``True``, this distinguishes between YDM        and YMD. Default is ``False``.    :param yearfirst:        Whether to interpret the first value in an ambiguous 3-integer date        (e.g. 01/05/09) as the year. If ``True``, the first number is taken        to be the year, otherwise the last number is taken to be the year.        Default is ``False``.    
"""# m from a.m/p.m, t from ISO T separatorJUMP=[" ",".",",",";","-","/","'","at","on","and","ad","m","t","of","st","nd","rd","th"]WEEKDAYS=[("Mon","Monday"),("Tue","Tuesday"),# TODO: "Tues"("Wed","Wednesday"),("Thu","Thursday"),# TODO: "Thurs"("Fri","Friday"),("Sat","Saturday"),("Sun","Sunday")]MONTHS=[("Jan","January"),("Feb","February"),# TODO: "Febr"("Mar","March"),("Apr","April"),("May","May"),("Jun","June"),("Jul","July"),("Aug","August"),("Sep","Sept","September"),("Oct","October"),("Nov","November"),("Dec","December")]HMS=[("h","hour","hours"),("m","minute","minutes"),("s","second","seconds")]AMPM=[("am","a"),("pm","p")]UTCZONE=["UTC","GMT","Z","z"]PERTAIN=["of"]TZOFFSET={}# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",#              "Anno Domini", "Year of Our Lord"]def__init__(self,dayfirst=False,yearfirst=False):self._jump=self._convert(self.JUMP)self._weekdays=self._convert(self.WEEKDAYS)self._months=self._convert(self.MONTHS)self._hms=self._convert(self.HMS)self._ampm=self._convert(self.AMPM)self._utczone=self._convert(self.UTCZONE)self._pertain=self._convert(self.PERTAIN)self.dayfirst=dayfirstself.yearfirst=yearfirstself._year=time.localtime().tm_yearself._century=self._year//100*100def_convert(self,lst):dct={}fori,vinenumerate(lst):ifisinstance(v,tuple):forvinv:dct[v.lower()]=ielse:dct[v.lower()]=ireturndct[docs]defjump(self,name):returnname.lower()inself._jump
def weekday(self, name):
    """
    Look up the index of a weekday name.

    :param name:
        A weekday name or abbreviation; matching is case-insensitive
        because the lookup table is keyed on lower-cased names.

    :return:
        The position of the matching entry in ``WEEKDAYS`` (as stored in
        ``self._weekdays``), or ``None`` if ``name`` is not a known weekday.
    """
    # dict.get() yields None for unknown names, which is exactly what the
    # original try/except KeyError -> return None construct produced.
    return self._weekdays.get(name.lower())
def month(self, name):
    """
    Look up the calendar number of a month name.

    :param name:
        A month name or abbreviation; matching is case-insensitive because
        the lookup table is keyed on lower-cased names.

    :return:
        The 1-based month number (the stored index in ``self._months`` plus
        one), or ``None`` if ``name`` is not a known month.
    """
    index = self._months.get(name.lower())
    if index is None:
        return None

    # The table stores 0-based positions; callers expect January == 1.
    return index + 1
def hms(self, name):
    """
    Look up the index of an hour/minute/second label.

    :param name:
        A unit label such as ``"h"``, ``"minutes"`` or ``"Second"``;
        matching is case-insensitive.

    :return:
        The position of the matching entry in ``HMS`` (as stored in
        ``self._hms``), or ``None`` if ``name`` is not a known label.
    """
    return self._hms.get(name.lower())
def ampm(self, name):
    """
    Look up the index of an AM/PM marker.

    :param name:
        A candidate meridiem marker (e.g. ``"am"``, ``"P"``); matching is
        case-insensitive.

    :return:
        The position of the matching entry in ``AMPM`` (as stored in
        ``self._ampm``), or ``None`` if ``name`` is not a known marker.
    """
    return self._ampm.get(name.lower())
def pertain(self, name):
    """
    Report whether a token is one of the configured ``PERTAIN`` words.

    :param name:
        The token to test; matching is case-insensitive.

    :return:
        ``True`` if the lower-cased token is a key of ``self._pertain``,
        ``False`` otherwise.
    """
    return name.lower() in self._pertain
def utczone(self, name):
    """
    Report whether a token is one of the configured ``UTCZONE`` names.

    :param name:
        The time zone name to test; matching is case-insensitive.

    :return:
        ``True`` if the lower-cased name is a key of ``self._utczone``,
        ``False`` otherwise.
    """
    return name.lower() in self._utczone
def tzoffset(self, name):
    """
    Return the UTC offset associated with a time zone name.

    :param name:
        The time zone name to look up.

    :return:
        ``0`` if ``name`` is one of the ``UTCZONE`` names (compared
        case-insensitively), otherwise whatever ``TZOFFSET`` maps ``name``
        to, or ``None`` if it has no entry.
    """
    # self._utczone is keyed on lower-cased names, so the lookup has to be
    # lower-cased as well -- comparing the raw name meant that "UTC", "GMT"
    # and "Z" never matched. This mirrors what utczone() does.
    if name.lower() in self._utczone:
        return 0

    # The TZOFFSET lookup is deliberately left case-sensitive.
    return self.TZOFFSET.get(name)
def convertyear(self, year, century_specified=False):
    """
    Converts two-digit years to year within [-50, 49]
    range of self._year (current local time)

    :param year:
        The non-negative year value to convert.

    :param century_specified:
        If ``True``, ``year`` is taken literally even when it is below 100
        and is returned unchanged.

    :return:
        ``year`` itself when it is 100 or greater or the century was
        specified; otherwise the year placed in the century that puts it
        within 50 years before to 49 years after ``self._year``.
    """
    # Function contract is that the year is always positive
    assert year >= 0

    if year < 100 and not century_specified:
        # assume current century to start
        year += self._century

        if year >= self._year + 50:  # if too far in future
            year -= 100
        elif year < self._year - 50:  # if too far in past
            year += 100

    return year
def validate(self, res):
    """
    Normalize a parse result in place.

    The year, if present, is passed through :meth:`convertyear`. The time
    zone fields are then made consistent: a zero offset without a name, or
    a name of ``'Z'``/``'z'``, becomes the name ``"UTC"`` with offset 0; a
    name recognised by :meth:`utczone` with a non-zero (or unset) offset
    has its offset forced to 0.

    :param res:
        The result object to normalize; its ``year``,
        ``century_specified``, ``tzname`` and ``tzoffset`` attributes are
        read and may be overwritten.

    :return:
        Always ``True``.
    """
    # move to info
    if res.year is not None:
        res.year = self.convertyear(res.year, res.century_specified)

    offset_is_utc_without_name = res.tzoffset == 0 and not res.tzname
    name_is_zulu = res.tzname == 'Z' or res.tzname == 'z'

    if offset_is_utc_without_name or name_is_zulu:
        res.tzname = "UTC"
        res.tzoffset = 0
    elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
        # A UTC-equivalent name always means a zero offset, whatever
        # offset (if any) had been recorded for it.
        res.tzoffset = 0

    return True
class_ymd(list):def__init__(self,*args,**kwargs):super(self.__class__,self).__init__(*args,**kwargs)self.century_specified=Falseself.dstridx=Noneself.mstridx=Noneself.ystridx=None@propertydefhas_year(self):returnself.ystridxisnotNone@propertydefhas_month(self):returnself.mstridxisnotNone@propertydefhas_day(self):returnself.dstridxisnotNonedefcould_be_day(self,value):ifself.has_day:returnFalseelifnotself.has_month:return1<=value<=31elifnotself.has_year:# Be permissive, assume leap yearmonth=self[self.mstridx]return1<=value<=monthrange(2000,month)[1]else:month=self[self.mstridx]year=self[self.ystridx]return1<=value<=monthrange(year,month)[1]defappend(self,val,label=None):ifhasattr(val,'__len__'):ifval.isdigit()andlen(val)>2:self.century_specified=Trueiflabelnotin[None,'Y']:# pragma: no coverraiseValueError(label)label='Y'elifval>100:self.century_specified=Trueiflabelnotin[None,'Y']:# pragma: no coverraiseValueError(label)label='Y'super(self.__class__,self).append(int(val))iflabel=='M':ifself.has_month:raiseValueError('Month is already set')self.mstridx=len(self)-1eliflabel=='D':ifself.has_day:raiseValueError('Day is already set')self.dstridx=len(self)-1eliflabel=='Y':ifself.has_year:raiseValueError('Year is already set')self.ystridx=len(self)-1def_resolve_from_stridxs(self,strids):"""        Try to resolve the identities of year/month/day elements using        ystridx, mstridx, and dstridx, if enough of these are specified.        
"""iflen(self)==3andlen(strids)==2:# we can back out the remaining stridx valuemissing=[xforxinrange(3)ifxnotinstrids.values()]key=[xforxin['y','m','d']ifxnotinstrids]assertlen(missing)==len(key)==1key=key[0]val=missing[0]strids[key]=valassertlen(self)==len(strids)# otherwise this should not be calledout={key:self[strids[key]]forkeyinstrids}return(out.get('y'),out.get('m'),out.get('d'))defresolve_ymd(self,yearfirst,dayfirst):len_ymd=len(self)year,month,day=(None,None,None)strids=(('y',self.ystridx),('m',self.mstridx),('d',self.dstridx))strids={key:valforkey,valinstridsifvalisnotNone}if(len(self)==len(strids)>0or(len(self)==3andlen(strids)==2)):returnself._resolve_from_stridxs(strids)mstridx=self.mstridxiflen_ymd>3:raiseValueError("More than three YMD values")eliflen_ymd==1or(mstridxisnotNoneandlen_ymd==2):# One member, or two members with a month stringifmstridxisnotNone:month=self[mstridx]# since mstridx is 0 or 1, self[mstridx-1] always# looks up the other elementother=self[mstridx-1]else:other=self[0]iflen_ymd>1ormstridxisNone:ifother>31:year=otherelse:day=othereliflen_ymd==2:# Two members with numbersifself[0]>31:# 99-01year,month=selfelifself[1]>31:# 01-99month,year=selfelifdayfirstandself[1]<=12:# 13-01day,month=selfelse:# 01-13month,day=selfeliflen_ymd==3:# Three membersifmstridx==0:ifself[1]>31:# Apr-2003-25month,year,day=selfelse:month,day,year=selfelifmstridx==1:ifself[0]>31or(yearfirstandself[2]<=31):# 99-Jan-01year,month,day=selfelse:# 01-Jan-01# Give precedence to day-first, since# two-digit years is usually hand-written.day,month,year=selfelifmstridx==2:# WTF!?ifself[1]>31:# 01-99-Janday,year,month=selfelse:# 99-01-Janyear,day,month=selfelse:if(self[0]>31orself.ystridx==0or(yearfirstandself[1]<=12andself[2]<=31)):# 99-01-01ifdayfirstandself[2]<=12:year,day,month=selfelse:year,month,day=selfelifself[0]>12or(dayfirstandself[1]<=12):# 13-01-01day,month,year=selfelse:# 
01-13-01month,day,year=selfreturnyear,month,dayclassparser(object):def__init__(self,info=None):self.info=infoorparserinfo()[docs]defparse(self,timestr,default=None,ignoretz=False,tzinfos=None,**kwargs):"""        Parse the date/time string into a :class:`datetime.datetime` object.        :param timestr:            Any date/time string using the supported formats.        :param default:            The default datetime object, if this is a datetime object and not            ``None``, elements specified in ``timestr`` replace elements in the            default object.        :param ignoretz:            If set ``True``, time zones in parsed strings are ignored and a            naive :class:`datetime.datetime` object is returned.        :param tzinfos:            Additional time zone names / aliases which may be present in the            string. This argument maps time zone names (and optionally offsets            from those time zones) to time zones. This parameter can be a            dictionary with timezone aliases mapping time zone names to time            zones or a function taking two parameters (``tzname`` and            ``tzoffset``) and returning a time zone.            The timezones to which the names are mapped can be an integer            offset from UTC in seconds or a :class:`tzinfo` object.            .. doctest::               :options: +NORMALIZE_WHITESPACE                >>> from dateutil.parser import parse                >>> from dateutil.tz import gettz                >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)                datetime.datetime(2012, 1, 19, 17, 21,                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))            This parameter is ignored if ``ignoretz`` is set. 
       :param \\*\\*kwargs:            Keyword arguments as passed to ``_parse()``.        :return:            Returns a :class:`datetime.datetime` object or, if the            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the            first element being a :class:`datetime.datetime` object, the second            a tuple containing the fuzzy tokens.        :raises ParserError:            Raised for invalid or unknown string format, if the provided            :class:`tzinfo` is not in a valid format, or if an invalid date            would be created.        :raises TypeError:            Raised for non-string or character stream input.        :raises OverflowError:            Raised if the parsed date exceeds the largest valid C integer on            your system.        """ifdefaultisNone:default=datetime.datetime.now().replace(hour=0,minute=0,second=0,microsecond=0)res,skipped_tokens=self._parse(timestr,**kwargs)ifresisNone:raiseParserError("Unknown string format:%s",timestr)iflen(res)==0:raiseParserError("String does not contain a date:%s",timestr)try:ret=self._build_naive(res,default)exceptValueErrorase:six.raise_from(ParserError(str(e)+":%s",timestr),e)ifnotignoretz:ret=self._build_tzaware(ret,res,tzinfos)ifkwargs.get('fuzzy_with_tokens',False):returnret,skipped_tokenselse:returnret
class_result(_resultbase):__slots__=["year","month","day","weekday","hour","minute","second","microsecond","tzname","tzoffset","ampm","any_unused_tokens"]def_parse(self,timestr,dayfirst=None,yearfirst=None,fuzzy=False,fuzzy_with_tokens=False):"""        Private method which performs the heavy lifting of parsing, called from        ``parse()``, which passes on its ``kwargs`` to this function.        :param timestr:            The string to parse.        :param dayfirst:            Whether to interpret the first value in an ambiguous 3-integer date            (e.g. 01/05/09) as the day (``True``) or month (``False``). If            ``yearfirst`` is set to ``True``, this distinguishes between YDM            and YMD. If set to ``None``, this value is retrieved from the            current :class:`parserinfo` object (which itself defaults to            ``False``).        :param yearfirst:            Whether to interpret the first value in an ambiguous 3-integer date            (e.g. 01/05/09) as the year. If ``True``, the first number is taken            to be the year, otherwise the last number is taken to be the year.            If this is set to ``None``, the value is retrieved from the current            :class:`parserinfo` object (which itself defaults to ``False``).        :param fuzzy:            Whether to allow fuzzy parsing, allowing for string like "Today is            January 1, 2047 at 8:21:00AM".        :param fuzzy_with_tokens:            If ``True``, ``fuzzy`` is automatically set to True, and the parser            will return a tuple where the first element is the parsed            :class:`datetime.datetime` datetimestamp and the second element is            a tuple containing the portions of the string which were ignored:            .. 
doctest::                >>> from dateutil.parser import parse                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))        """iffuzzy_with_tokens:fuzzy=Trueinfo=self.infoifdayfirstisNone:dayfirst=info.dayfirstifyearfirstisNone:yearfirst=info.yearfirstres=self._result()l=_timelex.split(timestr)# Splits the timestr into tokensskipped_idxs=[]# year/month/day listymd=_ymd()len_l=len(l)i=0try:whilei<len_l:# Check if it's a numbervalue_repr=l[i]try:value=float(value_repr)exceptValueError:value=NoneifvalueisnotNone:# Numeric tokeni=self._parse_numeric_token(l,i,info,ymd,res,fuzzy)# Check weekdayelifinfo.weekday(l[i])isnotNone:value=info.weekday(l[i])res.weekday=value# Check month nameelifinfo.month(l[i])isnotNone:value=info.month(l[i])ymd.append(value,'M')ifi+1<len_l:ifl[i+1]in('-','/'):# Jan-01[-99]sep=l[i+1]ymd.append(l[i+2])ifi+3<len_landl[i+3]==sep:# Jan-01-99ymd.append(l[i+4])i+=2i+=2elif(i+4<len_landl[i+1]==l[i+3]==' 'andinfo.pertain(l[i+2])):# Jan of 01# In this case, 01 is clearly yearifl[i+4].isdigit():# Convert it here to become unambiguousvalue=int(l[i+4])year=str(info.convertyear(value))ymd.append(year,'Y')else:# Wrong guesspass# TODO: not hit in testsi+=4# Check am/pmelifinfo.ampm(l[i])isnotNone:value=info.ampm(l[i])val_is_ampm=self._ampm_valid(res.hour,res.ampm,fuzzy)ifval_is_ampm:res.hour=self._adjust_ampm(res.hour,value)res.ampm=valueeliffuzzy:skipped_idxs.append(i)# Check for a timezone nameelifself._could_be_tzname(res.hour,res.tzname,res.tzoffset,l[i]):res.tzname=l[i]res.tzoffset=info.tzoffset(res.tzname)# Check for something like GMT+3, or BRST+3. Notice# that it doesn't mean "I am 3 hours after GMT", but# "my time +3 is GMT". 
If found, we reverse the# logic so that timezone parsing code will get it# right.ifi+1<len_landl[i+1]in('+','-'):l[i+1]=('+','-')[l[i+1]=='+']res.tzoffset=Noneifinfo.utczone(res.tzname):# With something like GMT+3, the timezone# is *not* GMT.res.tzname=None# Check for a numbered timezoneelifres.hourisnotNoneandl[i]in('+','-'):signal=(-1,1)[l[i]=='+']len_li=len(l[i+1])# TODO: check that l[i + 1] is integer?iflen_li==4:# -0300hour_offset=int(l[i+1][:2])min_offset=int(l[i+1][2:])elifi+2<len_landl[i+2]==':':# -03:00hour_offset=int(l[i+1])min_offset=int(l[i+3])# TODO: Check that l[i+3] is minute-like?i+=2eliflen_li<=2:# -[0]3hour_offset=int(l[i+1][:2])min_offset=0else:raiseValueError(timestr)res.tzoffset=signal*(hour_offset*3600+min_offset*60)# Look for a timezone name between parenthesisif(i+5<len_landinfo.jump(l[i+2])andl[i+3]=='('andl[i+5]==')'and3<=len(l[i+4])andself._could_be_tzname(res.hour,res.tzname,None,l[i+4])):# -0300 (BRST)res.tzname=l[i+4]i+=4i+=1# Check jumpselifnot(info.jump(l[i])orfuzzy):raiseValueError(timestr)else:skipped_idxs.append(i)i+=1# Process year/month/dayyear,month,day=ymd.resolve_ymd(yearfirst,dayfirst)res.century_specified=ymd.century_specifiedres.year=yearres.month=monthres.day=dayexcept(IndexError,ValueError):returnNone,Noneifnotinfo.validate(res):returnNone,Noneiffuzzy_with_tokens:skipped_tokens=self._recombine_skipped(l,skipped_idxs)returnres,tuple(skipped_tokens)else:returnres,Nonedef_parse_numeric_token(self,tokens,idx,info,ymd,res,fuzzy):# Token is a numbervalue_repr=tokens[idx]try:value=self._to_decimal(value_repr)exceptExceptionase:six.raise_from(ValueError('Unknown numeric token'),e)len_li=len(value_repr)len_l=len(tokens)if(len(ymd)==3andlen_liin(2,4)andres.hourisNoneand(idx+1>=len_lor(tokens[idx+1]!=':'andinfo.hms(tokens[idx+1])isNone))):# 19990101T23[59]s=tokens[idx]res.hour=int(s[:2])iflen_li==4:res.minute=int(s[2:])eliflen_li==6or(len_li>6andtokens[idx].find('.')==6):# YYMMDD or 
HHMMSS[.ss]s=tokens[idx]ifnotymdand'.'notintokens[idx]:ymd.append(s[:2])ymd.append(s[2:4])ymd.append(s[4:])else:# 19990101T235959[.59]# TODO: Check if res attributes already set.res.hour=int(s[:2])res.minute=int(s[2:4])res.second,res.microsecond=self._parsems(s[4:])eliflen_liin(8,12,14):# YYYYMMDDs=tokens[idx]ymd.append(s[:4],'Y')ymd.append(s[4:6])ymd.append(s[6:8])iflen_li>8:res.hour=int(s[8:10])res.minute=int(s[10:12])iflen_li>12:res.second=int(s[12:])elifself._find_hms_idx(idx,tokens,info,allow_jump=True)isnotNone:# HH[ ]h or MM[ ]m or SS[.ss][ ]shms_idx=self._find_hms_idx(idx,tokens,info,allow_jump=True)(idx,hms)=self._parse_hms(idx,tokens,info,hms_idx)ifhmsisnotNone:# TODO: checking that hour/minute/second are not# already set?self._assign_hms(res,value_repr,hms)elifidx+2<len_landtokens[idx+1]==':':# HH:MM[:SS[.ss]]res.hour=int(value)value=self._to_decimal(tokens[idx+2])# TODO: try/except for this?(res.minute,res.second)=self._parse_min_sec(value)ifidx+4<len_landtokens[idx+3]==':':res.second,res.microsecond=self._parsems(tokens[idx+4])idx+=2idx+=2elifidx+1<len_landtokens[idx+1]in('-','/','.'):sep=tokens[idx+1]ymd.append(value_repr)ifidx+2<len_landnotinfo.jump(tokens[idx+2]):iftokens[idx+2].isdigit():# 01-01[-01]ymd.append(tokens[idx+2])else:# 01-Jan[-01]value=info.month(tokens[idx+2])ifvalueisnotNone:ymd.append(value,'M')else:raiseValueError()ifidx+3<len_landtokens[idx+3]==sep:# We have three membersvalue=info.month(tokens[idx+4])ifvalueisnotNone:ymd.append(value,'M')else:ymd.append(tokens[idx+4])idx+=2idx+=1idx+=1elifidx+1>=len_lorinfo.jump(tokens[idx+1]):ifidx+2<len_landinfo.ampm(tokens[idx+2])isnotNone:# 12 amhour=int(value)res.hour=self._adjust_ampm(hour,info.ampm(tokens[idx+2]))idx+=1else:# Year, month or dayymd.append(value)idx+=1elifinfo.ampm(tokens[idx+1])isnotNoneand(0<=value<24):# 
12amhour=int(value)res.hour=self._adjust_ampm(hour,info.ampm(tokens[idx+1]))idx+=1elifymd.could_be_day(value):ymd.append(value)elifnotfuzzy:raiseValueError()returnidxdef_find_hms_idx(self,idx,tokens,info,allow_jump):len_l=len(tokens)ifidx+1<len_landinfo.hms(tokens[idx+1])isnotNone:# There is an "h", "m", or "s" label following this token.  We take# assign the upcoming label to the current token.# e.g. the "12" in 12h"hms_idx=idx+1elif(allow_jumpandidx+2<len_landtokens[idx+1]==' 'andinfo.hms(tokens[idx+2])isnotNone):# There is a space and then an "h", "m", or "s" label.# e.g. the "12" in "12 h"hms_idx=idx+2elifidx>0andinfo.hms(tokens[idx-1])isnotNone:# There is a "h", "m", or "s" preceding this token.  Since neither# of the previous cases was hit, there is no label following this# token, so we use the previous label.# e.g. the "04" in "12h04"hms_idx=idx-1elif(1<idx==len_l-1andtokens[idx-1]==' 'andinfo.hms(tokens[idx-2])isnotNone):# If we are looking at the final token, we allow for a# backward-looking check to skip over a space.# TODO: Are we sure this is the right condition here?hms_idx=idx-2else:hms_idx=Nonereturnhms_idxdef_assign_hms(self,res,value_repr,hms):# See GH issue #427, fixing float roundingvalue=self._to_decimal(value_repr)ifhms==0:# Hourres.hour=int(value)ifvalue%1:res.minute=int(60*(value%1))elifhms==1:(res.minute,res.second)=self._parse_min_sec(value)elifhms==2:(res.second,res.microsecond)=self._parsems(value_repr)def_could_be_tzname(self,hour,tzname,tzoffset,token):return(hourisnotNoneandtznameisNoneandtzoffsetisNoneandlen(token)<=5and(all(xinstring.ascii_uppercaseforxintoken)ortokeninself.info.UTCZONE))def_ampm_valid(self,hour,ampm,fuzzy):"""        For fuzzy parsing, 'a' or 'am' (both valid English words)        may erroneously trigger the AM/PM flag. Deal with that        here.        
"""val_is_ampm=True# If there's already an AM/PM flag, this one isn't one.iffuzzyandampmisnotNone:val_is_ampm=False# If AM/PM is found and hour is not, raise a ValueErrorifhourisNone:iffuzzy:val_is_ampm=Falseelse:raiseValueError('No hour specified with AM or PM flag.')elifnot0<=hour<=12:# If AM/PM is found, it's a 12 hour clock, so raise# an error for invalid rangeiffuzzy:val_is_ampm=Falseelse:raiseValueError('Invalid hour specified for 12-hour clock.')returnval_is_ampmdef_adjust_ampm(self,hour,ampm):ifhour<12andampm==1:hour+=12elifhour==12andampm==0:hour=0returnhourdef_parse_min_sec(self,value):# TODO: Every usage of this function sets res.second to the return# value. Are there any cases where second will be returned as None and# we *don't* want to set res.second = None?minute=int(value)second=Nonesec_remainder=value%1ifsec_remainder:second=int(60*sec_remainder)return(minute,second)def_parse_hms(self,idx,tokens,info,hms_idx):# TODO: Is this going to admit a lot of false-positives for when we# just happen to have digits and "h", "m" or "s" characters in non-date# text?  I guess hex hashes won't have that problem, but there's plenty# of random junk out there.ifhms_idxisNone:hms=Nonenew_idx=idxelifhms_idx>idx:hms=info.hms(tokens[hms_idx])new_idx=hms_idxelse:# Looking backwards, increment one.hms=info.hms(tokens[hms_idx])+1new_idx=idxreturn(new_idx,hms)# ------------------------------------------------------------------# Handling for individual tokens.  
These are kept as methods instead#  of functions for the sake of customizability via subclassing.def_parsems(self,value):"""Parse a I[.F] seconds value into (seconds, microseconds)."""if"."notinvalue:returnint(value),0else:i,f=value.split(".")returnint(i),int(f.ljust(6,"0")[:6])def_to_decimal(self,val):try:decimal_value=Decimal(val)# See GH 662, edge case, infinite value should not be converted#  via `_to_decimal`ifnotdecimal_value.is_finite():raiseValueError("Converted decimal value is infinite or NaN")exceptExceptionase:msg="Could not convert%s to decimal"%valsix.raise_from(ValueError(msg),e)else:returndecimal_value# ------------------------------------------------------------------# Post-Parsing construction of datetime output.  These are kept as#  methods instead of functions for the sake of customizability via#  subclassing.def_build_tzinfo(self,tzinfos,tzname,tzoffset):ifcallable(tzinfos):tzdata=tzinfos(tzname,tzoffset)else:tzdata=tzinfos.get(tzname)# handle case where tzinfo is paased an options that returns None# eg tzinfos = {'BRST' : None}ifisinstance(tzdata,datetime.tzinfo)ortzdataisNone:tzinfo=tzdataelifisinstance(tzdata,text_type):tzinfo=tz.tzstr(tzdata)elifisinstance(tzdata,integer_types):tzinfo=tz.tzoffset(tzname,tzdata)else:raiseTypeError("Offset must be tzinfo subclass, tz string, ""or int offset.")returntzinfodef_build_tzaware(self,naive,res,tzinfos):if(callable(tzinfos)or(tzinfosandres.tznameintzinfos)):tzinfo=self._build_tzinfo(tzinfos,res.tzname,res.tzoffset)aware=naive.replace(tzinfo=tzinfo)aware=self._assign_tzname(aware,res.tzname)elifres.tznameandres.tznameintime.tzname:aware=naive.replace(tzinfo=tz.tzlocal())# Handle ambiguous local datetimeaware=self._assign_tzname(aware,res.tzname)# This is mostly relevant for winter GMT zones parsed in the 
UKif(aware.tzname()!=res.tznameandres.tznameinself.info.UTCZONE):aware=aware.replace(tzinfo=tz.UTC)elifres.tzoffset==0:aware=naive.replace(tzinfo=tz.UTC)elifres.tzoffset:aware=naive.replace(tzinfo=tz.tzoffset(res.tzname,res.tzoffset))elifnotres.tznameandnotres.tzoffset:# i.e. no timezone information was found.aware=naiveelifres.tzname:# tz-like string was parsed but we don't know what to do# with itwarnings.warn("tzname{tzname} identified but not understood.  ""Pass `tzinfos` argument in order to correctly ""return a timezone-aware datetime.  In a future ""version, this will raise an ""exception.".format(tzname=res.tzname),category=UnknownTimezoneWarning)aware=naivereturnawaredef_build_naive(self,res,default):repl={}forattrin("year","month","day","hour","minute","second","microsecond"):value=getattr(res,attr)ifvalueisnotNone:repl[attr]=valueif'day'notinrepl:# If the default day exceeds the last day of the month, fall back# to the end of the month.cyear=default.yearifres.yearisNoneelseres.yearcmonth=default.monthifres.monthisNoneelseres.monthcday=default.dayifres.dayisNoneelseres.dayifcday>monthrange(cyear,cmonth)[1]:repl['day']=monthrange(cyear,cmonth)[1]naive=default.replace(**repl)ifres.weekdayisnotNoneandnotres.day:naive=naive+relativedelta.relativedelta(weekday=res.weekday)returnnaivedef_assign_tzname(self,dt,tzname):ifdt.tzname()!=tzname:new_dt=tz.enfold(dt,fold=1)ifnew_dt.tzname()==tzname:returnnew_dtreturndtdef_recombine_skipped(self,tokens,skipped_idxs):"""        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]        >>> skipped_idxs = [0, 1, 2, 5]        >>> _recombine_skipped(tokens, skipped_idxs)        ["foo bar", "baz"]        """skipped_tokens=[]fori,idxinenumerate(sorted(skipped_idxs)):ifi>0andidx-1==skipped_idxs[i-1]:skipped_tokens[-1]=skipped_tokens[-1]+tokens[idx]else:skipped_tokens.append(tokens[idx])returnskipped_tokensDEFAULTPARSER=parser()defparse(timestr,parserinfo=None,**kwargs):"""    Parse a string in one of the supported 
formats, using the    ``parserinfo`` parameters.    :param timestr:        A string containing a date/time stamp.    :param parserinfo:        A :class:`parserinfo` object containing parameters for the parser.        If ``None``, the default arguments to the :class:`parserinfo`        constructor are used.    The ``**kwargs`` parameter takes the following keyword arguments:    :param default:        The default datetime object, if this is a datetime object and not        ``None``, elements specified in ``timestr`` replace elements in the        default object.    :param ignoretz:        If set ``True``, time zones in parsed strings are ignored and a naive        :class:`datetime` object is returned.    :param tzinfos:        Additional time zone names / aliases which may be present in the        string. This argument maps time zone names (and optionally offsets        from those time zones) to time zones. This parameter can be a        dictionary with timezone aliases mapping time zone names to time        zones or a function taking two parameters (``tzname`` and        ``tzoffset``) and returning a time zone.        The timezones to which the names are mapped can be an integer        offset from UTC in seconds or a :class:`tzinfo` object.        .. doctest::           :options: +NORMALIZE_WHITESPACE            >>> from dateutil.parser import parse            >>> from dateutil.tz import gettz            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)            datetime.datetime(2012, 1, 19, 17, 21,                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))        This parameter is ignored if ``ignoretz`` is set.    
:param dayfirst:        Whether to interpret the first value in an ambiguous 3-integer date        (e.g. 01/05/09) as the day (``True``) or month (``False``). If        ``yearfirst`` is set to ``True``, this distinguishes between YDM and        YMD. If set to ``None``, this value is retrieved from the current        :class:`parserinfo` object (which itself defaults to ``False``).    :param yearfirst:        Whether to interpret the first value in an ambiguous 3-integer date        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to        be the year, otherwise the last number is taken to be the year. If        this is set to ``None``, the value is retrieved from the current        :class:`parserinfo` object (which itself defaults to ``False``).    :param fuzzy:        Whether to allow fuzzy parsing, allowing for string like "Today is        January 1, 2047 at 8:21:00AM".    :param fuzzy_with_tokens:        If ``True``, ``fuzzy`` is automatically set to True, and the parser        will return a tuple where the first element is the parsed        :class:`datetime.datetime` datetimestamp and the second element is        a tuple containing the portions of the string which were ignored:        .. doctest::            >>> from dateutil.parser import parse            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))    :return:        Returns a :class:`datetime.datetime` object or, if the        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the        first element being a :class:`datetime.datetime` object, the second        a tuple containing the fuzzy tokens.    :raises ParserError:        Raised for invalid or unknown string formats, if the provided        :class:`tzinfo` is not in a valid format, or if an invalid date would        be created.    
:raises OverflowError:        Raised if the parsed date exceeds the largest valid C integer on        your system.    """ifparserinfo:returnparser(parserinfo).parse(timestr,**kwargs)else:returnDEFAULTPARSER.parse(timestr,**kwargs)class_tzparser(object):class_result(_resultbase):__slots__=["stdabbr","stdoffset","dstabbr","dstoffset","start","end"]class_attr(_resultbase):__slots__=["month","week","weekday","yday","jyday","day","time"]def__repr__(self):returnself._repr("")def__init__(self):_resultbase.__init__(self)self.start=self._attr()self.end=self._attr()defparse(self,tzstr):res=self._result()l=[xforxinre.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr)ifx]used_idxs=list()try:len_l=len(l)i=0whilei<len_l:# BRST+3[BRDT[+2]]j=iwhilej<len_landnot[xforxinl[j]ifxin"0123456789:,-+"]:j+=1ifj!=i:ifnotres.stdabbr:offattr="stdoffset"res.stdabbr="".join(l[i:j])else:offattr="dstoffset"res.dstabbr="".join(l[i:j])foriiinrange(j):used_idxs.append(ii)i=jif(i<len_land(l[i]in('+','-')orl[i][0]in"0123456789")):ifl[i]in('+','-'):# Yes, that's right.  
See the TZ variable# documentation.signal=(1,-1)[l[i]=='+']used_idxs.append(i)i+=1else:signal=-1len_li=len(l[i])iflen_li==4:# -0300setattr(res,offattr,(int(l[i][:2])*3600+int(l[i][2:])*60)*signal)elifi+1<len_landl[i+1]==':':# -03:00setattr(res,offattr,(int(l[i])*3600+int(l[i+2])*60)*signal)used_idxs.append(i)i+=2eliflen_li<=2:# -[0]3setattr(res,offattr,int(l[i][:2])*3600*signal)else:returnNoneused_idxs.append(i)i+=1ifres.dstabbr:breakelse:breakifi<len_l:forjinrange(i,len_l):ifl[j]==';':l[j]=','assertl[i]==','i+=1ifi>=len_l:passelif(8<=l.count(',')<=9andnot[yforxinl[i:]ifx!=','foryinxifynotin"0123456789+-"]):# GMT0BST,3,0,30,3600,10,0,26,7200[,3600]forxin(res.start,res.end):x.month=int(l[i])used_idxs.append(i)i+=2ifl[i]=='-':value=int(l[i+1])*-1used_idxs.append(i)i+=1else:value=int(l[i])used_idxs.append(i)i+=2ifvalue:x.week=valuex.weekday=(int(l[i])-1)%7else:x.day=int(l[i])used_idxs.append(i)i+=2x.time=int(l[i])used_idxs.append(i)i+=2ifi<len_l:ifl[i]in('-','+'):signal=(-1,1)[l[i]=="+"]used_idxs.append(i)i+=1else:signal=1used_idxs.append(i)res.dstoffset=(res.stdoffset+int(l[i])*signal)# This was a made-up format that is not in normal usewarn(('Parsed time zone "%s"'%tzstr)+'is in a non-standard dateutil-specific format, which '+'is now deprecated; support for parsing this format '+'will be removed in future versions. 
It is recommended '+'that you switch to a standard format like the GNU '+'TZ variable format.',tz.DeprecatedTzFormatWarning)elif(l.count(',')==2andl[i:].count('/')<=2andnot[yforxinl[i:]ifxnotin(',','/','J','M','.','-',':')foryinxifynotin"0123456789"]):forxin(res.start,res.end):ifl[i]=='J':# non-leap year day (1 based)used_idxs.append(i)i+=1x.jyday=int(l[i])elifl[i]=='M':# month[-.]week[-.]weekdayused_idxs.append(i)i+=1x.month=int(l[i])used_idxs.append(i)i+=1assertl[i]in('-','.')used_idxs.append(i)i+=1x.week=int(l[i])ifx.week==5:x.week=-1used_idxs.append(i)i+=1assertl[i]in('-','.')used_idxs.append(i)i+=1x.weekday=(int(l[i])-1)%7else:# year day (zero based)x.yday=int(l[i])+1used_idxs.append(i)i+=1ifi<len_landl[i]=='/':used_idxs.append(i)i+=1# start timelen_li=len(l[i])iflen_li==4:# -0300x.time=(int(l[i][:2])*3600+int(l[i][2:])*60)elifi+1<len_landl[i+1]==':':# -03:00x.time=int(l[i])*3600+int(l[i+2])*60used_idxs.append(i)i+=2ifi+1<len_landl[i+1]==':':used_idxs.append(i)i+=2x.time+=int(l[i])eliflen_li<=2:# -[0]3x.time=(int(l[i][:2])*3600)else:returnNoneused_idxs.append(i)i+=1asserti==len_lorl[i]==','i+=1asserti>=len_lexcept(IndexError,ValueError,AssertionError):returnNoneunused_idxs=set(range(len_l)).difference(used_idxs)res.any_unused_tokens=not{l[n]forninunused_idxs}.issubset({",",":"})returnresDEFAULTTZPARSER=_tzparser()def_parsetz(tzstr):returnDEFAULTTZPARSER.parse(tzstr)[docs]classParserError(ValueError):"""Exception subclass used for any failure to parse a datetime string.    This is a subclass of :py:exc:`ValueError`, and should be raised any time    earlier versions of ``dateutil`` would have raised ``ValueError``.    .. versionadded:: 2.8.1    """def__str__(self):try:returnself.args[0]%self.args[1:]except(TypeError,IndexError):returnsuper(ParserError,self).__str__()def__repr__(self):args=", ".join("'%s'"%argforarginself.args)return"%s(%s)"%(self.__class__.__name__,args)
class UnknownTimezoneWarning(RuntimeWarning):
    """Issued when the parser finds a timezone it cannot parse into a tzinfo.

    .. versionadded:: 2.7.0
    """
# vim:ts=4:sw=4:et
Movatterモバイル変換

Source code for dateutil.parser._parser