Move to src layout

Move the project to the src layout to ensure we are testing on the generated package rather than the checked out source tree.
author: Mario Corchero <mcorcherojim@bloomberg.net> 2021-07-16 11:06:47 +0200
committer: Mario Corchero <mcorcherojim@bloomberg.net> 2021-07-16 17:08:09 +0200
commit: da9ecd07a257cb5d1c7bfa08459b1ccfcabe6d23 (patch)
tree: 20ce3bac3aaedb3695b299b243544b6284dc0011 /src/dateutil/parser
parent: b21555f3b6afd2a0c660293b21e19b32f1d4f50e (diff)
download: dateutil-git-da9ecd07a257cb5d1c7bfa08459b1ccfcabe6d23.tar.gz
3 files changed, 2090 insertions, 0 deletions
diff --git a/src/dateutil/parser/__init__.py b/src/dateutil/parser/__init__.py
new file mode 100644
index 0000000..d174b0e
--- /dev/null
+++ b/src/dateutil/parser/__init__.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+from ._parser import parse, parser, parserinfo, ParserError
+from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
+from ._parser import UnknownTimezoneWarning
+
+from ._parser import __doc__
+
+from .isoparser import isoparser, isoparse
+
+__all__ = ['parse', 'parser', 'parserinfo',
+           'isoparse', 'isoparser',
+           'ParserError',
+           'UnknownTimezoneWarning']
+
+
+###
+# Deprecate portions of the private interface so that downstream code that
+# is improperly relying on it is given *some* notice.
+
+
+def __deprecated_private_func(f):
+    from functools import wraps
+    import warnings
+
+    msg = ('{name} is a private function and may break without warning, '
+           'it will be moved and or renamed in future versions.')
+    msg = msg.format(name=f.__name__)
+
+    @wraps(f)
+    def deprecated_func(*args, **kwargs):
+        warnings.warn(msg, DeprecationWarning)
+        return f(*args, **kwargs)
+
+    return deprecated_func
+
+def __deprecate_private_class(c):
+    import warnings
+
+    msg = ('{name} is a private class and may break without warning, '
+           'it will be moved and or renamed in future versions.')
+    msg = msg.format(name=c.__name__)
+
+    class private_class(c):
+        __doc__ = c.__doc__
+
+        def __init__(self, *args, **kwargs):
+            warnings.warn(msg, DeprecationWarning)
+            super(private_class, self).__init__(*args, **kwargs)
+
+    private_class.__name__ = c.__name__
+
+    return private_class
+
+
+from ._parser import _timelex, _resultbase
+from ._parser import _tzparser, _parsetz
+
+_timelex = __deprecate_private_class(_timelex)
+_tzparser = __deprecate_private_class(_tzparser)
+_resultbase = __deprecate_private_class(_resultbase)
+_parsetz = __deprecated_private_func(_parsetz)
diff --git a/src/dateutil/parser/_parser.py b/src/dateutil/parser/_parser.py
new file mode 100644
index 0000000..37d1663
--- /dev/null
+++ b/src/dateutil/parser/_parser.py
@@ -0,0 +1,1613 @@
+# -*- coding: utf-8 -*-
+"""
+This module offers a generic date/time string parser which is able to parse
+most known formats to represent a date and/or time.
+
+This module attempts to be forgiving with regards to unlikely input formats,
+returning a datetime object even for dates which are ambiguous. If an element
+of a date/time stamp is omitted, the following rules are applied:
+
+- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
+  on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
+  specified.
+- If a time zone is omitted, a timezone-naive datetime is returned.
+
+If any other elements are missing, they are taken from the
+:class:`datetime.datetime` object passed to the parameter ``default``. If this
+results in a day number exceeding the valid number of days per month, the
+value falls back to the end of the month.
+
+Additional resources about date/time string formats can be found below:
+
+- `A summary of the international standard date and time notation
+  <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
+- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
+- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
+- `CPAN ParseDate module
+  <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
+- `Java SimpleDateFormat Class
+  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
+"""
+from __future__ import unicode_literals
+
+import datetime
+import re
+import string
+import time
+import warnings
+
+from calendar import monthrange
+from io import StringIO
+
+import six
+from six import integer_types, text_type
+
+from decimal import Decimal
+
+from warnings import warn
+
+from .. import relativedelta
+from .. import tz
+
+__all__ = ["parse", "parserinfo", "ParserError"]
+
+
+# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
+# making public and/or figuring out if there is something we can
+# take off their plate.
+class _timelex(object):
+    # Fractional seconds are sometimes split by a comma
+    _split_decimal = re.compile("([.,])")
+
+    def __init__(self, instream):
+        if isinstance(instream, (bytes, bytearray)):
+            instream = instream.decode()
+
+        if isinstance(instream, text_type):
+            instream = StringIO(instream)
+        elif getattr(instream, 'read', None) is None:
+            raise TypeError('Parser must be a string or character stream, not '
+                            '{itype}'.format(itype=instream.__class__.__name__))
+
+        self.instream = instream
+        self.charstack = []
+        self.tokenstack = []
+        self.eof = False
+
+    def get_token(self):
+        """
+        This function breaks the time string into lexical units (tokens), which
+        can be parsed by the parser. Lexical units are demarcated by changes in
+        the character set, so any continuous string of letters is considered
+        one unit, any continuous string of numbers is considered one unit.
+
+        The main complication arises from the fact that dots ('.') can be used
+        both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
+        "4:30:21.447"). As such, it is necessary to read the full context of
+        any dot-separated strings before breaking it into tokens; as such, this
+        function maintains a "token stack", for when the ambiguous context
+        demands that multiple tokens be parsed at once.
+        """
+        if self.tokenstack:
+            return self.tokenstack.pop(0)
+
+        seenletters = False
+        token = None
+        state = None
+
+        while not self.eof:
+            # We only realize that we've reached the end of a token when we
+            # find a character that's not part of the current token - since
+            # that character may be part of the next token, it's stored in the
+            # charstack.
+            if self.charstack:
+                nextchar = self.charstack.pop(0)
+            else:
+                nextchar = self.instream.read(1)
+                while nextchar == '\x00':
+                    nextchar = self.instream.read(1)
+
+            if not nextchar:
+                self.eof = True
+                break
+            elif not state:
+                # First character of the token - determines if we're starting
+                # to parse a word, a number or something else.
+                token = nextchar
+                if self.isword(nextchar):
+                    state = 'a'
+                elif self.isnum(nextchar):
+                    state = '0'
+                elif self.isspace(nextchar):
+                    token = ' '
+                    break  # emit token
+                else:
+                    break  # emit token
+            elif state == 'a':
+                # If we've already started reading a word, we keep reading
+                # letters until we find something that's not part of a word.
+                seenletters = True
+                if self.isword(nextchar):
+                    token += nextchar
+                elif nextchar == '.':
+                    token += nextchar
+                    state = 'a.'
+                else:
+                    self.charstack.append(nextchar)
+                    break  # emit token
+            elif state == '0':
+                # If we've already started reading a number, we keep reading
+                # numbers until we find something that doesn't fit.
+                if self.isnum(nextchar):
+                    token += nextchar
+                elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
+                    token += nextchar
+                    state = '0.'
+                else:
+                    self.charstack.append(nextchar)
+                    break  # emit token
+            elif state == 'a.':
+                # If we've seen some letters and a dot separator, continue
+                # parsing, and the tokens will be broken up later.
+                seenletters = True
+                if nextchar == '.' or self.isword(nextchar):
+                    token += nextchar
+                elif self.isnum(nextchar) and token[-1] == '.':
+                    token += nextchar
+                    state = '0.'
+                else:
+                    self.charstack.append(nextchar)
+                    break  # emit token
+            elif state == '0.':
+                # If we've seen at least one dot separator, keep going, we'll
+                # break up the tokens later.
+                if nextchar == '.' or self.isnum(nextchar):
+                    token += nextchar
+                elif self.isword(nextchar) and token[-1] == '.':
+                    token += nextchar
+                    state = 'a.'
+                else:
+                    self.charstack.append(nextchar)
+                    break  # emit token
+
+        if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
+                                       token[-1] in '.,')):
+            l = self._split_decimal.split(token)
+            token = l[0]
+            for tok in l[1:]:
+                if tok:
+                    self.tokenstack.append(tok)
+
+        if state == '0.' and token.count('.') == 0:
+            token = token.replace(',', '.')
+
+        return token
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        token = self.get_token()
+        if token is None:
+            raise StopIteration
+
+        return token
+
+    def next(self):
+        return self.__next__()  # Python 2.x support
+
+    @classmethod
+    def split(cls, s):
+        return list(cls(s))
+
+    @classmethod
+    def isword(cls, nextchar):
+        """ Whether or not the next character is part of a word """
+        return nextchar.isalpha()
+
+    @classmethod
+    def isnum(cls, nextchar):
+        """ Whether the next character is part of a number """
+        return nextchar.isdigit()
+
+    @classmethod
+    def isspace(cls, nextchar):
+        """ Whether the next character is whitespace """
+        return nextchar.isspace()
+
+
+class _resultbase(object):
+
+    def __init__(self):
+        for attr in self.__slots__:
+            setattr(self, attr, None)
+
+    def _repr(self, classname):
+        l = []
+        for attr in self.__slots__:
+            value = getattr(self, attr)
+            if value is not None:
+                l.append("%s=%s" % (attr, repr(value)))
+        return "%s(%s)" % (classname, ", ".join(l))
+
+    def __len__(self):
+        return (sum(getattr(self, attr) is not None
+                    for attr in self.__slots__))
+
+    def __repr__(self):
+        return self._repr(self.__class__.__name__)
+
+
+class parserinfo(object):
+    """
+    Class which handles what inputs are accepted. Subclass this to customize
+    the language and acceptable values for each parameter.
+
+    :param dayfirst:
+        Whether to interpret the first value in an ambiguous 3-integer date
+        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
+        ``yearfirst`` is set to ``True``, this distinguishes between YDM
+        and YMD. Default is ``False``.
+
+    :param yearfirst:
+        Whether to interpret the first value in an ambiguous 3-integer date
+        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
+        to be the year, otherwise the last number is taken to be the year.
+        Default is ``False``.
+    """
+
+    # m from a.m/p.m, t from ISO T separator
+    JUMP = [" ", ".", ",", ";", "-", "/", "'",
+            "at", "on", "and", "ad", "m", "t", "of",
+            "st", "nd", "rd", "th"]
+
+    WEEKDAYS = [("Mon", "Monday"),
+                ("Tue", "Tuesday"),     # TODO: "Tues"
+                ("Wed", "Wednesday"),
+                ("Thu", "Thursday"),    # TODO: "Thurs"
+                ("Fri", "Friday"),
+                ("Sat", "Saturday"),
+                ("Sun", "Sunday")]
+    MONTHS = [("Jan", "January"),
+              ("Feb", "February"),      # TODO: "Febr"
+              ("Mar", "March"),
+              ("Apr", "April"),
+              ("May", "May"),
+              ("Jun", "June"),
+              ("Jul", "July"),
+              ("Aug", "August"),
+              ("Sep", "Sept", "September"),
+              ("Oct", "October"),
+              ("Nov", "November"),
+              ("Dec", "December")]
+    HMS = [("h", "hour", "hours"),
+           ("m", "minute", "minutes"),
+           ("s", "second", "seconds")]
+    AMPM = [("am", "a"),
+            ("pm", "p")]
+    UTCZONE = ["UTC", "GMT", "Z", "z"]
+    PERTAIN = ["of"]
+    TZOFFSET = {}
+    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
+    #              "Anno Domini", "Year of Our Lord"]
+
+    def __init__(self, dayfirst=False, yearfirst=False):
+        self._jump = self._convert(self.JUMP)
+        self._weekdays = self._convert(self.WEEKDAYS)
+        self._months = self._convert(self.MONTHS)
+        self._hms = self._convert(self.HMS)
+        self._ampm = self._convert(self.AMPM)
+        self._utczone = self._convert(self.UTCZONE)
+        self._pertain = self._convert(self.PERTAIN)
+
+        self.dayfirst = dayfirst
+        self.yearfirst = yearfirst
+
+        self._year = time.localtime().tm_year
+        self._century = self._year // 100 * 100
+
+    def _convert(self, lst):
+        dct = {}
+        for i, v in enumerate(lst):
+            if isinstance(v, tuple):
+                for v in v:
+                    dct[v.lower()] = i
+            else:
+                dct[v.lower()] = i
+        return dct
+
+    def jump(self, name):
+        return name.lower() in self._jump
+
+    def weekday(self, name):
+        try:
+            return self._weekdays[name.lower()]
+        except KeyError:
+            pass
+        return None
+
+    def month(self, name):
+        try:
+            return self._months[name.lower()] + 1
+        except KeyError:
+            pass
+        return None
+
+    def hms(self, name):
+        try:
+            return self._hms[name.lower()]
+        except KeyError:
+            return None
+
+    def ampm(self, name):
+        try:
+            return self._ampm[name.lower()]
+        except KeyError:
+            return None
+
+    def pertain(self, name):
+        return name.lower() in self._pertain
+
+    def utczone(self, name):
+        return name.lower() in self._utczone
+
+    def tzoffset(self, name):
+        if name in self._utczone:
+            return 0
+
+        return self.TZOFFSET.get(name)
+
+    def convertyear(self, year, century_specified=False):
+        """
+        Converts two-digit years to year within [-50, 49]
+        range of self._year (current local time)
+        """
+
+        # Function contract is that the year is always positive
+        assert year >= 0
+
+        if year < 100 and not century_specified:
+            # assume current century to start
+            year += self._century
+
+            if year >= self._year + 50:  # if too far in future
+                year -= 100
+            elif year < self._year - 50:  # if too far in past
+                year += 100
+
+        return year
+
+    def validate(self, res):
+        # move to info
+        if res.year is not None:
+            res.year = self.convertyear(res.year, res.century_specified)
+
+        if ((res.tzoffset == 0 and not res.tzname) or
+             (res.tzname == 'Z' or res.tzname == 'z')):
+            res.tzname = "UTC"
+            res.tzoffset = 0
+        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
+            res.tzoffset = 0
+        return True
+
+
+class _ymd(list):
+    def __init__(self, *args, **kwargs):
+        super(self.__class__, self).__init__(*args, **kwargs)
+        self.century_specified = False
+        self.dstridx = None
+        self.mstridx = None
+        self.ystridx = None
+
+    @property
+    def has_year(self):
+        return self.ystridx is not None
+
+    @property
+    def has_month(self):
+        return self.mstridx is not None
+
+    @property
+    def has_day(self):
+        return self.dstridx is not None
+
+    def could_be_day(self, value):
+        if self.has_day:
+            return False
+        elif not self.has_month:
+            return 1 <= value <= 31
+        elif not self.has_year:
+            # Be permissive, assume leap year
+            month = self[self.mstridx]
+            return 1 <= value <= monthrange(2000, month)[1]
+        else:
+            month = self[self.mstridx]
+            year = self[self.ystridx]
+            return 1 <= value <= monthrange(year, month)[1]
+
+    def append(self, val, label=None):
+        if hasattr(val, '__len__'):
+            if val.isdigit() and len(val) > 2:
+                self.century_specified = True
+                if label not in [None, 'Y']:  # pragma: no cover
+                    raise ValueError(label)
+                label = 'Y'
+        elif val > 100:
+            self.century_specified = True
+            if label not in [None, 'Y']:  # pragma: no cover
+                raise ValueError(label)
+            label = 'Y'
+
+        super(self.__class__, self).append(int(val))
+
+        if label == 'M':
+            if self.has_month:
+                raise ValueError('Month is already set')
+            self.mstridx = len(self) - 1
+        elif label == 'D':
+            if self.has_day:
+                raise ValueError('Day is already set')
+            self.dstridx = len(self) - 1
+        elif label == 'Y':
+            if self.has_year:
+                raise ValueError('Year is already set')
+            self.ystridx = len(self) - 1
+
+    def _resolve_from_stridxs(self, strids):
+        """
+        Try to resolve the identities of year/month/day elements using
+        ystridx, mstridx, and dstridx, if enough of these are specified.
+        """
+        if len(self) == 3 and len(strids) == 2:
+            # we can back out the remaining stridx value
+            missing = [x for x in range(3) if x not in strids.values()]
+            key = [x for x in ['y', 'm', 'd'] if x not in strids]
+            assert len(missing) == len(key) == 1
+            key = key[0]
+            val = missing[0]
+            strids[key] = val
+
+        assert len(self) == len(strids)  # otherwise this should not be called
+        out = {key: self[strids[key]] for key in strids}
+        return (out.get('y'), out.get('m'), out.get('d'))
+
+    def resolve_ymd(self, yearfirst, dayfirst):
+        len_ymd = len(self)
+        year, month, day = (None, None, None)
+
+        strids = (('y', self.ystridx),
+                  ('m', self.mstridx),
+                  ('d', self.dstridx))
+
+        strids = {key: val for key, val in strids if val is not None}
+        if (len(self) == len(strids) > 0 or
+                (len(self) == 3 and len(strids) == 2)):
+            return self._resolve_from_stridxs(strids)
+
+        mstridx = self.mstridx
+
+        if len_ymd > 3:
+            raise ValueError("More than three YMD values")
+        elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
+            # One member, or two members with a month string
+            if mstridx is not None:
+                month = self[mstridx]
+                # since mstridx is 0 or 1, self[mstridx-1] always
+                # looks up the other element
+                other = self[mstridx - 1]
+            else:
+                other = self[0]
+
+            if len_ymd > 1 or mstridx is None:
+                if other > 31:
+                    year = other
+                else:
+                    day = other
+
+        elif len_ymd == 2:
+            # Two members with numbers
+            if self[0] > 31:
+                # 99-01
+                year, month = self
+            elif self[1] > 31:
+                # 01-99
+                month, year = self
+            elif dayfirst and self[1] <= 12:
+                # 13-01
+                day, month = self
+            else:
+                # 01-13
+                month, day = self
+
+        elif len_ymd == 3:
+            # Three members
+            if mstridx == 0:
+                if self[1] > 31:
+                    # Apr-2003-25
+                    month, year, day = self
+                else:
+                    month, day, year = self
+            elif mstridx == 1:
+                if self[0] > 31 or (yearfirst and self[2] <= 31):
+                    # 99-Jan-01
+                    year, month, day = self
+                else:
+                    # 01-Jan-01
+                    # Give precedence to day-first, since
+                    # two-digit years is usually hand-written.
+                    day, month, year = self
+
+            elif mstridx == 2:
+                # WTF!?
+                if self[1] > 31:
+                    # 01-99-Jan
+                    day, year, month = self
+                else:
+                    # 99-01-Jan
+                    year, day, month = self
+
+            else:
+                if (self[0] > 31 or
+                    self.ystridx == 0 or
+                        (yearfirst and self[1] <= 12 and self[2] <= 31)):
+                    # 99-01-01
+                    if dayfirst and self[2] <= 12:
+                        year, day, month = self
+                    else:
+                        year, month, day = self
+                elif self[0] > 12 or (dayfirst and self[1] <= 12):
+                    # 13-01-01
+                    day, month, year = self
+                else:
+                    # 01-13-01
+                    month, day, year = self
+
+        return year, month, day
+
+
+class parser(object):
+    def __init__(self, info=None):
+        self.info = info or parserinfo()
+
+    def parse(self, timestr, default=None,
+              ignoretz=False, tzinfos=None, **kwargs):
+        """
+        Parse the date/time string into a :class:`datetime.datetime` object.
+
+        :param timestr:
+            Any date/time string using the supported formats.
+
+        :param default:
+            The default datetime object, if this is a datetime object and not
+            ``None``, elements specified in ``timestr`` replace elements in the
+            default object.
+
+        :param ignoretz:
+            If set ``True``, time zones in parsed strings are ignored and a
+            naive :class:`datetime.datetime` object is returned.
+
+        :param tzinfos:
+            Additional time zone names / aliases which may be present in the
+            string. This argument maps time zone names (and optionally offsets
+            from those time zones) to time zones. This parameter can be a
+            dictionary with timezone aliases mapping time zone names to time
+            zones or a function taking two parameters (``tzname`` and
+            ``tzoffset``) and returning a time zone.
+
+            The timezones to which the names are mapped can be an integer
+            offset from UTC in seconds or a :class:`tzinfo` object.
+
+            .. doctest::
+               :options: +NORMALIZE_WHITESPACE
+
+                >>> from dateutil.parser import parse
+                >>> from dateutil.tz import gettz
+                >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
+                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
+                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
+                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
+                datetime.datetime(2012, 1, 19, 17, 21,
+                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
+
+            This parameter is ignored if ``ignoretz`` is set.
+
+        :param \\*\\*kwargs:
+            Keyword arguments as passed to ``_parse()``.
+
+        :return:
+            Returns a :class:`datetime.datetime` object or, if the
+            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
+            first element being a :class:`datetime.datetime` object, the second
+            a tuple containing the fuzzy tokens.
+
+        :raises ParserError:
+            Raised for invalid or unknown string format, if the provided
+            :class:`tzinfo` is not in a valid format, or if an invalid date
+            would be created.
+
+        :raises TypeError:
+            Raised for non-string or character stream input.
+
+        :raises OverflowError:
+            Raised if the parsed date exceeds the largest valid C integer on
+            your system.
+        """
+
+        if default is None:
+            default = datetime.datetime.now().replace(hour=0, minute=0,
+                                                      second=0, microsecond=0)
+
+        res, skipped_tokens = self._parse(timestr, **kwargs)
+
+        if res is None:
+            raise ParserError("Unknown string format: %s", timestr)
+
+        if len(res) == 0:
+            raise ParserError("String does not contain a date: %s", timestr)
+
+        try:
+            ret = self._build_naive(res, default)
+        except ValueError as e:
+            six.raise_from(ParserError(str(e) + ": %s", timestr), e)
+
+        if not ignoretz:
+            ret = self._build_tzaware(ret, res, tzinfos)
+
+        if kwargs.get('fuzzy_with_tokens', False):
+            return ret, skipped_tokens
+        else:
+            return ret
+
+    class _result(_resultbase):
+        __slots__ = ["year", "month", "day", "weekday",
+                     "hour", "minute", "second", "microsecond",
+                     "tzname", "tzoffset", "ampm","any_unused_tokens"]
+
+    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
+               fuzzy_with_tokens=False):
+        """
+        Private method which performs the heavy lifting of parsing, called from
+        ``parse()``, which passes on its ``kwargs`` to this function.
+
+        :param timestr:
+            The string to parse.
+
+        :param dayfirst:
+            Whether to interpret the first value in an ambiguous 3-integer date
+            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
+            ``yearfirst`` is set to ``True``, this distinguishes between YDM
+            and YMD. If set to ``None``, this value is retrieved from the
+            current :class:`parserinfo` object (which itself defaults to
+            ``False``).
+
+        :param yearfirst:
+            Whether to interpret the first value in an ambiguous 3-integer date
+            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
+            to be the year, otherwise the last number is taken to be the year.
+            If this is set to ``None``, the value is retrieved from the current
+            :class:`parserinfo` object (which itself defaults to ``False``).
+
+        :param fuzzy:
+            Whether to allow fuzzy parsing, allowing for string like "Today is
+            January 1, 2047 at 8:21:00AM".
+
+        :param fuzzy_with_tokens:
+            If ``True``, ``fuzzy`` is automatically set to True, and the parser
+            will return a tuple where the first element is the parsed
+            :class:`datetime.datetime` datetimestamp and the second element is
+            a tuple containing the portions of the string which were ignored:
+
+            .. doctest::
+
+                >>> from dateutil.parser import parse
+                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
+                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
+
+        """
+        if fuzzy_with_tokens:
+            fuzzy = True
+
+        info = self.info
+
+        if dayfirst is None:
+            dayfirst = info.dayfirst
+
+        if yearfirst is None:
+            yearfirst = info.yearfirst
+
+        res = self._result()
+        l = _timelex.split(timestr)         # Splits the timestr into tokens
+
+        skipped_idxs = []
+
+        # year/month/day list
+        ymd = _ymd()
+
+        len_l = len(l)
+        i = 0
+        try:
+            while i < len_l:
+
+                # Check if it's a number
+                value_repr = l[i]
+                try:
+                    value = float(value_repr)
+                except ValueError:
+                    value = None
+
+                if value is not None:
+                    # Numeric token
+                    i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
+
+                # Check weekday
+                elif info.weekday(l[i]) is not None:
+                    value = info.weekday(l[i])
+                    res.weekday = value
+
+                # Check month name
+                elif info.month(l[i]) is not None:
+                    value = info.month(l[i])
+                    ymd.append(value, 'M')
+
+                    if i + 1 < len_l:
+                        if l[i + 1] in ('-', '/'):
+                            # Jan-01[-99]
+                            sep = l[i + 1]
+                            ymd.append(l[i + 2])
+
+                            if i + 3 < len_l and l[i + 3] == sep:
+                                # Jan-01-99
+                                ymd.append(l[i + 4])
+                                i += 2
+
+                            i += 2
+
+                        elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
+                              info.pertain(l[i + 2])):
+                            # Jan of 01
+                            # In this case, 01 is clearly year
+                            if l[i + 4].isdigit():
+                                # Convert it here to become unambiguous
+                                value = int(l[i + 4])
+                                year = str(info.convertyear(value))
+                                ymd.append(year, 'Y')
+                            else:
+                                # Wrong guess
+                                pass
+                                # TODO: not hit in tests
+                            i += 4
+
+                # Check am/pm
+                elif info.ampm(l[i]) is not None:
+                    value = info.ampm(l[i])
+                    val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
+
+                    if val_is_ampm:
+                        res.hour = self._adjust_ampm(res.hour, value)
+                        res.ampm = value
+
+                    elif fuzzy:
+                        skipped_idxs.append(i)
+
+                # Check for a timezone name
+                elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
+                    res.tzname = l[i]
+                    res.tzoffset = info.tzoffset(res.tzname)
+
+                    # Check for something like GMT+3, or BRST+3. Notice
+                    # that it doesn't mean "I am 3 hours after GMT", but
+                    # "my time +3 is GMT". If found, we reverse the
+                    # logic so that timezone parsing code will get it
+                    # right.
+                    if i + 1 < len_l and l[i + 1] in ('+', '-'):
+                        l[i + 1] = ('+', '-')[l[i + 1] == '+']
+                        res.tzoffset = None
+                        if info.utczone(res.tzname):
+                            # With something like GMT+3, the timezone
+                            # is *not* GMT.
+                            res.tzname = None
+
+                # Check for a numbered timezone
+                elif res.hour is not None and l[i] in ('+', '-'):
+                    signal = (-1, 1)[l[i] == '+']
+                    len_li = len(l[i + 1])
+
+                    # TODO: check that l[i + 1] is integer?
+                    if len_li == 4:
+                        # -0300
+                        hour_offset = int(l[i + 1][:2])
+                        min_offset = int(l[i + 1][2:])
+                    elif i + 2 < len_l and l[i + 2] == ':':
+                        # -03:00
+                        hour_offset = int(l[i + 1])
+                        min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
+                        i += 2
+                    elif len_li <= 2:
+                        # -[0]3
+                        hour_offset = int(l[i + 1][:2])
+                        min_offset = 0
+                    else:
+                        raise ValueError(timestr)
+
+                    res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)
+
+                    # Look for a timezone name between parenthesis
+                    if (i + 5 < len_l and
+                            info.jump(l[i + 2]) and l[i + 3] == '(' and
+                            l[i + 5] == ')' and
+                            3 <= len(l[i + 4]) and
+                            self._could_be_tzname(res.hour, res.tzname,
+                                                  None, l[i + 4])):
+                        # -0300 (BRST)
+                        res.tzname = l[i + 4]
+                        i += 4
+
+                    i += 1
+
+                # Check jumps
+                elif not (info.jump(l[i]) or fuzzy):
+                    raise ValueError(timestr)
+
+                else:
+                    skipped_idxs.append(i)
+                i += 1
+
+            # Process year/month/day
+            year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
+
+            res.century_specified = ymd.century_specified
+            res.year = year
+            res.month = month
+            res.day = day
+
+        except (IndexError, ValueError):
+            return None, None
+
+        if not info.validate(res):
+            return None, None
+
+        if fuzzy_with_tokens:
+            skipped_tokens = self._recombine_skipped(l, skipped_idxs)
+            return res, tuple(skipped_tokens)
+        else:
+            return res, None
+
+    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
+        # Token is a number
+        value_repr = tokens[idx]
+        try:
+            value = self._to_decimal(value_repr)
+        except Exception as e:
+            six.raise_from(ValueError('Unknown numeric token'), e)
+
+        len_li = len(value_repr)
+
+        len_l = len(tokens)
+
+        if (len(ymd) == 3 and len_li in (2, 4) and
+            res.hour is None and
+            (idx + 1 >= len_l or
+             (tokens[idx + 1] != ':' and
+              info.hms(tokens[idx + 1]) is None))):
+            # 19990101T23[59]
+            s = tokens[idx]
+            res.hour = int(s[:2])
+
+            if len_li == 4:
+                res.minute = int(s[2:])
+
+        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
+            # YYMMDD or HHMMSS[.ss]
+            s = tokens[idx]
+
+            if not ymd and '.' not in tokens[idx]:
+                ymd.append(s[:2])
+                ymd.append(s[2:4])
+                ymd.append(s[4:])
+            else:
+                # 19990101T235959[.59]
+
+                # TODO: Check if res attributes already set.
+                res.hour = int(s[:2])
+                res.minute = int(s[2:4])
+                res.second, res.microsecond = self._parsems(s[4:])
+
+        elif len_li in (8, 12, 14):
+            # YYYYMMDD
+            s = tokens[idx]
+            ymd.append(s[:4], 'Y')
+            ymd.append(s[4:6])
+            ymd.append(s[6:8])
+
+            if len_li > 8:
+                res.hour = int(s[8:10])
+                res.minute = int(s[10:12])
+
+                if len_li > 12:
+                    res.second = int(s[12:])
+
+        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
+            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
+            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
+            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
+            if hms is not None:
+                # TODO: checking that hour/minute/second are not
+                # already set?
+                self._assign_hms(res, value_repr, hms)
+
+        elif idx + 2 < len_l and tokens[idx + 1] == ':':
+            # HH:MM[:SS[.ss]]
+            res.hour = int(value)
+            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
+            (res.minute, res.second) = self._parse_min_sec(value)
+
+            if idx + 4 < len_l and tokens[idx + 3] == ':':
+                res.second, res.microsecond = self._parsems(tokens[idx + 4])
+
+                idx += 2
+
+            idx += 2
+
+        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
+            sep = tokens[idx + 1]
+            ymd.append(value_repr)
+
+            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
+                if tokens[idx + 2].isdigit():
+                    # 01-01[-01]
+                    ymd.append(tokens[idx + 2])
+                else:
+                    # 01-Jan[-01]
+                    value = info.month(tokens[idx + 2])
+
+                    if value is not None:
+                        ymd.append(value, 'M')
+                    else:
+                        raise ValueError()
+
+                if idx + 3 < len_l and tokens[idx + 3] == sep:
+                    # We have three members
+                    value = info.month(tokens[idx + 4])
+
+                    if value is not None:
+                        ymd.append(value, 'M')
+                    else:
+                        ymd.append(tokens[idx + 4])
+                    idx += 2
+
+                idx += 1
+            idx += 1
+
+        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
+            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
+                # 12 am
+                hour = int(value)
+                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
+                idx += 1
+            else:
+                # Year, month or day
+                ymd.append(value)
+            idx += 1
+
+        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
+            # 12am
+            hour = int(value)
+            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
+            idx += 1
+
+        elif ymd.could_be_day(value):
+            ymd.append(value)
+
+        elif not fuzzy:
+            raise ValueError()
+
+        return idx
+
+    def _find_hms_idx(self, idx, tokens, info, allow_jump):
+        len_l = len(tokens)
+
+        if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
+            # There is an "h", "m", or "s" label following this token.  We take
+            # assign the upcoming label to the current token.
+            # e.g. the "12" in 12h"
+            hms_idx = idx + 1
+
+        elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
+              info.hms(tokens[idx+2]) is not None):
+            # There is a space and then an "h", "m", or "s" label.
+            # e.g. the "12" in "12 h"
+            hms_idx = idx + 2
+
+        elif idx > 0 and info.hms(tokens[idx-1]) is not None:
+            # There is a "h", "m", or "s" preceding this token.  Since neither
+            # of the previous cases was hit, there is no label following this
+            # token, so we use the previous label.
+            # e.g. the "04" in "12h04"
+            hms_idx = idx-1
+
+        elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
+              info.hms(tokens[idx-2]) is not None):
+            # If we are looking at the final token, we allow for a
+            # backward-looking check to skip over a space.
+            # TODO: Are we sure this is the right condition here?
+            hms_idx = idx - 2
+
+        else:
+            hms_idx = None
+
+        return hms_idx
+
+    def _assign_hms(self, res, value_repr, hms):
+        # See GH issue #427, fixing float rounding
+        value = self._to_decimal(value_repr)
+
+        if hms == 0:
+            # Hour
+            res.hour = int(value)
+            if value % 1:
+                res.minute = int(60*(value % 1))
+
+        elif hms == 1:
+            (res.minute, res.second) = self._parse_min_sec(value)
+
+        elif hms == 2:
+            (res.second, res.microsecond) = self._parsems(value_repr)
+
+    def _could_be_tzname(self, hour, tzname, tzoffset, token):
+        return (hour is not None and
+                tzname is None and
+                tzoffset is None and
+                len(token) <= 5 and
+                (all(x in string.ascii_uppercase for x in token)
+                 or token in self.info.UTCZONE))
+
+    def _ampm_valid(self, hour, ampm, fuzzy):
+        """
+        For fuzzy parsing, 'a' or 'am' (both valid English words)
+        may erroneously trigger the AM/PM flag. Deal with that
+        here.
+        """
+        val_is_ampm = True
+
+        # If there's already an AM/PM flag, this one isn't one.
+        if fuzzy and ampm is not None:
+            val_is_ampm = False
+
+        # If AM/PM is found and hour is not, raise a ValueError
+        if hour is None:
+            if fuzzy:
+                val_is_ampm = False
+            else:
+                raise ValueError('No hour specified with AM or PM flag.')
+        elif not 0 <= hour <= 12:
+            # If AM/PM is found, it's a 12 hour clock, so raise
+            # an error for invalid range
+            if fuzzy:
+                val_is_ampm = False
+            else:
+                raise ValueError('Invalid hour specified for 12-hour clock.')
+
+        return val_is_ampm
+
+    def _adjust_ampm(self, hour, ampm):
+        if hour < 12 and ampm == 1:
+            hour += 12
+        elif hour == 12 and ampm == 0:
+            hour = 0
+        return hour
+
+    def _parse_min_sec(self, value):
+        # TODO: Every usage of this function sets res.second to the return
+        # value. Are there any cases where second will be returned as None and
+        # we *don't* want to set res.second = None?
+        minute = int(value)
+        second = None
+
+        sec_remainder = value % 1
+        if sec_remainder:
+            second = int(60 * sec_remainder)
+        return (minute, second)
+
+    def _parse_hms(self, idx, tokens, info, hms_idx):
+        # TODO: Is this going to admit a lot of false-positives for when we
+        # just happen to have digits and "h", "m" or "s" characters in non-date
+        # text?  I guess hex hashes won't have that problem, but there's plenty
+        # of random junk out there.
+        if hms_idx is None:
+            hms = None
+            new_idx = idx
+        elif hms_idx > idx:
+            hms = info.hms(tokens[hms_idx])
+            new_idx = hms_idx
+        else:
+            # Looking backwards, increment one.
+            hms = info.hms(tokens[hms_idx]) + 1
+            new_idx = idx
+
+        return (new_idx, hms)
+
+    # ------------------------------------------------------------------
+    # Handling for individual tokens.  These are kept as methods instead
+    #  of functions for the sake of customizability via subclassing.
+
+    def _parsems(self, value):
+        """Parse a I[.F] seconds value into (seconds, microseconds)."""
+        if "." not in value:
+            return int(value), 0
+        else:
+            i, f = value.split(".")
+            return int(i), int(f.ljust(6, "0")[:6])
+
+    def _to_decimal(self, val):
+        try:
+            decimal_value = Decimal(val)
+            # See GH 662, edge case, infinite value should not be converted
+            #  via `_to_decimal`
+            if not decimal_value.is_finite():
+                raise ValueError("Converted decimal value is infinite or NaN")
+        except Exception as e:
+            msg = "Could not convert %s to decimal" % val
+            six.raise_from(ValueError(msg), e)
+        else:
+            return decimal_value
+
+    # ------------------------------------------------------------------
+    # Post-Parsing construction of datetime output.  These are kept as
+    #  methods instead of functions for the sake of customizability via
+    #  subclassing.
+
+    def _build_tzinfo(self, tzinfos, tzname, tzoffset):
+        if callable(tzinfos):
+            tzdata = tzinfos(tzname, tzoffset)
+        else:
+            tzdata = tzinfos.get(tzname)
+        # handle case where tzinfo is paased an options that returns None
+        # eg tzinfos = {'BRST' : None}
+        if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
+            tzinfo = tzdata
+        elif isinstance(tzdata, text_type):
+            tzinfo = tz.tzstr(tzdata)
+        elif isinstance(tzdata, integer_types):
+            tzinfo = tz.tzoffset(tzname, tzdata)
+        else:
+            raise TypeError("Offset must be tzinfo subclass, tz string, "
+                            "or int offset.")
+        return tzinfo
+
+    def _build_tzaware(self, naive, res, tzinfos):
+        if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
+            tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
+            aware = naive.replace(tzinfo=tzinfo)
+            aware = self._assign_tzname(aware, res.tzname)
+
+        elif res.tzname and res.tzname in time.tzname:
+            aware = naive.replace(tzinfo=tz.tzlocal())
+
+            # Handle ambiguous local datetime
+            aware = self._assign_tzname(aware, res.tzname)
+
+            # This is mostly relevant for winter GMT zones parsed in the UK
+            if (aware.tzname() != res.tzname and
+                    res.tzname in self.info.UTCZONE):
+                aware = aware.replace(tzinfo=tz.UTC)
+
+        elif res.tzoffset == 0:
+            aware = naive.replace(tzinfo=tz.UTC)
+
+        elif res.tzoffset:
+            aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
+
+        elif not res.tzname and not res.tzoffset:
+            # i.e. no timezone information was found.
+            aware = naive
+
+        elif res.tzname:
+            # tz-like string was parsed but we don't know what to do
+            # with it
+            warnings.warn("tzname {tzname} identified but not understood.  "
+                          "Pass `tzinfos` argument in order to correctly "
+                          "return a timezone-aware datetime.  In a future "
+                          "version, this will raise an "
+                          "exception.".format(tzname=res.tzname),
+                          category=UnknownTimezoneWarning)
+            aware = naive
+
+        return aware
+
+    def _build_naive(self, res, default):
+        repl = {}
+        for attr in ("year", "month", "day", "hour",
+                     "minute", "second", "microsecond"):
+            value = getattr(res, attr)
+            if value is not None:
+                repl[attr] = value
+
+        if 'day' not in repl:
+            # If the default day exceeds the last day of the month, fall back
+            # to the end of the month.
+            cyear = default.year if res.year is None else res.year
+            cmonth = default.month if res.month is None else res.month
+            cday = default.day if res.day is None else res.day
+
+            if cday > monthrange(cyear, cmonth)[1]:
+                repl['day'] = monthrange(cyear, cmonth)[1]
+
+        naive = default.replace(**repl)
+
+        if res.weekday is not None and not res.day:
+            naive = naive + relativedelta.relativedelta(weekday=res.weekday)
+
+        return naive
+
+    def _assign_tzname(self, dt, tzname):
+        if dt.tzname() != tzname:
+            new_dt = tz.enfold(dt, fold=1)
+            if new_dt.tzname() == tzname:
+                return new_dt
+
+        return dt
+
+    def _recombine_skipped(self, tokens, skipped_idxs):
+        """
+        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
+        >>> skipped_idxs = [0, 1, 2, 5]
+        >>> _recombine_skipped(tokens, skipped_idxs)
+        ["foo bar", "baz"]
+        """
+        skipped_tokens = []
+        for i, idx in enumerate(sorted(skipped_idxs)):
+            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
+                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
+            else:
+                skipped_tokens.append(tokens[idx])
+
+        return skipped_tokens
+
+
+DEFAULTPARSER = parser()
+
+
+def parse(timestr, parserinfo=None, **kwargs):
+    """
+
+    Parse a string in one of the supported formats, using the
+    ``parserinfo`` parameters.
+
+    :param timestr:
+        A string containing a date/time stamp.
+
+    :param parserinfo:
+        A :class:`parserinfo` object containing parameters for the parser.
+        If ``None``, the default arguments to the :class:`parserinfo`
+        constructor are used.
+
+    The ``**kwargs`` parameter takes the following keyword arguments:
+
+    :param default:
+        The default datetime object, if this is a datetime object and not
+        ``None``, elements specified in ``timestr`` replace elements in the
+        default object.
+
+    :param ignoretz:
+        If set ``True``, time zones in parsed strings are ignored and a naive
+        :class:`datetime` object is returned.
+
+    :param tzinfos:
+        Additional time zone names / aliases which may be present in the
+        string. This argument maps time zone names (and optionally offsets
+        from those time zones) to time zones. This parameter can be a
+        dictionary with timezone aliases mapping time zone names to time
+        zones or a function taking two parameters (``tzname`` and
+        ``tzoffset``) and returning a time zone.
+
+        The timezones to which the names are mapped can be an integer
+        offset from UTC in seconds or a :class:`tzinfo` object.
+
+        .. doctest::
+           :options: +NORMALIZE_WHITESPACE
+
+            >>> from dateutil.parser import parse
+            >>> from dateutil.tz import gettz
+            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
+            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
+            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
+            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
+            datetime.datetime(2012, 1, 19, 17, 21,
+                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
+
+        This parameter is ignored if ``ignoretz`` is set.
+
+    :param dayfirst:
+        Whether to interpret the first value in an ambiguous 3-integer date
+        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
+        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
+        YMD. If set to ``None``, this value is retrieved from the current
+        :class:`parserinfo` object (which itself defaults to ``False``).
+
+    :param yearfirst:
+        Whether to interpret the first value in an ambiguous 3-integer date
+        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
+        be the year, otherwise the last number is taken to be the year. If
+        this is set to ``None``, the value is retrieved from the current
+        :class:`parserinfo` object (which itself defaults to ``False``).
+
+    :param fuzzy:
+        Whether to allow fuzzy parsing, allowing for string like "Today is
+        January 1, 2047 at 8:21:00AM".
+
+    :param fuzzy_with_tokens:
+        If ``True``, ``fuzzy`` is automatically set to True, and the parser
+        will return a tuple where the first element is the parsed
+        :class:`datetime.datetime` datetimestamp and the second element is
+        a tuple containing the portions of the string which were ignored:
+
+        .. doctest::
+
+            >>> from dateutil.parser import parse
+            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
+            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
+
+    :return:
+        Returns a :class:`datetime.datetime` object or, if the
+        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
+        first element being a :class:`datetime.datetime` object, the second
+        a tuple containing the fuzzy tokens.
+
+    :raises ParserError:
+        Raised for invalid or unknown string formats, if the provided
+        :class:`tzinfo` is not in a valid format, or if an invalid date would
+        be created.
+
+    :raises OverflowError:
+        Raised if the parsed date exceeds the largest valid C integer on
+        your system.
+    """
+    if parserinfo:
+        return parser(parserinfo).parse(timestr, **kwargs)
+    else:
+        return DEFAULTPARSER.parse(timestr, **kwargs)
+
+
+class _tzparser(object):
+
+    class _result(_resultbase):
+
+        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
+                     "start", "end"]
+
+        class _attr(_resultbase):
+            __slots__ = ["month", "week", "weekday",
+                         "yday", "jyday", "day", "time"]
+
+        def __repr__(self):
+            return self._repr("")
+
+        def __init__(self):
+            _resultbase.__init__(self)
+            self.start = self._attr()
+            self.end = self._attr()
+
+    def parse(self, tzstr):
+        res = self._result()
+        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
+        used_idxs = list()
+        try:
+
+            len_l = len(l)
+
+            i = 0
+            while i < len_l:
+                # BRST+3[BRDT[+2]]
+                j = i
+                while j < len_l and not [x for x in l[j]
+                                         if x in "0123456789:,-+"]:
+                    j += 1
+                if j != i:
+                    if not res.stdabbr:
+                        offattr = "stdoffset"
+                        res.stdabbr = "".join(l[i:j])
+                    else:
+                        offattr = "dstoffset"
+                        res.dstabbr = "".join(l[i:j])
+
+                    for ii in range(j):
+                        used_idxs.append(ii)
+                    i = j
+                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
+                                       "0123456789")):
+                        if l[i] in ('+', '-'):
+                            # Yes, that's right.  See the TZ variable
+                            # documentation.
+                            signal = (1, -1)[l[i] == '+']
+                            used_idxs.append(i)
+                            i += 1
+                        else:
+                            signal = -1
+                        len_li = len(l[i])
+                        if len_li == 4:
+                            # -0300
+                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
+                                                   int(l[i][2:]) * 60) * signal)
+                        elif i + 1 < len_l and l[i + 1] == ':':
+                            # -03:00
+                            setattr(res, offattr,
+                                    (int(l[i]) * 3600 +
+                                     int(l[i + 2]) * 60) * signal)
+                            used_idxs.append(i)
+                            i += 2
+                        elif len_li <= 2:
+                            # -[0]3
+                            setattr(res, offattr,
+                                    int(l[i][:2]) * 3600 * signal)
+                        else:
+                            return None
+                        used_idxs.append(i)
+                        i += 1
+                    if res.dstabbr:
+                        break
+                else:
+                    break
+
+
+            if i < len_l:
+                for j in range(i, len_l):
+                    if l[j] == ';':
+                        l[j] = ','
+
+                assert l[i] == ','
+
+                i += 1
+
+            if i >= len_l:
+                pass
+            elif (8 <= l.count(',') <= 9 and
+                  not [y for x in l[i:] if x != ','
+                       for y in x if y not in "0123456789+-"]):
+                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
+                for x in (res.start, res.end):
+                    x.month = int(l[i])
+                    used_idxs.append(i)
+                    i += 2
+                    if l[i] == '-':
+                        value = int(l[i + 1]) * -1
+                        used_idxs.append(i)
+                        i += 1
+                    else:
+                        value = int(l[i])
+                    used_idxs.append(i)
+                    i += 2
+                    if value:
+                        x.week = value
+                        x.weekday = (int(l[i]) - 1) % 7
+                    else:
+                        x.day = int(l[i])
+                    used_idxs.append(i)
+                    i += 2
+                    x.time = int(l[i])
+                    used_idxs.append(i)
+                    i += 2
+                if i < len_l:
+                    if l[i] in ('-', '+'):
+                        signal = (-1, 1)[l[i] == "+"]
+                        used_idxs.append(i)
+                        i += 1
+                    else:
+                        signal = 1
+                    used_idxs.append(i)
+                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)
+
+                # This was a made-up format that is not in normal use
+                warn(('Parsed time zone "%s"' % tzstr) +
+                     'is in a non-standard dateutil-specific format, which ' +
+                     'is now deprecated; support for parsing this format ' +
+                     'will be removed in future versions. It is recommended ' +
+                     'that you switch to a standard format like the GNU ' +
+                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
+            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
+                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
+                                                     '.', '-', ':')
+                       for y in x if y not in "0123456789"]):
+                for x in (res.start, res.end):
+                    if l[i] == 'J':
+                        # non-leap year day (1 based)
+                        used_idxs.append(i)
+                        i += 1
+                        x.jyday = int(l[i])
+                    elif l[i] == 'M':
+                        # month[-.]week[-.]weekday
+                        used_idxs.append(i)
+                        i += 1
+                        x.month = int(l[i])
+                        used_idxs.append(i)
+                        i += 1
+                        assert l[i] in ('-', '.')
+                        used_idxs.append(i)
+                        i += 1
+                        x.week = int(l[i])
+                        if x.week == 5:
+                            x.week = -1
+                        used_idxs.append(i)
+                        i += 1
+                        assert l[i] in ('-', '.')
+                        used_idxs.append(i)
+                        i += 1
+                        x.weekday = (int(l[i]) - 1) % 7
+                    else:
+                        # year day (zero based)
+                        x.yday = int(l[i]) + 1
+
+                    used_idxs.append(i)
+                    i += 1
+
+                    if i < len_l and l[i] == '/':
+                        used_idxs.append(i)
+                        i += 1
+                        # start time
+                        len_li = len(l[i])
+                        if len_li == 4:
+                            # -0300
+                            x.time = (int(l[i][:2]) * 3600 +
+                                      int(l[i][2:]) * 60)
+                        elif i + 1 < len_l and l[i + 1] == ':':
+                            # -03:00
+                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
+                            used_idxs.append(i)
+                            i += 2
+                            if i + 1 < len_l and l[i + 1] == ':':
+                                used_idxs.append(i)
+                                i += 2
+                                x.time += int(l[i])
+                        elif len_li <= 2:
+                            # -[0]3
+                            x.time = (int(l[i][:2]) * 3600)
+                        else:
+                            return None
+                        used_idxs.append(i)
+                        i += 1
+
+                    assert i == len_l or l[i] == ','
+
+                    i += 1
+
+                assert i >= len_l
+
+        except (IndexError, ValueError, AssertionError):
+            return None
+
+        unused_idxs = set(range(len_l)).difference(used_idxs)
+        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
+        return res
+
+
+DEFAULTTZPARSER = _tzparser()
+
+
+def _parsetz(tzstr):
+    return DEFAULTTZPARSER.parse(tzstr)
+
+
+class ParserError(ValueError):
+    """Exception subclass used for any failure to parse a datetime string.
+
+    This is a subclass of :py:exc:`ValueError`, and should be raised any time
+    earlier versions of ``dateutil`` would have raised ``ValueError``.
+
+    .. versionadded:: 2.8.1
+    """
+    def __str__(self):
+        try:
+            return self.args[0] % self.args[1:]
+        except (TypeError, IndexError):
+            return super(ParserError, self).__str__()
+
+    def __repr__(self):
+        args = ", ".join("'%s'" % arg for arg in self.args)
+        return "%s(%s)" % (self.__class__.__name__, args)
+
+
+class UnknownTimezoneWarning(RuntimeWarning):
+    """Raised when the parser finds a timezone it cannot parse into a tzinfo.
+
+    .. versionadded:: 2.7.0
+    """
+# vim:ts=4:sw=4:et
diff --git a/src/dateutil/parser/isoparser.py b/src/dateutil/parser/isoparser.py
new file mode 100644
index 0000000..5d7bee3
--- /dev/null
+++ b/src/dateutil/parser/isoparser.py
@@ -0,0 +1,416 @@
+# -*- coding: utf-8 -*-
+"""
+This module offers a parser for ISO-8601 strings
+
+It is intended to support all valid date, time and datetime formats per the
+ISO-8601 specification.
+
+..versionadded:: 2.7.0
+"""
+from datetime import datetime, timedelta, time, date
+import calendar
+from dateutil import tz
+
+from functools import wraps
+
+import re
+import six
+
+__all__ = ["isoparse", "isoparser"]
+
+
+def _takes_ascii(f):
+    @wraps(f)
+    def func(self, str_in, *args, **kwargs):
+        # If it's a stream, read the whole thing
+        str_in = getattr(str_in, 'read', lambda: str_in)()
+
+        # If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII
+        if isinstance(str_in, six.text_type):
+            # ASCII is the same in UTF-8
+            try:
+                str_in = str_in.encode('ascii')
+            except UnicodeEncodeError as e:
+                msg = 'ISO-8601 strings should contain only ASCII characters'
+                six.raise_from(ValueError(msg), e)
+
+        return f(self, str_in, *args, **kwargs)
+
+    return func
+
+
+class isoparser(object):
+    def __init__(self, sep=None):
+        """
+        :param sep:
+            A single character that separates date and time portions. If
+            ``None``, the parser will accept any single character.
+            For strict ISO-8601 adherence, pass ``'T'``.
+        """
+        if sep is not None:
+            if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
+                raise ValueError('Separator must be a single, non-numeric ' +
+                                 'ASCII character')
+
+            sep = sep.encode('ascii')
+
+        self._sep = sep
+
+    @_takes_ascii
+    def isoparse(self, dt_str):
+        """
+        Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.
+
+        An ISO-8601 datetime string consists of a date portion, followed
+        optionally by a time portion - the date and time portions are separated
+        by a single character separator, which is ``T`` in the official
+        standard. Incomplete date formats (such as ``YYYY-MM``) may *not* be
+        combined with a time portion.
+
+        Supported date formats are:
+
+        Common:
+
+        - ``YYYY``
+        - ``YYYY-MM`` or ``YYYYMM``
+        - ``YYYY-MM-DD`` or ``YYYYMMDD``
+
+        Uncommon:
+
+        - ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 0)
+        - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day
+
+        The ISO week and day numbering follows the same logic as
+        :func:`datetime.date.isocalendar`.
+
+        Supported time formats are:
+
+        - ``hh``
+        - ``hh:mm`` or ``hhmm``
+        - ``hh:mm:ss`` or ``hhmmss``
+        - ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)
+
+        Midnight is a special case for `hh`, as the standard supports both
+        00:00 and 24:00 as a representation. The decimal separator can be
+        either a dot or a comma.
+
+
+        .. caution::
+
+            Support for fractional components other than seconds is part of the
+            ISO-8601 standard, but is not currently implemented in this parser.
+
+        Supported time zone offset formats are:
+
+        - `Z` (UTC)
+        - `±HH:MM`
+        - `±HHMM`
+        - `±HH`
+
+        Offsets will be represented as :class:`dateutil.tz.tzoffset` objects,
+        with the exception of UTC, which will be represented as
+        :class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such
+        as `+00:00`) will also be represented as :class:`dateutil.tz.tzutc`.
+
+        :param dt_str:
+            A string or stream containing only an ISO-8601 datetime string
+
+        :return:
+            Returns a :class:`datetime.datetime` representing the string.
+            Unspecified components default to their lowest value.
+
+        .. warning::
+
+            As of version 2.7.0, the strictness of the parser should not be
+            considered a stable part of the contract. Any valid ISO-8601 string
+            that parses correctly with the default settings will continue to
+            parse correctly in future versions, but invalid strings that
+            currently fail (e.g. ``2017-01-01T00:00+00:00:00``) are not
+            guaranteed to continue failing in future versions if they encode
+            a valid date.
+
+        .. versionadded:: 2.7.0
+        """
+        components, pos = self._parse_isodate(dt_str)
+
+        if len(dt_str) > pos:
+            if self._sep is None or dt_str[pos:pos + 1] == self._sep:
+                components += self._parse_isotime(dt_str[pos + 1:])
+            else:
+                raise ValueError('String contains unknown ISO components')
+
+        if len(components) > 3 and components[3] == 24:
+            components[3] = 0
+            return datetime(*components) + timedelta(days=1)
+
+        return datetime(*components)
+
+    @_takes_ascii
+    def parse_isodate(self, datestr):
+        """
+        Parse the date portion of an ISO string.
+
+        :param datestr:
+            The string portion of an ISO string, without a separator
+
+        :return:
+            Returns a :class:`datetime.date` object
+        """
+        components, pos = self._parse_isodate(datestr)
+        if pos < len(datestr):
+            raise ValueError('String contains unknown ISO ' +
+                             'components: {!r}'.format(datestr.decode('ascii')))
+        return date(*components)
+
+    @_takes_ascii
+    def parse_isotime(self, timestr):
+        """
+        Parse the time portion of an ISO string.
+
+        :param timestr:
+            The time portion of an ISO string, without a separator
+
+        :return:
+            Returns a :class:`datetime.time` object
+        """
+        components = self._parse_isotime(timestr)
+        if components[0] == 24:
+            components[0] = 0
+        return time(*components)
+
+    @_takes_ascii
+    def parse_tzstr(self, tzstr, zero_as_utc=True):
+        """
+        Parse a valid ISO time zone string.
+
+        See :func:`isoparser.isoparse` for details on supported formats.
+
+        :param tzstr:
+            A string representing an ISO time zone offset
+
+        :param zero_as_utc:
+            Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones
+
+        :return:
+            Returns :class:`dateutil.tz.tzoffset` for offsets and
+            :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is
+            specified) offsets equivalent to UTC.
+        """
+        return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
+
+    # Constants
+    _DATE_SEP = b'-'
+    _TIME_SEP = b':'
+    _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
+
+    def _parse_isodate(self, dt_str):
+        try:
+            return self._parse_isodate_common(dt_str)
+        except ValueError:
+            return self._parse_isodate_uncommon(dt_str)
+
+    def _parse_isodate_common(self, dt_str):
+        len_str = len(dt_str)
+        components = [1, 1, 1]
+
+        if len_str < 4:
+            raise ValueError('ISO string too short')
+
+        # Year
+        components[0] = int(dt_str[0:4])
+        pos = 4
+        if pos >= len_str:
+            return components, pos
+
+        has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
+        if has_sep:
+            pos += 1
+
+        # Month
+        if len_str - pos < 2:
+            raise ValueError('Invalid common month')
+
+        components[1] = int(dt_str[pos:pos + 2])
+        pos += 2
+
+        if pos >= len_str:
+            if has_sep:
+                return components, pos
+            else:
+                raise ValueError('Invalid ISO format')
+
+        if has_sep:
+            if dt_str[pos:pos + 1] != self._DATE_SEP:
+                raise ValueError('Invalid separator in ISO string')
+            pos += 1
+
+        # Day
+        if len_str - pos < 2:
+            raise ValueError('Invalid common day')
+        components[2] = int(dt_str[pos:pos + 2])
+        return components, pos + 2
+
+    def _parse_isodate_uncommon(self, dt_str):
+        if len(dt_str) < 4:
+            raise ValueError('ISO string too short')
+
+        # All ISO formats start with the year
+        year = int(dt_str[0:4])
+
+        has_sep = dt_str[4:5] == self._DATE_SEP
+
+        pos = 4 + has_sep       # Skip '-' if it's there
+        if dt_str[pos:pos + 1] == b'W':
+            # YYYY-?Www-?D?
+            pos += 1
+            weekno = int(dt_str[pos:pos + 2])
+            pos += 2
+
+            dayno = 1
+            if len(dt_str) > pos:
+                if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
+                    raise ValueError('Inconsistent use of dash separator')
+
+                pos += has_sep
+
+                dayno = int(dt_str[pos:pos + 1])
+                pos += 1
+
+            base_date = self._calculate_weekdate(year, weekno, dayno)
+        else:
+            # YYYYDDD or YYYY-DDD
+            if len(dt_str) - pos < 3:
+                raise ValueError('Invalid ordinal day')
+
+            ordinal_day = int(dt_str[pos:pos + 3])
+            pos += 3
+
+            if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
+                raise ValueError('Invalid ordinal day' +
+                                 ' {} for year {}'.format(ordinal_day, year))
+
+            base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)
+
+        components = [base_date.year, base_date.month, base_date.day]
+        return components, pos
+
+    def _calculate_weekdate(self, year, week, day):
+        """
+        Calculate the day of corresponding to the ISO year-week-day calendar.
+
+        This function is effectively the inverse of
+        :func:`datetime.date.isocalendar`.
+
+        :param year:
+            The year in the ISO calendar
+
+        :param week:
+            The week in the ISO calendar - range is [1, 53]
+
+        :param day:
+            The day in the ISO calendar - range is [1 (MON), 7 (SUN)]
+
+        :return:
+            Returns a :class:`datetime.date`
+        """
+        if not 0 < week < 54:
+            raise ValueError('Invalid week: {}'.format(week))
+
+        if not 0 < day < 8:     # Range is 1-7
+            raise ValueError('Invalid weekday: {}'.format(day))
+
+        # Get week 1 for the specific year:
+        jan_4 = date(year, 1, 4)   # Week 1 always has January 4th in it
+        week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)
+
+        # Now add the specific number of weeks and days to get what we want
+        week_offset = (week - 1) * 7 + (day - 1)
+        return week_1 + timedelta(days=week_offset)
+
+    def _parse_isotime(self, timestr):
+        len_str = len(timestr)
+        components = [0, 0, 0, 0, None]
+        pos = 0
+        comp = -1
+
+        if len_str < 2:
+            raise ValueError('ISO time too short')
+
+        has_sep = False
+
+        while pos < len_str and comp < 5:
+            comp += 1
+
+            if timestr[pos:pos + 1] in b'-+Zz':
+                # Detect time zone boundary
+                components[-1] = self._parse_tzstr(timestr[pos:])
+                pos = len_str
+                break
+
+            if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
+                has_sep = True
+                pos += 1
+            elif comp == 2 and has_sep:
+                if timestr[pos:pos+1] != self._TIME_SEP:
+                    raise ValueError('Inconsistent use of colon separator')
+                pos += 1
+
+            if comp < 3:
+                # Hour, minute, second
+                components[comp] = int(timestr[pos:pos + 2])
+                pos += 2
+
+            if comp == 3:
+                # Fraction of a second
+                frac = self._FRACTION_REGEX.match(timestr[pos:])
+                if not frac:
+                    continue
+
+                us_str = frac.group(1)[:6]  # Truncate to microseconds
+                components[comp] = int(us_str) * 10**(6 - len(us_str))
+                pos += len(frac.group())
+
+        if pos < len_str:
+            raise ValueError('Unused components in ISO string')
+
+        if components[0] == 24:
+            # Standard supports 00:00 and 24:00 as representations of midnight
+            if any(component != 0 for component in components[1:4]):
+                raise ValueError('Hour may only be 24 at 24:00:00.000')
+
+        return components
+
+    def _parse_tzstr(self, tzstr, zero_as_utc=True):
+        if tzstr == b'Z' or tzstr == b'z':
+            return tz.UTC
+
+        if len(tzstr) not in {3, 5, 6}:
+            raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
+
+        if tzstr[0:1] == b'-':
+            mult = -1
+        elif tzstr[0:1] == b'+':
+            mult = 1
+        else:
+            raise ValueError('Time zone offset requires sign')
+
+        hours = int(tzstr[1:3])
+        if len(tzstr) == 3:
+            minutes = 0
+        else:
+            minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
+
+        if zero_as_utc and hours == 0 and minutes == 0:
+            return tz.UTC
+        else:
+            if minutes > 59:
+                raise ValueError('Invalid minutes in time zone offset')
+
+            if hours > 23:
+                raise ValueError('Invalid hours in time zone offset')
+
+            return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)
+
+
+DEFAULT_ISOPARSER = isoparser()
+isoparse = DEFAULT_ISOPARSER.isoparse
author	Mario Corchero <mcorcherojim@bloomberg.net>	2021-07-16 11:06:47 +0200
committer	Mario Corchero <mcorcherojim@bloomberg.net>	2021-07-16 17:08:09 +0200
commit	da9ecd07a257cb5d1c7bfa08459b1ccfcabe6d23 (patch)
tree	20ce3bac3aaedb3695b299b243544b6284dc0011 /src/dateutil/parser
parent	b21555f3b6afd2a0c660293b21e19b32f1d4f50e (diff)
download	dateutil-git-da9ecd07a257cb5d1c7bfa08459b1ccfcabe6d23.tar.gz