summaryrefslogtreecommitdiff
path: root/pyparsing.py
diff options
context:
space:
mode:
authorPaul McGuire <ptmcg@austin.rr.com>2019-07-08 20:54:30 -0500
committerPaul McGuire <ptmcg@austin.rr.com>2019-07-08 20:54:30 -0500
commitd5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5 (patch)
tree61ef123efeb7f2d6ef6c4466f6e73f4f0eab5ec1 /pyparsing.py
parente30333f5eb4262069a8863e8fa276ecbcd364660 (diff)
downloadpyparsing-git-d5b06c46cb87c0491bf7a2c02ebdfe7aac0bbdd5.tar.gz
Add __diag__ namespace to enable diagnostic switches; add asKeyword optional arg for oneOf to emit Keyword expressions instead of Literals
Diffstat (limited to 'pyparsing.py')
-rw-r--r--pyparsing.py150
1 files changed, 122 insertions, 28 deletions
diff --git a/pyparsing.py b/pyparsing.py
index d41ceba..77617d2 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -157,11 +157,34 @@ __compat__.__doc__ = """
- collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
of results names when an And expression is nested within an Or or MatchFirst; set to
- True to enable bugfix to be released in pyparsing 2.4
+ True to enable bugfix released in pyparsing 2.3.0, or False to preserve
+ pre-2.3.0 handling of named results
"""
__compat__.collect_all_And_tokens = True
+__diag__ = SimpleNamespace()
+__diag__.__doc__ = """
+Diagnostic configuration
+ - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
+ name is defined on a MatchFirst or Or expression with one or more And subexpressions
+ (default=True) (only warns if __compat__.collect_all_And_tokens is False)
+ - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
+ name is defined on a containing expression with ungrouped subexpressions that also
+ have results names (default=True)
+ - warn_name_set_on_empty_Forward - flag to enable warnings whan a Forward is defined
+ with a results name, but has no contents defined (default=False)
+ - warn_on_multiple_string_args_to_oneof - flag to enable warnings whan oneOf is
+ incorrectly called with multiple str arguments (default=True)
+ - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
+ calls to ParserElement.setName() (default=False)
+"""
+__diag__.warn_multiple_tokens_in_named_alternation = True
+__diag__.warn_ungrouped_named_tokens_in_collection = True
+__diag__.warn_name_set_on_empty_Forward = False
+__diag__.warn_on_multiple_string_args_to_oneof = True
+__diag__.enable_debug_on_named_expressions = False
+
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
__all__ = [ '__version__', '__versionTime__', '__author__', '__compat__',
@@ -1385,8 +1408,8 @@ class ParserElement(object):
"""
self.name = name
self.errmsg = "Expected " + self.name
- if hasattr(self,"exception"):
- self.exception.msg = self.errmsg
+ if __diag__.enable_debug_on_named_expressions:
+ self.setDebug()
return self
def setResultsName( self, name, listAllMatches=False ):
@@ -1410,6 +1433,9 @@ class ParserElement(object):
# equivalent form:
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
"""
+ return self._setResultsName(name, listAllMatches)
+
+ def _setResultsName(self, name, listAllMatches=False):
newself = self.copy()
if name.endswith("*"):
name = name[:-1]
@@ -2338,7 +2364,7 @@ class ParserElement(object):
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
"""
if name is not None:
- return self.setResultsName(name)
+ return self._setResultsName(name)
else:
return self.copy()
@@ -3843,6 +3869,20 @@ class ParseExpression(ParserElement):
ret.exprs = [e.copy() for e in self.exprs]
return ret
+ def _setResultsName(self, name, listAllMatches=False):
+ if __diag__.warn_ungrouped_named_tokens_in_collection:
+ for e in self.exprs:
+ if isinstance(e, ParserElement) and e.resultsName:
+ warnings.warn("{0}: setting results name {1!r} on {2} expression "
+ "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
+ name,
+ type(self).__name__,
+ e.resultsName),
+ stacklevel=3)
+
+ return super(ParseExpression, self)._setResultsName(name, listAllMatches)
+
+
class And(ParseExpression):
"""
Requires all given :class:`ParseExpression` s to be found in the given order.
@@ -4052,6 +4092,17 @@ class Or(ParseExpression):
for e in self.exprs:
e.checkRecursion( subRecCheckList )
+ def _setResultsName(self, name, listAllMatches=False):
+ if (not __compat__.collect_all_And_tokens
+ and __diag__.warn_multiple_tokens_in_named_alternation):
+ if any(isinstance(e, And) for e in self.exprs):
+ warnings.warn("{0}: setting results name {1!r} on {2} expression "
+ "may only return a single token for an And alternative".format(
+ "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
+ stacklevel=3)
+
+ return super(Or, self)._setResultsName(name, listAllMatches)
+
class MatchFirst(ParseExpression):
"""Requires that at least one :class:`ParseExpression` is found. If
@@ -4126,6 +4177,17 @@ class MatchFirst(ParseExpression):
for e in self.exprs:
e.checkRecursion( subRecCheckList )
+ def _setResultsName(self, name, listAllMatches=False):
+ if (not __compat__.collect_all_And_tokens
+ and __diag__.warn_multiple_tokens_in_named_alternation):
+ if any(isinstance(e, And) for e in self.exprs):
+ warnings.warn("{0}: setting results name {1!r} on {2} expression "
+ "may only return a single token for an And alternative".format(
+ "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
+ stacklevel=3)
+
+ return super(MatchFirst, self)._setResultsName(name, listAllMatches)
+
class Each(ParseExpression):
"""Requires all given :class:`ParseExpression` s to be found, but in
@@ -4532,6 +4594,20 @@ class _MultipleMatch(ParseElementEnhance):
return loc, tokens
+ def _setResultsName(self, name, listAllMatches=False):
+ if __diag__.warn_ungrouped_named_tokens_in_collection:
+ for e in [self.expr] + getattr(self.expr, 'exprs', []):
+ if isinstance(e, ParserElement) and e.resultsName:
+ warnings.warn("{0}: setting results name {1!r} on {2} expression "
+ "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
+ name,
+ type(self).__name__,
+ e.resultsName),
+ stacklevel=3)
+
+ return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
+
+
class OneOrMore(_MultipleMatch):
"""Repetition of one or more of the given expression.
@@ -4881,6 +4957,17 @@ class Forward(ParseElementEnhance):
ret <<= self
return ret
+ def _setResultsName(self, name, listAllMatches=False):
+ if __diag__.warn_name_set_on_empty_Forward:
+ if self.expr is None:
+ warnings.warn("{0}: setting results name {0!r} on {1} expression "
+ "that has no contained expression".format("warn_name_set_on_empty_Forward",
+ name,
+ type(self).__name__),
+ stacklevel=3)
+
+ return super(Forward, self)._setResultsName(name, listAllMatches)
+
class TokenConverter(ParseElementEnhance):
"""
Abstract subclass of :class:`ParseExpression`, for converting parsed results.
@@ -5249,7 +5336,7 @@ def _escapeRegexRangeChars(s):
s = s.replace("\t",r"\t")
return _ustr(s)
-def oneOf( strs, caseless=False, useRegex=True ):
+def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
"""Helper to quickly define a set of alternative Literals, and makes
sure to do longest-first testing when there is a conflict,
regardless of the input order, but returns
@@ -5263,8 +5350,10 @@ def oneOf( strs, caseless=False, useRegex=True ):
caseless
- useRegex - (default= ``True``) - as an optimization, will
generate a Regex object; otherwise, will generate
- a :class:`MatchFirst` object (if ``caseless=True``, or if
+ a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
creating a :class:`Regex` raises an exception)
+ - asKeyword - (default=``False``) - enforce Keyword-style matching on the
+ generated expressions
Example::
@@ -5279,14 +5368,18 @@ def oneOf( strs, caseless=False, useRegex=True ):
[['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
"""
+ if isinstance(caseless, basestring):
+ warnings.warn("More than one string argument passed to oneOf, pass "
+ "choices as a list or space-delimited string", stacklevel=2)
+
if caseless:
- isequal = ( lambda a,b: a.upper() == b.upper() )
- masks = ( lambda a,b: b.upper().startswith(a.upper()) )
- parseElementClass = CaselessLiteral
+ isequal = (lambda a, b: a.upper() == b.upper())
+ masks = (lambda a, b: b.upper().startswith(a.upper()))
+ parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
else:
- isequal = ( lambda a,b: a == b )
- masks = ( lambda a,b: b.startswith(a) )
- parseElementClass = Literal
+ isequal = (lambda a, b: a == b)
+ masks = (lambda a, b: b.startswith(a))
+ parseElementClass = Keyword if asKeyword else Literal
symbols = []
if isinstance(strs,basestring):
@@ -5299,22 +5392,24 @@ def oneOf( strs, caseless=False, useRegex=True ):
if not symbols:
return NoMatch()
- i = 0
- while i < len(symbols)-1:
- cur = symbols[i]
- for j,other in enumerate(symbols[i+1:]):
- if ( isequal(other, cur) ):
- del symbols[i+j+1]
- break
- elif ( masks(cur, other) ):
- del symbols[i+j+1]
- symbols.insert(i,other)
- cur = other
- break
- else:
- i += 1
+ if not asKeyword:
+ # if not producing keywords, need to reorder to take care to avoid masking
+ # longer choices with shorter ones
+ i = 0
+ while i < len(symbols)-1:
+ cur = symbols[i]
+ for j, other in enumerate(symbols[i+1:]):
+ if (isequal(other, cur)):
+ del symbols[i+j+1]
+ break
+ elif (masks(cur, other)):
+ del symbols[i+j+1]
+ symbols.insert(i,other)
+ break
+ else:
+ i += 1
- if not caseless and useRegex:
+ if not (caseless or asKeyword) and useRegex:
#~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
try:
if len(symbols)==len("".join(symbols)):
@@ -5325,7 +5420,6 @@ def oneOf( strs, caseless=False, useRegex=True ):
warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
SyntaxWarning, stacklevel=2)
-
# last resort, just use MatchFirst
return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))