summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul McGuire <ptmcg@austin.rr.com> 2019-09-02 11:20:50 -0500
committer Paul McGuire <ptmcg@austin.rr.com> 2019-09-02 11:20:50 -0500
commit 8339b045406477f1fe8d483b1d3b1fb36fa39b31 (patch)
tree 515d34265fa1fd43f0f84630593e9e162beb8951
parent 6a899ffd880d7937e6e8fd540552c65f945dce53 (diff)
download pyparsing-git-8339b045406477f1fe8d483b1d3b1fb36fa39b31.tar.gz
Propagate setDefaultWhitespaceChars to helper expressions defined in pyparsing module
-rw-r--r-- CHANGES 12
-rw-r--r-- pyparsing.py 27
-rw-r--r-- unitTests.py 115
3 files changed, 144 insertions, 10 deletions
diff --git a/CHANGES b/CHANGES
index 48a34e1..26ac8ff 100644
--- a/CHANGES
+++ b/CHANGES
@@ -44,6 +44,18 @@ Version 3.0.0a1
asDict() contents will now see additional entries for parsers
having named ZeroOrMore expressions, whose values will be `[]`.
+- POTENTIAL API CHANGE:
+ Fixed a bug in which calls to ParserElement.setDefaultWhitespaceChars
+ did not change whitespace definitions on any pyparsing built-in
+ expressions defined at import time (such as quotedString, or those
+ defined in pyparsing_common). This would lead to confusion when
+ built-in expressions would not use updated default whitespace
+ characters. Now a call to ParserElement.setDefaultWhitespaceChars
+ will also go and update all pyparsing built-ins to use the new
+ default whitespace characters. (Note that this will only modify
+ expressions defined within the pyparsing module.) Prompted by
+ work on a StackOverflow question posted by jtiai.
+
- Expanded __diag__ and __compat__ to actual classes instead of
just namespaces, to add some helpful behavior:
- enable() and .disable() methods to give extra
diff --git a/pyparsing.py b/pyparsing.py
index 31a1560..0add1c7 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -1206,6 +1206,11 @@ class ParserElement(object):
"""
ParserElement.DEFAULT_WHITE_CHARS = chars
+ # update whitespace for all parse expressions defined in this module
+ for expr in _builtin_exprs:
+ if expr.copyDefaultWhiteChars:
+ expr.whiteChars = chars
+
@staticmethod
def inlineLiteralsUsing(cls):
"""
@@ -2252,13 +2257,13 @@ class ParserElement(object):
self.skipWhitespace = False
return self
- def setWhitespaceChars(self, chars):
+ def setWhitespaceChars(self, chars, copy_defaults=False):
"""
Overrides the default whitespace chars
"""
self.skipWhitespace = True
self.whiteChars = chars
- self.copyDefaultWhiteChars = False
+ self.copyDefaultWhiteChars = copy_defaults
return self
def parseWithTabs(self):
@@ -3462,7 +3467,8 @@ class White(Token):
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
super().__init__()
self.matchWhite = ws
- self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))
+ self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite),
+ copy_defaults=True)
# ~ self.leaveWhitespace()
self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
self.mayReturnEmpty = True
@@ -3565,7 +3571,8 @@ class LineEnd(_PositionToken):
"""
def __init__(self):
super().__init__()
- self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
+ self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""),
+ copy_defaults=False)
self.errmsg = "Expected end of line"
def parseImpl(self, instring, loc, doActions=True):
@@ -3815,7 +3822,8 @@ class And(ParseExpression):
exprs[:] = tmp
super().__init__(exprs, savelist)
self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
- self.setWhitespaceChars(self.exprs[0].whiteChars)
+ self.setWhitespaceChars(self.exprs[0].whiteChars,
+ copy_defaults=self.exprs[0].copyDefaultWhiteChars)
self.skipWhitespace = self.exprs[0].skipWhitespace
self.callPreparse = True
@@ -4286,7 +4294,7 @@ class ParseElementEnhance(ParserElement):
if expr is not None:
self.mayIndexError = expr.mayIndexError
self.mayReturnEmpty = expr.mayReturnEmpty
- self.setWhitespaceChars(expr.whiteChars)
+ self.setWhitespaceChars(expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars)
self.skipWhitespace = expr.skipWhitespace
self.saveAsList = expr.saveAsList
self.callPreparse = expr.callPreparse
@@ -4856,7 +4864,7 @@ class Forward(ParseElementEnhance):
self.strRepr = None
self.mayIndexError = self.expr.mayIndexError
self.mayReturnEmpty = self.expr.mayReturnEmpty
- self.setWhitespaceChars(self.expr.whiteChars)
+ self.setWhitespaceChars(self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars)
self.skipWhitespace = self.expr.skipWhitespace
self.saveAsList = self.expr.saveAsList
self.ignoreExprs.extend(self.expr.ignoreExprs)
@@ -6766,3 +6774,8 @@ if __name__ == "__main__":
pyparsing_common.uuid.runTests("""
12345678-1234-5678-1234-567812345678
""")
+
+# build list of built-in expressions, for future reference if a global default value
+# gets updated
+_builtin_exprs = [v for v in itertools.chain(vars().values(), vars(pyparsing_common).values())
+ if isinstance(v, ParserElement)]
diff --git a/unitTests.py b/unitTests.py
index 11ebcc3..1bf584c 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -62,7 +62,7 @@ BUFFER_OUTPUT = True
class ParseTestCase(TestCase):
def __init__(self):
- super(ParseTestCase, self).__init__(methodName='_runTest')
+ super().__init__(methodName='_runTest')
def _runTest(self):
@@ -102,6 +102,108 @@ class PyparsingTestInit(ParseTestCase):
pass
+class UpdateDefaultWhitespaceTest(ParseTestCase):
+ def runTest(self):
+ import pyparsing as pp
+
+ prev_default_whitespace_chars = pp.ParserElement.DEFAULT_WHITE_CHARS
+ try:
+ pp.dblQuotedString.copyDefaultWhiteChars = False
+ pp.ParserElement.setDefaultWhitespaceChars(" \t")
+ self.assertEqual(set(pp.sglQuotedString.whiteChars), set(" \t"),
+ "setDefaultWhitespaceChars did not update sglQuotedString")
+ self.assertEqual(set(pp.dblQuotedString.whiteChars), set(prev_default_whitespace_chars),
+ "setDefaultWhitespaceChars updated dblQuotedString but should not")
+ finally:
+ pp.dblQuotedString.copyDefaultWhiteChars = True
+ pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars)
+
+ self.assertEqual(set(pp.dblQuotedString.whiteChars), set(prev_default_whitespace_chars),
+ "setDefaultWhitespaceChars updated dblQuotedString")
+
+ try:
+ pp.ParserElement.setDefaultWhitespaceChars(" \t")
+ self.assertNotEqual(set(pp.dblQuotedString.whiteChars), set(prev_default_whitespace_chars),
+ "setDefaultWhitespaceChars updated dblQuotedString but should not")
+
+ EOL = pp.LineEnd().suppress().setName("EOL")
+
+ # Identifiers is a string + optional $
+ identifier = pp.Combine(pp.Word(pp.alphas) + pp.Optional("$"))
+
+ # Literals (number or double quoted string)
+ literal = pp.pyparsing_common.number | pp.dblQuotedString
+ expression = literal | identifier
+ # expression.setName("expression").setDebug()
+ # pp.pyparsing_common.number.setDebug()
+ # pp.pyparsing_common.integer.setDebug()
+
+ line_number = pp.pyparsing_common.integer
+
+ # Keywords
+ PRINT = pp.CaselessKeyword("print")
+ print_stmt = PRINT - pp.ZeroOrMore(expression | ";")
+ statement = print_stmt
+ code_line = pp.Group(line_number + statement + EOL)
+ program = pp.ZeroOrMore(code_line)
+
+ test = """\
+ 10 print 123;
+ 20 print 234; 567;
+ 30 print 890
+ """
+
+ parsed_program = program.parseString(test)
+ print(parsed_program.dump())
+ self.assertEqual(len(parsed_program), 3, "failed to apply new whitespace chars to existing builtins")
+
+ finally:
+ pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars)
+
+class UpdateDefaultWhitespace2Test(ParseTestCase):
+ def runTest(self):
+ import pyparsing as pp
+ ppc = pp.pyparsing_common
+
+ prev_default_whitespace_chars = pp.ParserElement.DEFAULT_WHITE_CHARS
+ try:
+ expr_tests = [
+ (pp.dblQuotedString, '"abc"'),
+ (pp.sglQuotedString, "'def'"),
+ (ppc.integer, "123"),
+ (ppc.number, "4.56"),
+ (ppc.identifier, "a_bc"),
+ ]
+ NL = pp.LineEnd()
+
+ for expr, test_str in expr_tests:
+ parser = pp.Group(expr[1, ...] + pp.Optional(NL))[1, ...]
+ test_string = '\n'.join([test_str]*3)
+ result = parser.parseString(test_string, parseAll=True)
+ print(result.dump())
+ self.assertEqual(len(result), 1, "failed {!r}".format(test_string))
+
+ pp.ParserElement.setDefaultWhitespaceChars(" \t")
+
+ for expr, test_str in expr_tests:
+ parser = pp.Group(expr[1, ...] + pp.Optional(NL))[1, ...]
+ test_string = '\n'.join([test_str]*3)
+ result = parser.parseString(test_string, parseAll=True)
+ print(result.dump())
+ self.assertEqual(len(result), 3, "failed {!r}".format(test_string))
+
+ pp.ParserElement.setDefaultWhitespaceChars(" \n\t")
+
+ for expr, test_str in expr_tests:
+ parser = pp.Group(expr[1, ...] + pp.Optional(NL))[1, ...]
+ test_string = '\n'.join([test_str]*3)
+ result = parser.parseString(test_string, parseAll=True)
+ print(result.dump())
+ self.assertEqual(len(result), 1, "failed {!r}".format(test_string))
+
+ finally:
+ pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars)
+
class ParseFourFnTest(ParseTestCase):
def runTest(self):
import examples.fourFn as fourFn
@@ -2083,7 +2185,9 @@ class LineStartTest(ParseTestCase):
success = test_patt.runTests(fail_tests, failureTests=True)[0]
self.assertTrue(success, "failed LineStart failure mode tests (1)")
- with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"):
+ # with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"):
+ prev_default_whitespace_chars = pp.ParserElement.DEFAULT_WHITE_CHARS
+ try:
print(r'no \n in default whitespace chars')
pp.ParserElement.setDefaultWhitespaceChars(' ')
@@ -2103,6 +2207,8 @@ class LineStartTest(ParseTestCase):
success = test_patt.runTests(fail_tests, failureTests=True)[0]
self.assertTrue(success, "failed LineStart failure mode tests (3)")
+ finally:
+ pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars)
test = """\
AAA 1
@@ -2123,12 +2229,15 @@ class LineStartTest(ParseTestCase):
print()
self.assertEqual(test[s], 'A', 'failed LineStart with insignificant newlines')
- with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"):
+ # with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"):
+ try:
pp.ParserElement.setDefaultWhitespaceChars(' ')
for t, s, e in (pp.LineStart() + 'AAA').scanString(test):
print(s, e, pp.lineno(s, test), pp.line(s, test), ord(test[s]))
print()
self.assertEqual(test[s], 'A', 'failed LineStart with insignificant newlines')
+ finally:
+ pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars)
class LineAndStringEndTest(ParseTestCase):