diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-09-02 11:20:50 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-09-02 11:20:50 -0500 |
commit | 8339b045406477f1fe8d483b1d3b1fb36fa39b31 (patch) | |
tree | 515d34265fa1fd43f0f84630593e9e162beb8951 | |
parent | 6a899ffd880d7937e6e8fd540552c65f945dce53 (diff) | |
download | pyparsing-git-8339b045406477f1fe8d483b1d3b1fb36fa39b31.tar.gz |
Propagate setDefaultWhitespaceChars to helper expressions defined in pyparsing module
-rw-r--r-- | CHANGES | 12 | ||||
-rw-r--r-- | pyparsing.py | 27 | ||||
-rw-r--r-- | unitTests.py | 115 |
3 files changed, 144 insertions, 10 deletions
@@ -44,6 +44,18 @@ Version 3.0.0a1 asDict() contents will now see additional entries for parsers having named ZeroOrMore expressions, whose values will be `[]`. +- POTENTIAL API CHANGE: + Fixed a bug in which calls to ParserElement.setDefaultWhitespaceChars + did not change whitespace definitions on any pyparsing built-in + expressions defined at import time (such as quotedString, or those + defined in pyparsing_common). This would lead to confusion when + built-in expressions would not use updated default whitespace + characters. Now a call to ParserElement.setDefaultWhitespaceChars + will also go and update all pyparsing built-ins to use the new + default whitespace characters. (Note that this will only modify + expressions defined within the pyparsing module.) Prompted by + work on a StackOverflow question posted by jtiai. + - Expanded __diag__ and __compat__ to actual classes instead of just namespaces, to add some helpful behavior: - enable() and .disable() methods to give extra diff --git a/pyparsing.py b/pyparsing.py index 31a1560..0add1c7 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -1206,6 +1206,11 @@ class ParserElement(object): """ ParserElement.DEFAULT_WHITE_CHARS = chars + # update whitespace all parse expressions defined in this module + for expr in _builtin_exprs: + if expr.copyDefaultWhiteChars: + expr.whiteChars = chars + @staticmethod def inlineLiteralsUsing(cls): """ @@ -2252,13 +2257,13 @@ class ParserElement(object): self.skipWhitespace = False return self - def setWhitespaceChars(self, chars): + def setWhitespaceChars(self, chars, copy_defaults=False): """ Overrides the default whitespace chars """ self.skipWhitespace = True self.whiteChars = chars - self.copyDefaultWhiteChars = False + self.copyDefaultWhiteChars = copy_defaults return self def parseWithTabs(self): @@ -3462,7 +3467,8 @@ class White(Token): def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): super().__init__() self.matchWhite = ws - self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite)) + self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite), + copy_defaults=True) # ~ self.leaveWhitespace() self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) self.mayReturnEmpty = True @@ -3565,7 +3571,8 @@ class LineEnd(_PositionToken): """ def __init__(self): super().__init__() - self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")) + self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""), + copy_defaults=False) self.errmsg = "Expected end of line" def parseImpl(self, instring, loc, doActions=True): @@ -3815,7 +3822,8 @@ class And(ParseExpression): exprs[:] = tmp super().__init__(exprs, savelist) self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - self.setWhitespaceChars(self.exprs[0].whiteChars) + self.setWhitespaceChars(self.exprs[0].whiteChars, + copy_defaults=self.exprs[0].copyDefaultWhiteChars) self.skipWhitespace = self.exprs[0].skipWhitespace self.callPreparse = True @@ -4286,7 +4294,7 @@ class ParseElementEnhance(ParserElement): if expr is not None: self.mayIndexError = expr.mayIndexError self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars(expr.whiteChars) + self.setWhitespaceChars(expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars) self.skipWhitespace = expr.skipWhitespace self.saveAsList = expr.saveAsList self.callPreparse = expr.callPreparse @@ -4856,7 +4864,7 @@ class Forward(ParseElementEnhance): self.strRepr = None self.mayIndexError = self.expr.mayIndexError self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars(self.expr.whiteChars) + self.setWhitespaceChars(self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars) self.skipWhitespace = self.expr.skipWhitespace self.saveAsList = self.expr.saveAsList self.ignoreExprs.extend(self.expr.ignoreExprs) @@ -6766,3 +6774,8 @@ if __name__ == "__main__": pyparsing_common.uuid.runTests(""" 12345678-1234-5678-1234-567812345678 """) + +# build list of built-in expressions, for future reference if a global default value +# gets updated +_builtin_exprs = [v for v in itertools.chain(vars().values(), vars(pyparsing_common).values()) + if isinstance(v, ParserElement)] diff --git a/unitTests.py b/unitTests.py index 11ebcc3..1bf584c 100644 --- a/unitTests.py +++ b/unitTests.py @@ -62,7 +62,7 @@ BUFFER_OUTPUT = True class ParseTestCase(TestCase): def __init__(self): - super(ParseTestCase, self).__init__(methodName='_runTest') + super().__init__(methodName='_runTest') def _runTest(self): @@ -102,6 +102,108 @@ class PyparsingTestInit(ParseTestCase): pass +class UpdateDefaultWhitespaceTest(ParseTestCase): + def runTest(self): + import pyparsing as pp + + prev_default_whitespace_chars = pp.ParserElement.DEFAULT_WHITE_CHARS + try: + pp.dblQuotedString.copyDefaultWhiteChars = False + pp.ParserElement.setDefaultWhitespaceChars(" \t") + self.assertEqual(set(pp.sglQuotedString.whiteChars), set(" \t"), + "setDefaultWhitespaceChars did not update sglQuotedString") + self.assertEqual(set(pp.dblQuotedString.whiteChars), set(prev_default_whitespace_chars), + "setDefaultWhitespaceChars updated dblQuotedString but should not") + finally: + pp.dblQuotedString.copyDefaultWhiteChars = True + pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars) + + self.assertEqual(set(pp.dblQuotedString.whiteChars), set(prev_default_whitespace_chars), + "setDefaultWhitespaceChars updated dblQuotedString") + + try: + pp.ParserElement.setDefaultWhitespaceChars(" \t") + self.assertNotEqual(set(pp.dblQuotedString.whiteChars), set(prev_default_whitespace_chars), + "setDefaultWhitespaceChars updated dblQuotedString but should not") + + EOL = pp.LineEnd().suppress().setName("EOL") + + # Identifiers is a string + optional $ + identifier = pp.Combine(pp.Word(pp.alphas) + pp.Optional("$")) + + # Literals (number or double quoted string) + literal = pp.pyparsing_common.number | pp.dblQuotedString + expression = literal | identifier + # expression.setName("expression").setDebug() + # pp.pyparsing_common.number.setDebug() + # pp.pyparsing_common.integer.setDebug() + + line_number = pp.pyparsing_common.integer + + # Keywords + PRINT = pp.CaselessKeyword("print") + print_stmt = PRINT - pp.ZeroOrMore(expression | ";") + statement = print_stmt + code_line = pp.Group(line_number + statement + EOL) + program = pp.ZeroOrMore(code_line) + + test = """\ + 10 print 123; + 20 print 234; 567; + 30 print 890 + """ + + parsed_program = program.parseString(test) + print(parsed_program.dump()) + self.assertEqual(len(parsed_program), 3, "failed to apply new whitespace chars to existing builtins") + + finally: + pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars) + +class UpdateDefaultWhitespace2Test(ParseTestCase): + def runTest(self): + import pyparsing as pp + ppc = pp.pyparsing_common + + prev_default_whitespace_chars = pp.ParserElement.DEFAULT_WHITE_CHARS + try: + expr_tests = [ + (pp.dblQuotedString, '"abc"'), + (pp.sglQuotedString, "'def'"), + (ppc.integer, "123"), + (ppc.number, "4.56"), + (ppc.identifier, "a_bc"), + ] + NL = pp.LineEnd() + + for expr, test_str in expr_tests: + parser = pp.Group(expr[1, ...] + pp.Optional(NL))[1, ...] + test_string = '\n'.join([test_str]*3) + result = parser.parseString(test_string, parseAll=True) + print(result.dump()) + self.assertEqual(len(result), 1, "failed {!r}".format(test_string)) + + pp.ParserElement.setDefaultWhitespaceChars(" \t") + + for expr, test_str in expr_tests: + parser = pp.Group(expr[1, ...] + pp.Optional(NL))[1, ...] + test_string = '\n'.join([test_str]*3) + result = parser.parseString(test_string, parseAll=True) + print(result.dump()) + self.assertEqual(len(result), 3, "failed {!r}".format(test_string)) + + pp.ParserElement.setDefaultWhitespaceChars(" \n\t") + + for expr, test_str in expr_tests: + parser = pp.Group(expr[1, ...] + pp.Optional(NL))[1, ...] + test_string = '\n'.join([test_str]*3) + result = parser.parseString(test_string, parseAll=True) + print(result.dump()) + self.assertEqual(len(result), 1, "failed {!r}".format(test_string)) + + finally: + pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars) + class ParseFourFnTest(ParseTestCase): def runTest(self): import examples.fourFn as fourFn @@ -2083,7 +2185,9 @@ class LineStartTest(ParseTestCase): success = test_patt.runTests(fail_tests, failureTests=True)[0] self.assertTrue(success, "failed LineStart failure mode tests (1)") - with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"): + # with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"): + prev_default_whitespace_chars = pp.ParserElement.DEFAULT_WHITE_CHARS + try: print(r'no \n in default whitespace chars') pp.ParserElement.setDefaultWhitespaceChars(' ') @@ -2103,6 +2207,8 @@ class LineStartTest(ParseTestCase): success = test_patt.runTests(fail_tests, failureTests=True)[0] self.assertTrue(success, "failed LineStart failure mode tests (3)") + finally: + pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars) test = """\ AAA 1 @@ -2123,12 +2229,15 @@ class LineStartTest(ParseTestCase): print() self.assertEqual(test[s], 'A', 'failed LineStart with insignificant newlines') - with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"): + # with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"): + try: pp.ParserElement.setDefaultWhitespaceChars(' ') for t, s, e in (pp.LineStart() + 'AAA').scanString(test): print(s, e, pp.lineno(s, test), pp.line(s, test), ord(test[s])) print() self.assertEqual(test[s], 'A', 'failed LineStart with insignificant newlines') + finally: + pp.ParserElement.setDefaultWhitespaceChars(prev_default_whitespace_chars) class LineAndStringEndTest(ParseTestCase): |