diff options
author | ptmcg <ptmcg@austin.rr.com> | 2023-01-18 05:04:15 -0600 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2023-01-18 05:04:15 -0600 |
commit | a29ec51c1eed55c90db61afcb937ab8beaf6f60c (patch) | |
tree | b243b17f7806c52969018e0af97f31437800d2c5 | |
parent | cc94b5a6d608e7f25be15c4487cbab25f606e0d8 (diff) | |
download | pyparsing-git-a29ec51c1eed55c90db61afcb937ab8beaf6f60c.tar.gz |
Remove ^ and $ anchors from the pp.common.url regex - fixes #459
-rw-r--r-- | CHANGES | 3 | ||||
-rw-r--r-- | pyparsing/common.py | 5 | ||||
-rw-r--r-- | tests/test_unit.py | 18 |
3 files changed, 23 insertions, 3 deletions
@@ -123,6 +123,9 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit - Fixed exception messages for some `ParserElements` with custom names, which instead showed their contained expression names. +- Fixed bug in pyparsing.common.url, when input URL is not alone + on an input line. Fixes Issue #459, reported by David Kennedy. + - Multiple added and corrected type annotations. With much help from Stephen Rosen, thanks! diff --git a/pyparsing/common.py b/pyparsing/common.py index bb8472a..90ac78e 100644 --- a/pyparsing/common.py +++ b/pyparsing/common.py @@ -363,7 +363,6 @@ class pyparsing_common: url = Regex( # https://mathiasbynens.be/demo/url-regex # https://gist.github.com/dperini/729294 - r"^" + # protocol identifier (optional) # short syntax // still required r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" + @@ -404,9 +403,9 @@ class pyparsing_common: # query string (optional) r"(\?(?P<query>[^#]*))?" + # fragment (optional) - r"(#(?P<fragment>\S*))?" + - r"$" + r"(#(?P<fragment>\S*))?" 
).set_name("url") + """URL (http/https/ftp scheme)""" # fmt: on # pre-PEP8 compatibility names diff --git a/tests/test_unit.py b/tests/test_unit.py index 34c2736..96cbe28 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -6404,6 +6404,24 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): self.assertParseAndCheckDict(ppc.url, sample_url, expected, verbose=True) + def testCommonUrlExprs(self): + def extract_parts(s, split=' '): + return [[_.strip(split)] for _ in s.strip(split).split(split)] + + test_string = "http://example.com https://blah.org " + self.assertParseAndCheckList( + pp.Group(ppc.url)[...], + test_string, + extract_parts(test_string) + ) + + test_string = test_string.replace(" ", " , ") + self.assertParseAndCheckList( + pp.delimited_list(pp.Group(ppc.url), allow_trailing_delim=True), + test_string, + extract_parts(test_string, " , ") + ) + def testNumericExpressions(self): # disable parse actions that do type conversion so we don't accidentally trigger |