summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ptmcg <ptmcg@austin.rr.com> 2023-01-18 05:04:15 -0600
committer: ptmcg <ptmcg@austin.rr.com> 2023-01-18 05:04:15 -0600
commit: a29ec51c1eed55c90db61afcb937ab8beaf6f60c (patch)
tree: b243b17f7806c52969018e0af97f31437800d2c5
parent: cc94b5a6d608e7f25be15c4487cbab25f606e0d8 (diff)
download: pyparsing-git-a29ec51c1eed55c90db61afcb937ab8beaf6f60c.tar.gz
Remove ^ and $ tags from pp.common.url regex - fixes #459
-rw-r--r-- CHANGES 3
-rw-r--r-- pyparsing/common.py 5
-rw-r--r-- tests/test_unit.py 18
3 files changed, 23 insertions, 3 deletions
diff --git a/CHANGES b/CHANGES
index 18edee1..d0f3905 100644
--- a/CHANGES
+++ b/CHANGES
@@ -123,6 +123,9 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
- Fixed exception messages for some `ParserElements` with custom names,
which instead showed their contained expression names.
+- Fixed bug in pyparsing.common.url, when input URL is not alone
+ on an input line. Fixes Issue #459, reported by David Kennedy.
+
- Multiple added and corrected type annotations. With much help from
Stephen Rosen, thanks!
diff --git a/pyparsing/common.py b/pyparsing/common.py
index bb8472a..90ac78e 100644
--- a/pyparsing/common.py
+++ b/pyparsing/common.py
@@ -363,7 +363,6 @@ class pyparsing_common:
url = Regex(
# https://mathiasbynens.be/demo/url-regex
# https://gist.github.com/dperini/729294
- r"^" +
# protocol identifier (optional)
# short syntax // still required
r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
@@ -404,9 +403,9 @@ class pyparsing_common:
# query string (optional)
r"(\?(?P<query>[^#]*))?" +
# fragment (optional)
- r"(#(?P<fragment>\S*))?" +
- r"$"
+ r"(#(?P<fragment>\S*))?"
).set_name("url")
+ """URL (http/https/ftp scheme)"""
# fmt: on
# pre-PEP8 compatibility names
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 34c2736..96cbe28 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -6404,6 +6404,24 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
self.assertParseAndCheckDict(ppc.url, sample_url, expected, verbose=True)
+ def testCommonUrlExprs(self):
+ def extract_parts(s, split=' '):
+ return [[_.strip(split)] for _ in s.strip(split).split(split)]
+
+ test_string = "http://example.com https://blah.org "
+ self.assertParseAndCheckList(
+ pp.Group(ppc.url)[...],
+ test_string,
+ extract_parts(test_string)
+ )
+
+ test_string = test_string.replace(" ", " , ")
+ self.assertParseAndCheckList(
+ pp.delimited_list(pp.Group(ppc.url), allow_trailing_delim=True),
+ test_string,
+ extract_parts(test_string, " , ")
+ )
+
def testNumericExpressions(self):
# disable parse actions that do type conversion so we don't accidentally trigger