summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Goldschmidt <adamgold7@gmail.com>2021-02-15 00:41:57 +0200
committerGitHub <noreply@github.com>2021-02-14 14:41:57 -0800
commitfcbe0cb04d35189401c0c880ebfb4311e952d776 (patch)
tree48ca1701d13be00517881423fcfd99b8a9ae9445
parent1b57426e3a7842b4e6f9fc13ffb657c78e5443d4 (diff)
downloadcpython-git-fcbe0cb04d35189401c0c880ebfb4311e952d776.tar.gz
bpo-42967: only use '&' as a query string separator (#24297)
bpo-42967: [security] Address a web cache-poisoning issue reported in urllib.parse.parse_qsl(). urllib.parse will only us "&" as query string separator by default instead of both ";" and "&" as allowed in earlier versions. An optional argument seperator with default value "&" is added to specify the separator. Co-authored-by: Éric Araujo <merwok@netwok.org> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Co-authored-by: Éric Araujo <merwok@netwok.org>
-rw-r--r--Doc/library/cgi.rst9
-rw-r--r--Doc/library/urllib.parse.rst16
-rw-r--r--Doc/whatsnew/3.10.rst13
-rw-r--r--Doc/whatsnew/3.6.rst13
-rw-r--r--Doc/whatsnew/3.7.rst13
-rw-r--r--Doc/whatsnew/3.8.rst13
-rw-r--r--Doc/whatsnew/3.9.rst15
-rwxr-xr-xLib/cgi.py23
-rw-r--r--Lib/test/test_cgi.py29
-rw-r--r--Lib/test/test_urlparse.py68
-rw-r--r--Lib/urllib/parse.py20
-rw-r--r--Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst1
12 files changed, 186 insertions, 47 deletions
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
index 4048592e73..05d9cdf424 100644
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -277,14 +277,14 @@ These are useful if you want more control, or if you want to employ some of the
algorithms implemented in this module in other circumstances.
-.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False)
+.. function:: parse(fp=None, environ=os.environ, keep_blank_values=False, strict_parsing=False, separator="&")
Parse a query in the environment or from a file (the file defaults to
- ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are
+ ``sys.stdin``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are
passed to :func:`urllib.parse.parse_qs` unchanged.
-.. function:: parse_multipart(fp, pdict, encoding="utf-8", errors="replace")
+.. function:: parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator="&")
Parse input of type :mimetype:`multipart/form-data` (for file uploads).
Arguments are *fp* for the input file, *pdict* for a dictionary containing
@@ -303,6 +303,9 @@ algorithms implemented in this module in other circumstances.
Added the *encoding* and *errors* parameters. For non-file fields, the
value is now a list of strings, not bytes.
+ .. versionchanged:: 3.10
+ Added the *separator* parameter.
+
.. function:: parse_header(string)
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index f9c8ba7398..1a79078239 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -165,7 +165,7 @@ or on combining URL components into a URL string.
now raise :exc:`ValueError`.
-.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
+.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a
@@ -190,6 +190,8 @@ or on combining URL components into a URL string.
read. If set, then throws a :exc:`ValueError` if there are more than
*max_num_fields* fields read.
+ The optional argument *separator* is the symbol to use for separating the query arguments. It defaults to `&`.
+
Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
parameter set to ``True``) to convert such dictionaries into query
strings.
@@ -201,8 +203,12 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.8
Added *max_num_fields* parameter.
+ .. versionchanged:: 3.10
+ Added *separator* parameter with the default value of `&`. Python versions earlier than Python 3.10 allowed using both ";" and "&" as
+ query parameter separator. This has been changed to allow only a single separator key, with "&" as the default separator.
+
-.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
+.. function:: parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
Parse a query string given as a string argument (data of type
:mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of
@@ -226,6 +232,8 @@ or on combining URL components into a URL string.
read. If set, then throws a :exc:`ValueError` if there are more than
*max_num_fields* fields read.
+ The optional argument *separator* is the symbol to use for separating the query arguments. It defaults to `&`.
+
Use the :func:`urllib.parse.urlencode` function to convert such lists of pairs into
query strings.
@@ -235,6 +243,10 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.8
Added *max_num_fields* parameter.
+ .. versionchanged:: 3.10
+ Added *separator* parameter with the default value of `&`. Python versions earlier than Python 3.10 allowed using both ";" and "&" as
+ query parameter separator. This has been changed to allow only a single separator key, with "&" as the default separator.
+
.. function:: urlunparse(parts)
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index ed2fd0eb0d..c282edcc9d 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -546,6 +546,19 @@ Add new method :meth:`~unittest.TestCase.assertNoLogs` to complement the
existing :meth:`~unittest.TestCase.assertLogs`. (Contributed by Kit Yan Choi
in :issue:`39385`.)
+urllib.parse
+------------
+
+Python versions earlier than Python 3.10 allowed using both ``;`` and ``&`` as
+query parameter separators in :func:`urllib.parse.parse_qs` and
+:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with
+newer W3C recommendations, this has been changed to allow only a single
+separator key, with ``&`` as the default. This change also affects
+:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected
+functions internally. For more details, please see their respective
+documentation.
+(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.)
+
xml
---
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
index 85a6657fdf..8a64da1b24 100644
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -2443,3 +2443,16 @@ because of the behavior of the socket option ``SO_REUSEADDR`` in UDP. For more
details, see the documentation for ``loop.create_datagram_endpoint()``.
(Contributed by Kyle Stanley, Antoine Pitrou, and Yury Selivanov in
:issue:`37228`.)
+
+Notable changes in Python 3.6.13
+================================
+
+Earlier Python versions allowed using both ";" and "&" as
+query parameter separators in :func:`urllib.parse.parse_qs` and
+:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with
+newer W3C recommendations, this has been changed to allow only a single
+separator key, with "&" as the default. This change also affects
+:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected
+functions internally. For more details, please see their respective
+documentation.
+(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.)
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index 7590af35e2..75e1973b3e 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -2557,3 +2557,16 @@ because of the behavior of the socket option ``SO_REUSEADDR`` in UDP. For more
details, see the documentation for ``loop.create_datagram_endpoint()``.
(Contributed by Kyle Stanley, Antoine Pitrou, and Yury Selivanov in
:issue:`37228`.)
+
+Notable changes in Python 3.7.10
+================================
+
+Earlier Python versions allowed using both ``;`` and ``&`` as
+query parameter separators in :func:`urllib.parse.parse_qs` and
+:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with
+newer W3C recommendations, this has been changed to allow only a single
+separator key, with ``&`` as the default. This change also affects
+:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected
+functions internally. For more details, please see their respective
+documentation.
+(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.)
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index 0b4820f333..d21921d3dd 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -2234,3 +2234,16 @@ because of the behavior of the socket option ``SO_REUSEADDR`` in UDP. For more
details, see the documentation for ``loop.create_datagram_endpoint()``.
(Contributed by Kyle Stanley, Antoine Pitrou, and Yury Selivanov in
:issue:`37228`.)
+
+Notable changes in Python 3.8.8
+===============================
+
+Earlier Python versions allowed using both ";" and "&" as
+query parameter separators in :func:`urllib.parse.parse_qs` and
+:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with
+newer W3C recommendations, this has been changed to allow only a single
+separator key, with "&" as the default. This change also affects
+:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected
+functions internally. For more details, please see their respective
+documentation.
+(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.) \ No newline at end of file
diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst
index b94f1bfadd..5f4f8ba211 100644
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -1515,4 +1515,17 @@ need to account for this change. A :exc:`DeprecationWarning` may be emitted for
invalid forms of parameterizing :class:`collections.abc.Callable` which may have
passed silently in Python 3.9.1. This :exc:`DeprecationWarning` will
become a :exc:`TypeError` in Python 3.10.
-(Contributed by Ken Jin in :issue:`42195`.) \ No newline at end of file
+(Contributed by Ken Jin in :issue:`42195`.)
+
+urllib.parse
+------------
+
+Earlier Python versions allowed using both ";" and "&" as
+query parameter separators in :func:`urllib.parse.parse_qs` and
+:func:`urllib.parse.parse_qsl`. Due to security concerns, and to conform with
+newer W3C recommendations, this has been changed to allow only a single
+separator key, with "&" as the default. This change also affects
+:func:`cgi.parse` and :func:`cgi.parse_multipart` as they use the affected
+functions internally. For more details, please see their respective
+documentation.
+(Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.)
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 6018c36086..6c72507c20 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -115,7 +115,8 @@ log = initlog # The current logging function
# 0 ==> unlimited input
maxlen = 0
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
+ strict_parsing=0, separator='&'):
"""Parse a query in the environment or from a file (default stdin)
Arguments, all optional:
@@ -134,6 +135,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.
+
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
"""
if fp is None:
fp = sys.stdin
@@ -154,7 +158,7 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
if environ['REQUEST_METHOD'] == 'POST':
ctype, pdict = parse_header(environ['CONTENT_TYPE'])
if ctype == 'multipart/form-data':
- return parse_multipart(fp, pdict)
+ return parse_multipart(fp, pdict, separator=separator)
elif ctype == 'application/x-www-form-urlencoded':
clength = int(environ['CONTENT_LENGTH'])
if maxlen and clength > maxlen:
@@ -178,10 +182,10 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
qs = ""
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
- encoding=encoding)
+ encoding=encoding, separator=separator)
-def parse_multipart(fp, pdict, encoding="utf-8", errors="replace"):
+def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'):
"""Parse multipart input.
Arguments:
@@ -205,7 +209,7 @@ def parse_multipart(fp, pdict, encoding="utf-8", errors="replace"):
except KeyError:
pass
fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors,
- environ={'REQUEST_METHOD': 'POST'})
+ environ={'REQUEST_METHOD': 'POST'}, separator=separator)
return {k: fs.getlist(k) for k in fs}
def _parseparam(s):
@@ -315,7 +319,7 @@ class FieldStorage:
def __init__(self, fp=None, headers=None, outerboundary=b'',
environ=os.environ, keep_blank_values=0, strict_parsing=0,
limit=None, encoding='utf-8', errors='replace',
- max_num_fields=None):
+ max_num_fields=None, separator='&'):
"""Constructor. Read multipart/* until last part.
Arguments, all optional:
@@ -363,6 +367,7 @@ class FieldStorage:
self.keep_blank_values = keep_blank_values
self.strict_parsing = strict_parsing
self.max_num_fields = max_num_fields
+ self.separator = separator
if 'REQUEST_METHOD' in environ:
method = environ['REQUEST_METHOD'].upper()
self.qs_on_post = None
@@ -589,7 +594,7 @@ class FieldStorage:
query = urllib.parse.parse_qsl(
qs, self.keep_blank_values, self.strict_parsing,
encoding=self.encoding, errors=self.errors,
- max_num_fields=self.max_num_fields)
+ max_num_fields=self.max_num_fields, separator=self.separator)
self.list = [MiniFieldStorage(key, value) for key, value in query]
self.skip_lines()
@@ -605,7 +610,7 @@ class FieldStorage:
query = urllib.parse.parse_qsl(
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
encoding=self.encoding, errors=self.errors,
- max_num_fields=self.max_num_fields)
+ max_num_fields=self.max_num_fields, separator=self.separator)
self.list.extend(MiniFieldStorage(key, value) for key, value in query)
klass = self.FieldStorageClass or self.__class__
@@ -649,7 +654,7 @@ class FieldStorage:
else self.limit - self.bytes_read
part = klass(self.fp, headers, ib, environ, keep_blank_values,
strict_parsing, limit,
- self.encoding, self.errors, max_num_fields)
+ self.encoding, self.errors, max_num_fields, self.separator)
if max_num_fields is not None:
max_num_fields -= 1
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index 6b29759da4..239d97589c 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -53,12 +53,9 @@ parse_strict_test_cases = [
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
- (";", ValueError("bad query field: ''")),
- (";&;", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
- ("=;=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
@@ -73,8 +70,6 @@ parse_strict_test_cases = [
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
@@ -201,6 +196,30 @@ Content-Length: 3
else:
self.assertEqual(fs.getvalue(key), expect_val[0])
+ def test_separator(self):
+ parse_semicolon = [
+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+ (";", ValueError("bad query field: ''")),
+ (";;", ValueError("bad query field: ''")),
+ ("=;a", ValueError("bad query field: 'a'")),
+ (";b=a", ValueError("bad query field: ''")),
+ ("b;=a", ValueError("bad query field: 'b'")),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
+ ]
+ for orig, expect in parse_semicolon:
+ env = {'QUERY_STRING': orig}
+ fs = cgi.FieldStorage(separator=';', environ=env)
+ if isinstance(expect, dict):
+ for key in expect.keys():
+ expect_val = expect[key]
+ self.assertIn(key, fs)
+ if len(expect_val) > 1:
+ self.assertEqual(fs.getvalue(key), expect_val)
+ else:
+ self.assertEqual(fs.getvalue(key), expect_val[0])
+
def test_log(self):
cgi.log("Testing")
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 762500789f..3b1c360625 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -32,16 +32,10 @@ parse_qsl_test_cases = [
(b"&a=b", [(b'a', b'b')]),
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
- (";", []),
- (";;", []),
- (";a=b", [('a', 'b')]),
- ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
- ("a=1;a=2", [('a', '1'), ('a', '2')]),
- (b";", []),
- (b";;", []),
- (b";a=b", [(b'a', b'b')]),
- (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
- (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+ (";a=b", [(';a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
+ (b";a=b", [(b';a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
]
# Each parse_qs testcase is a two-tuple that contains
@@ -68,16 +62,10 @@ parse_qs_test_cases = [
(b"&a=b", {b'a': [b'b']}),
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1&a=2", {b'a': [b'1', b'2']}),
- (";", {}),
- (";;", {}),
- (";a=b", {'a': ['b']}),
- ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
- ("a=1;a=2", {'a': ['1', '2']}),
- (b";", {}),
- (b";;", {}),
- (b";a=b", {b'a': [b'b']}),
- (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
- (b"a=1;a=2", {b'a': [b'1', b'2']}),
+ (";a=b", {';a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
+ (b";a=b", {b';a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
]
class UrlParseTestCase(unittest.TestCase):
@@ -886,10 +874,46 @@ class UrlParseTestCase(unittest.TestCase):
def test_parse_qsl_max_num_fields(self):
with self.assertRaises(ValueError):
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
- with self.assertRaises(ValueError):
- urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
+ def test_parse_qs_separator(self):
+ parse_qs_semicolon_cases = [
+ (";", {}),
+ (";;", {}),
+ (";a=b", {'a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=1;a=2", {'a': ['1', '2']}),
+ (b";", {}),
+ (b";;", {}),
+ (b";a=b", {b'a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
+ (b"a=1;a=2", {b'a': [b'1', b'2']}),
+ ]
+ for orig, expect in parse_qs_semicolon_cases:
+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
+ result = urllib.parse.parse_qs(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
+ def test_parse_qsl_separator(self):
+ parse_qsl_semicolon_cases = [
+ (";", []),
+ (";;", []),
+ (";a=b", [('a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
+ ("a=1;a=2", [('a', '1'), ('a', '2')]),
+ (b";", []),
+ (b";;", []),
+ (b";a=b", [(b'a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
+ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+ ]
+ for orig, expect in parse_qsl_semicolon_cases:
+ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
+ result = urllib.parse.parse_qsl(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
def test_urlencode_sequences(self):
# Other tests incidentally urlencode things; test non-covered cases:
# Sequence and object values.
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index ea897c3032..5bd067895b 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -662,7 +662,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None):
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -686,12 +686,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a dictionary.
"""
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors,
- max_num_fields=max_num_fields)
+ max_num_fields=max_num_fields, separator=separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@@ -701,7 +704,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None):
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -724,19 +727,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError
if there are more than n fields read by parse_qsl().
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a list, as G-d intended.
"""
qs, _coerce_result = _coerce_args(qs)
+ if not separator or (not isinstance(separator, str)
+ and not isinstance(separator, bytes)):
+ raise ValueError("Separator must be of type string or bytes.")
+
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
- num_fields = 1 + qs.count('&') + qs.count(';')
+ num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
new file mode 100644
index 0000000000..f08489b414
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
@@ -0,0 +1 @@
+Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator.