diff options
author | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-28 16:35:35 +0000 |
---|---|---|
committer | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-28 16:35:35 +0000 |
commit | a4f79f97db7920387d6c7704a2b212d6b1503d9d (patch) | |
tree | c2ba4db2d64510712e0dd6ccf51c49d342e87634 /Lib/robotparser.py | |
parent | 946963fdc563835da3734d12a947a014c9abd066 (diff) | |
download | cpython-git-a4f79f97db7920387d6c7704a2b212d6b1503d9d.tar.gz |
Merged revisions 83209 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r83209 | senthil.kumaran | 2010-07-28 21:57:56 +0530 (Wed, 28 Jul 2010) | 3 lines
Fix Issue6325 - robotparser to honor URLs with query strings.
........
Diffstat (limited to 'Lib/robotparser.py')
-rw-r--r-- | Lib/robotparser.py | 7 |
1 file changed, 6 insertions, 1 deletion
diff --git a/Lib/robotparser.py b/Lib/robotparser.py index 447563fe65..730426f6ae 100644 --- a/Lib/robotparser.py +++ b/Lib/robotparser.py @@ -131,7 +131,12 @@ class RobotFileParser: return True # search for given user agent matches # the first match counts - url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/" + parsed_url = urlparse.urlparse(urllib.unquote(url)) + url = urlparse.urlunparse(('', '', parsed_url.path, + parsed_url.params, parsed_url.query, parsed_url.fragment)) + url = urllib.quote(url) + if not url: + url = "/" for entry in self.entries: if entry.applies_to(useragent): return entry.allowance(url) |