summaryrefslogtreecommitdiff
path: root/Lib/test/test_robotparser.py
diff options
context:
space:
mode:
authorSkip Montanaro <skip@pobox.com>2008-07-27 00:49:02 +0000
committerSkip Montanaro <skip@pobox.com>2008-07-27 00:49:02 +0000
commit1ef19f0de12da730b5ba3096ab4a1d78af5185b4 (patch)
tree474c5c3011d3fafb38929aa0e263cabe1f855186 /Lib/test/test_robotparser.py
parent4b99e9b4790af9951b81925e28bd07850cb5c630 (diff)
downloadcpython-git-1ef19f0de12da730b5ba3096ab4a1d78af5185b4.tar.gz
Close issue 3437 - missing state change when Allow lines are processed.
Adds test cases which use Allow: as well.
Diffstat (limited to 'Lib/test/test_robotparser.py')
-rw-r--r--Lib/test/test_robotparser.py69
1 files changed, 69 insertions, 0 deletions
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index b7911fd61c..431b8ffbd9 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -134,6 +134,75 @@ bad = [] # Bug report says "/" should be denied, but that is not in the RFC
RobotTest(7, doc, good, bad)
+# From Google: http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=40364
+
+# 8.
+doc = """
+User-agent: Googlebot
+Allow: /folder1/myfile.html
+Disallow: /folder1/
+"""
+
+good = ['/folder1/myfile.html']
+bad = ['/folder1/anotherfile.html']
+
+RobotTest(8, doc, good, bad, agent="Googlebot")
+
+# 9-10. This file is incorrect: "Googlebot" is a substring of
+# "Googlebot-Mobile", so the first entry matches both agents (10 acts like 9).
+doc = """
+User-agent: Googlebot
+Disallow: /
+
+User-agent: Googlebot-Mobile
+Allow: /
+"""
+
+good = []
+bad = ['/something.jpg']
+
+RobotTest(9, doc, good, bad, agent="Googlebot")
+
+good = []
+bad = ['/something.jpg']
+
+RobotTest(10, doc, good, bad, agent="Googlebot-Mobile")
+
+# 11-12. Get the order correct: list the more specific agent name first.
+doc = """
+User-agent: Googlebot-Mobile
+Allow: /
+
+User-agent: Googlebot
+Disallow: /
+"""
+
+good = []
+bad = ['/something.jpg']
+
+RobotTest(11, doc, good, bad, agent="Googlebot")
+
+good = ['/something.jpg']
+bad = []
+
+RobotTest(12, doc, good, bad, agent="Googlebot-Mobile")
+
+
+# 13. Google also got the order wrong in #8: URLs must be listed from more
+# specific to more general. Same rules as #8, but with a lower-case agent name.
+doc = """
+User-agent: Googlebot
+Allow: /folder1/myfile.html
+Disallow: /folder1/
+"""
+
+good = ['/folder1/myfile.html']
+bad = ['/folder1/anotherfile.html']
+
+RobotTest(13, doc, good, bad, agent="googlebot")
+
+
+
class TestCase(unittest.TestCase):
def runTest(self):
test_support.requires('network')