"""Tests for the distutils2.pypi.simple module."""
import re
import os
import sys
import httplib
import urllib2
from distutils2.pypi.simple import Crawler
from distutils2.tests import unittest
from distutils2.tests.support import (TempdirManager, LoggingCatcher,
fake_dec)
# The server-backed tests below need both the ``thread`` module and the test
# PyPI server helpers.  When either import fails, bind placeholders so this
# module still imports; tests guarded by ``skipIf(_thread is None, ...)`` are
# then skipped.
try:
    import thread as _thread
    from distutils2.tests.pypi_server import (use_pypi_server, PyPIServer,
                                              PYPI_DEFAULT_STATIC_PATH)
except ImportError:
    _thread = None
    # stand-in decorator imported from distutils2.tests.support
    use_pypi_server = fake_dec
    # Keep the name bound on both paths; it is only used by tests that are
    # skipped when ``_thread`` is None.
    PyPIServer = None
    PYPI_DEFAULT_STATIC_PATH = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
class SimpleCrawlerTestCase(TempdirManager,
LoggingCatcher,
unittest.TestCase):
def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
*args, **kwargs):
"""Build and return a SimpleIndex with the test server urls"""
if hosts is None:
hosts = (server.full_address.replace("http://", ""),)
kwargs['hosts'] = hosts
return Crawler(server.full_address + base_url, *args,
**kwargs)
@unittest.skipIf(_thread is None, 'needs threads')
@use_pypi_server()
def test_bad_urls(self, server):
crawler = Crawler()
url = 'http://127.0.0.1:0/nonesuch/test_simple'
try:
v = crawler._open_url(url)
except Exception, v:
self.assertIn(url, str(v))
else:
v.close()
self.assertIsInstance(v, urllib2.HTTPError)
# issue 16
# easy_install inquant.contentmirror.plone breaks because of a typo
# in its home URL
crawler = Crawler(hosts=('example.org',))
url = ('url:%20https://svn.plone.org/svn/collective/'
'inquant.contentmirror.plone/trunk')
try:
v = crawler._open_url(url)
except Exception, v:
self.assertIn(url, str(v))
else:
v.close()
self.assertIsInstance(v, urllib2.HTTPError)
def _urlopen(*args):
raise httplib.BadStatusLine('line')
old_urlopen = urllib2.urlopen
urllib2.urlopen = _urlopen
url = 'http://example.org'
try:
try:
v = crawler._open_url(url)
except Exception, v:
self.assertIn('line', str(v))
else:
v.close()
# TODO use self.assertRaises
raise AssertionError('Should have raise here!')
finally:
urllib2.urlopen = old_urlopen
# issue 20
url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
try:
crawler._open_url(url)
except Exception, v:
if sys.version_info[:3] < (2, 7, 3):
wanted = 'nonnumeric port'
else:
wanted = 'Download error'
self.assertIn(wanted, str(v))
# issue #160
url = server.full_address
page = ('')
crawler._process_url(url, page)
@unittest.skipIf(_thread is None, 'needs threads')
@use_pypi_server("test_found_links")
def test_found_links(self, server):
# Browse the index, asking for a specified release version
# The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
crawler = self._get_simple_crawler(server)
last_release = crawler.get_release("foobar")
# we have scanned the index page
self.assertIn(server.full_address + "/simple/foobar/",
crawler._processed_urls)
# we have found 4 releases in this page
self.assertEqual(len(crawler._projects["foobar"]), 4)
# and returned the most recent one
self.assertEqual("%s" % last_release.version, '2.0.1')
def test_is_browsable(self):
crawler = Crawler(follow_externals=False)
self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
# Now, when following externals, we can have a list of hosts to trust.
# and don't follow other external links than the one described here.
crawler = Crawler(hosts=["pypi.python.org", "example.org"],
follow_externals=True)
good_urls = (
"http://pypi.python.org/foo/bar",
"http://pypi.python.org/simple/foobar",
"http://example.org",
"http://example.org/",
"http://example.org/simple/",
)
bad_urls = (
"http://python.org",
"http://example.tld",
)
for url in good_urls:
self.assertTrue(crawler._is_browsable(url))
for url in bad_urls:
self.assertFalse(crawler._is_browsable(url))
# allow all hosts
crawler = Crawler(follow_externals=True, hosts=("*",))
self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
# specify a list of hosts we want to allow
crawler = Crawler(follow_externals=True,
hosts=("*.example.org",))
self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
self.assertTrue(
crawler._is_browsable("http://pypi.example.org/a/path"))
@unittest.skipIf(_thread is None, 'needs threads')
@use_pypi_server("with_externals")
def test_follow_externals(self, server):
# Include external pages
# Try to request the package index, wich contains links to "externals"
# resources. They have to be scanned too.
crawler = self._get_simple_crawler(server, follow_externals=True)
crawler.get_release("foobar")
self.assertIn(server.full_address + "/external/external.html",
crawler._processed_urls)
@unittest.skipIf(_thread is None, 'needs threads')
@use_pypi_server("with_real_externals")
def test_restrict_hosts(self, server):
# Only use a list of allowed hosts is possible
# Test that telling the simple pyPI client to not retrieve external
# works
crawler = self._get_simple_crawler(server, follow_externals=False)
crawler.get_release("foobar")
self.assertNotIn(server.full_address + "/external/external.html",
crawler._processed_urls)
@unittest.skipIf(_thread is None, 'needs threads')
@use_pypi_server(static_filesystem_paths=["with_externals"],
static_uri_paths=["simple", "external"])
def test_links_priority(self, server):
# Download links from the pypi simple index should be used before
# external download links.
# http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
#
# Usecase :
# - someone uploads a package on pypi, a md5 is generated
# - someone manually coindexes this link (with the md5 in the url) onto
# an external page accessible from the package page.
# - someone reuploads the package (with a different md5)
# - while easy_installing, an MD5 error occurs because the external
# link is used
# -> The index should use the link from pypi, not the external one.
# start an index server
index_url = server.full_address + '/simple/'
# scan a test index
crawler = Crawler(index_url, follow_externals=True)
releases = crawler.get_releases("foobar")
server.stop()
# we have only one link, because links are compared without md5
self.assertEqual(1, len(releases))
self.assertEqual(1, len(releases[0].dists))
# the link should be from the index
self.assertEqual(2, len(releases[0].dists['sdist'].urls))
self.assertEqual('12345678901234567',
releases[0].dists['sdist'].url['hashval'])
self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
@unittest.skipIf(_thread is None, 'needs threads')
@use_pypi_server(static_filesystem_paths=["with_norel_links"],
static_uri_paths=["simple", "external"])
def test_not_scan_all_links(self, server):
# Do not follow all index page links.
# The links not tagged with rel="download" and rel="homepage" have
# to not be processed by the package index, while processing "pages".
# process the pages
crawler = self._get_simple_crawler(server, follow_externals=True)
crawler.get_releases("foobar")
# now it should have processed only pages with links rel="download"
# and rel="homepage"
self.assertIn("%s/simple/foobar/" % server.full_address,
crawler._processed_urls) # it's the simple index page
self.assertIn("%s/external/homepage.html" % server.full_address,
crawler._processed_urls) # the external homepage is rel="homepage"
self.assertNotIn("%s/external/nonrel.html" % server.full_address,
crawler._processed_urls) # this link contains no rel=*
self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
crawler._processed_urls) # linked from simple index (no rel)
self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
crawler._processed_urls) # linked from simple index (rel)
self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
crawler._processed_urls) # linked from external homepage (rel)
@unittest.skipIf(_thread is None, 'needs threads')
def test_uses_mirrors(self):
# When the main repository seems down, try using the given mirrors"""
server = PyPIServer("foo_bar_baz")
mirror = PyPIServer("foo_bar_baz")
mirror.start() # we dont start the server here
try:
# create the index using both servers
crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
# set the timeout to 1s for the tests
timeout=1, mirrors=[mirror.full_address])
# this should not raise a timeout
self.assertEqual(4, len(crawler.get_releases("foo")))
finally:
mirror.stop()
server.stop()
def test_simple_link_matcher(self):
# Test that the simple link matcher finds the right links"""
crawler = Crawler(follow_externals=False)
# Here, we define:
# 1. one link that must be followed, cause it's a download one
# 2. one link that must *not* be followed, cause the is_browsable
# returns false for it.
# 3. one link that must be followed cause it's a homepage that is
# browsable
# 4. one link that must be followed, because it contain a md5 hash
self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
self.assertFalse(crawler._is_browsable("http://dl-link2"))
content = """
download_link1
homepage_link1
homepage_link2
link
link2
link2
"""
found_links = set(uri for uri, _ in
crawler._default_link_matcher(content, base_url))
self.assertIn('http://example.org/some/homepage', found_links)
self.assertIn('http://example.org/some/simpleurl', found_links)
self.assertIn('http://example.org/some/download', found_links)
@unittest.skipIf(_thread is None, 'needs threads')
@use_pypi_server("project_list")
def test_search_projects(self, server):
# we can search the index for some projects, on their names
# the case used no matters here
crawler = self._get_simple_crawler(server)
tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
('foobar*', ['FooBar-bar', 'Foobar-baz']),
('*foobar', ['Baz-FooBar']))
for search, expected in tests:
projects = [p.name for p in crawler.search_projects(search)]
self.assertListEqual(expected, projects)
def test_suite():
    """Return the suite built from SimpleCrawlerTestCase."""
    suite = unittest.makeSuite(SimpleCrawlerTestCase)
    return suite
# When executed as a script, run the suite returned by test_suite().
if __name__ == '__main__':
    unittest.main(defaultTest="test_suite")