"""Tests for the distutils2.pypi.simple module.""" import re import os import sys import httplib import urllib2 from distutils2.pypi.simple import Crawler from distutils2.tests import unittest from distutils2.tests.support import (TempdirManager, LoggingCatcher, fake_dec) try: import thread as _thread from distutils2.tests.pypi_server import (use_pypi_server, PyPIServer, PYPI_DEFAULT_STATIC_PATH) except ImportError: _thread = None use_pypi_server = fake_dec PYPI_DEFAULT_STATIC_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), 'pypiserver') class SimpleCrawlerTestCase(TempdirManager, LoggingCatcher, unittest.TestCase): def _get_simple_crawler(self, server, base_url="/simple/", hosts=None, *args, **kwargs): """Build and return a SimpleIndex with the test server urls""" if hosts is None: hosts = (server.full_address.replace("http://", ""),) kwargs['hosts'] = hosts return Crawler(server.full_address + base_url, *args, **kwargs) @unittest.skipIf(_thread is None, 'needs threads') @use_pypi_server() def test_bad_urls(self, server): crawler = Crawler() url = 'http://127.0.0.1:0/nonesuch/test_simple' try: v = crawler._open_url(url) except Exception, v: self.assertIn(url, str(v)) else: v.close() self.assertIsInstance(v, urllib2.HTTPError) # issue 16 # easy_install inquant.contentmirror.plone breaks because of a typo # in its home URL crawler = Crawler(hosts=('example.org',)) url = ('url:%20https://svn.plone.org/svn/collective/' 'inquant.contentmirror.plone/trunk') try: v = crawler._open_url(url) except Exception, v: self.assertIn(url, str(v)) else: v.close() self.assertIsInstance(v, urllib2.HTTPError) def _urlopen(*args): raise httplib.BadStatusLine('line') old_urlopen = urllib2.urlopen urllib2.urlopen = _urlopen url = 'http://example.org' try: try: v = crawler._open_url(url) except Exception, v: self.assertIn('line', str(v)) else: v.close() # TODO use self.assertRaises raise AssertionError('Should have raise here!') finally: urllib2.urlopen = old_urlopen # issue 20 url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk' try: crawler._open_url(url) except Exception, v: if sys.version_info[:3] < (2, 7, 3): wanted = 'nonnumeric port' else: wanted = 'Download error' self.assertIn(wanted, str(v)) # issue #160 url = server.full_address page = ('') crawler._process_url(url, page) @unittest.skipIf(_thread is None, 'needs threads') @use_pypi_server("test_found_links") def test_found_links(self, server): # Browse the index, asking for a specified release version # The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1 crawler = self._get_simple_crawler(server) last_release = crawler.get_release("foobar") # we have scanned the index page self.assertIn(server.full_address + "/simple/foobar/", crawler._processed_urls) # we have found 4 releases in this page self.assertEqual(len(crawler._projects["foobar"]), 4) # and returned the most recent one self.assertEqual("%s" % last_release.version, '2.0.1') def test_is_browsable(self): crawler = Crawler(follow_externals=False) self.assertTrue(crawler._is_browsable(crawler.index_url + "test")) # Now, when following externals, we can have a list of hosts to trust. # and don't follow other external links than the one described here. crawler = Crawler(hosts=["pypi.python.org", "example.org"], follow_externals=True) good_urls = ( "http://pypi.python.org/foo/bar", "http://pypi.python.org/simple/foobar", "http://example.org", "http://example.org/", "http://example.org/simple/", ) bad_urls = ( "http://python.org", "http://example.tld", ) for url in good_urls: self.assertTrue(crawler._is_browsable(url)) for url in bad_urls: self.assertFalse(crawler._is_browsable(url)) # allow all hosts crawler = Crawler(follow_externals=True, hosts=("*",)) self.assertTrue(crawler._is_browsable("http://an-external.link/path")) self.assertTrue(crawler._is_browsable("pypi.example.org/a/path")) # specify a list of hosts we want to allow crawler = Crawler(follow_externals=True, hosts=("*.example.org",)) self.assertFalse(crawler._is_browsable("http://an-external.link/path")) self.assertTrue( crawler._is_browsable("http://pypi.example.org/a/path")) @unittest.skipIf(_thread is None, 'needs threads') @use_pypi_server("with_externals") def test_follow_externals(self, server): # Include external pages # Try to request the package index, wich contains links to "externals" # resources. They have to be scanned too. crawler = self._get_simple_crawler(server, follow_externals=True) crawler.get_release("foobar") self.assertIn(server.full_address + "/external/external.html", crawler._processed_urls) @unittest.skipIf(_thread is None, 'needs threads') @use_pypi_server("with_real_externals") def test_restrict_hosts(self, server): # Only use a list of allowed hosts is possible # Test that telling the simple pyPI client to not retrieve external # works crawler = self._get_simple_crawler(server, follow_externals=False) crawler.get_release("foobar") self.assertNotIn(server.full_address + "/external/external.html", crawler._processed_urls) @unittest.skipIf(_thread is None, 'needs threads') @use_pypi_server(static_filesystem_paths=["with_externals"], static_uri_paths=["simple", "external"]) def test_links_priority(self, server): # Download links from the pypi simple index should be used before # external download links. # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error # # Usecase : # - someone uploads a package on pypi, a md5 is generated # - someone manually coindexes this link (with the md5 in the url) onto # an external page accessible from the package page. # - someone reuploads the package (with a different md5) # - while easy_installing, an MD5 error occurs because the external # link is used # -> The index should use the link from pypi, not the external one. # start an index server index_url = server.full_address + '/simple/' # scan a test index crawler = Crawler(index_url, follow_externals=True) releases = crawler.get_releases("foobar") server.stop() # we have only one link, because links are compared without md5 self.assertEqual(1, len(releases)) self.assertEqual(1, len(releases[0].dists)) # the link should be from the index self.assertEqual(2, len(releases[0].dists['sdist'].urls)) self.assertEqual('12345678901234567', releases[0].dists['sdist'].url['hashval']) self.assertEqual('md5', releases[0].dists['sdist'].url['hashname']) @unittest.skipIf(_thread is None, 'needs threads') @use_pypi_server(static_filesystem_paths=["with_norel_links"], static_uri_paths=["simple", "external"]) def test_not_scan_all_links(self, server): # Do not follow all index page links. # The links not tagged with rel="download" and rel="homepage" have # to not be processed by the package index, while processing "pages". # process the pages crawler = self._get_simple_crawler(server, follow_externals=True) crawler.get_releases("foobar") # now it should have processed only pages with links rel="download" # and rel="homepage" self.assertIn("%s/simple/foobar/" % server.full_address, crawler._processed_urls) # it's the simple index page self.assertIn("%s/external/homepage.html" % server.full_address, crawler._processed_urls) # the external homepage is rel="homepage" self.assertNotIn("%s/external/nonrel.html" % server.full_address, crawler._processed_urls) # this link contains no rel=* self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address, crawler._processed_urls) # linked from simple index (no rel) self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address, crawler._processed_urls) # linked from simple index (rel) self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address, crawler._processed_urls) # linked from external homepage (rel) @unittest.skipIf(_thread is None, 'needs threads') def test_uses_mirrors(self): # When the main repository seems down, try using the given mirrors""" server = PyPIServer("foo_bar_baz") mirror = PyPIServer("foo_bar_baz") mirror.start() # we dont start the server here try: # create the index using both servers crawler = Crawler(server.full_address + "/simple/", hosts=('*',), # set the timeout to 1s for the tests timeout=1, mirrors=[mirror.full_address]) # this should not raise a timeout self.assertEqual(4, len(crawler.get_releases("foo"))) finally: mirror.stop() server.stop() def test_simple_link_matcher(self): # Test that the simple link matcher finds the right links""" crawler = Crawler(follow_externals=False) # Here, we define: # 1. one link that must be followed, cause it's a download one # 2. one link that must *not* be followed, cause the is_browsable # returns false for it. # 3. one link that must be followed cause it's a homepage that is # browsable # 4. one link that must be followed, because it contain a md5 hash self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url)) self.assertFalse(crawler._is_browsable("http://dl-link2")) content = """ download_link1 homepage_link1 homepage_link2 link link2 link2 """ found_links = set(uri for uri, _ in crawler._default_link_matcher(content, base_url)) self.assertIn('http://example.org/some/homepage', found_links) self.assertIn('http://example.org/some/simpleurl', found_links) self.assertIn('http://example.org/some/download', found_links) @unittest.skipIf(_thread is None, 'needs threads') @use_pypi_server("project_list") def test_search_projects(self, server): # we can search the index for some projects, on their names # the case used no matters here crawler = self._get_simple_crawler(server) tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']), ('foobar*', ['FooBar-bar', 'Foobar-baz']), ('*foobar', ['Baz-FooBar'])) for search, expected in tests: projects = [p.name for p in crawler.search_projects(search)] self.assertListEqual(expected, projects) def test_suite(): return unittest.makeSuite(SimpleCrawlerTestCase) if __name__ == '__main__': unittest.main(defaultTest="test_suite")