Move the package index and download code from easy_install.py to the setuptools.package_index module.

git-svn-id: http://svn.python.org/projects/sandbox/trunk/setuptools@41051 6015fed2-1504-0410-9fe1-9d1591cc4771
author: pje <pje@6015fed2-1504-0410-9fe1-9d1591cc4771> 2005-06-12 03:44:07 +0000
committer: pje <pje@6015fed2-1504-0410-9fe1-9d1591cc4771> 2005-06-12 03:44:07 +0000
commit: 2b883d084eb06985b7eaf85696e8c08fe659bf47 (patch)
tree: 2bec42958fb6ab9976b8cf48bfdeafab0fdb9d99 /easy_install.py
parent: 04db3af06ec50a07b66711938c310082cdf9c362 (diff)
download: python-setuptools-2b883d084eb06985b7eaf85696e8c08fe659bf47.tar.gz
1 files changed, 6 insertions, 334 deletions
diff --git a/easy_install.py b/easy_install.py
index 9e2ad87..22e37a9 100755
--- a/easy_install.py
+++ b/easy_install.py
@@ -12,311 +12,14 @@ __ http://peak.telecommunity.com/DevCenter/EasyInstall
 
 """
 
-import sys
-import os.path
-import pkg_resources
-import re
-import zipimport
-import shutil
-import urlparse
-import urllib2
-import tempfile
+import sys, os.path, zipimport, shutil, tempfile
 
 from setuptools.sandbox import run_setup
-from setuptools.archive_util import unpack_archive
 from distutils.sysconfig import get_python_lib
-from pkg_resources import *
-
-
-
-
-
-
-
-
-
-
-
-
-
-HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
-EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
-
-def distros_for_url(url, metadata=None):
-    """Yield egg or source distribution objects that might be found at a URL"""
-
-    path = urlparse.urlparse(url)[2]
-    base = urllib2.unquote(path.split('/')[-1])
-
-    if base.endswith('.egg'):
-        dist = Distribution.from_filename(base, metadata)
-        dist.path = url
-        yield dist
-        return  # only one, unambiguous interpretation
-
-    for ext in EXTENSIONS:
-        if base.endswith(ext):
-            base = base[:-len(ext)]
-            break
-    else:
-        return  # no extension matched
-
-    # Generate alternative interpretations of a source distro name
-    # Because some packages are ambiguous as to name/versions split
-    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
-    # So, we generate each possible interepretation (e.g. "adns, python-1.1.0"
-    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
-    # the spurious interpretations should be ignored, because in the event
-    # there's also an "adns" package, the spurious "python-1.1.0" version will
-    # compare lower than any numeric version number, and is therefore unlikely
-    # to match a request for it.  It's still a potential problem, though, and
-    # in the long run PyPI and the distutils should go for "safe" names and
-    # versions in distribution archive names (sdist and bdist).
-
-    parts = base.split('-')
-    for p in range(1,len(parts)+1):
-        yield Distribution(
-            url, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
-            distro_type = SOURCE_DIST
-        )
-
-class PackageIndex(AvailableDistributions):
-    """A distribution index that scans web pages for download URLs"""
-
-    def __init__(self,index_url="http://www.python.org/pypi",*args,**kw):
-        AvailableDistributions.__init__(self,*args,**kw)
-        self.index_url = index_url + "/"[:not index_url.endswith('/')]
-        self.scanned_urls = {}
-        self.fetched_urls = {}
-        self.package_pages = {}
-
-    def scan_url(self, url):
-        self.process_url(url, True)
-
-    def process_url(self, url, retrieve=False):
-        if url in self.scanned_urls and not retrieve:
-            return
-
-        self.scanned_urls[url] = True
-        dists = list(distros_for_url(url))
-        map(self.add, dists)
-
-        if dists or not retrieve or url in self.fetched_urls:
-            # don't need the actual page
-            return
-
-        f = self.open_url(url)
-        self.fetched_urls[url] = self.fetched_urls[f.url] = True
-        if 'html' not in f.headers['content-type'].lower():
-            f.close()   # not html, we can't process it
-            return
-
-        base = f.url     # handle redirects
-        page = f.read()
-        f.close()
-        if url.startswith(self.index_url):
-            self.process_index(url, page)
-        else:
-            for match in HREF.finditer(page):
-                link = urlparse.urljoin(base, match.group(1))
-                self.process_url(link)
-
-    def find_packages(self,requirement):
-        self.scan_url(self.index_url + requirement.distname)
-        if not self.package_pages.get(requirement.key):
-            # We couldn't find the target package, so search the index page too
-            self.scan_url(self.index_url)
-        for url in self.package_pages.get(requirement.key,()):
-            # scan each page that might be related to the desired package
-            self.scan_url(url)
-
-    def process_index(self,url,page):
-        def scan(link):
-            if link.startswith(self.index_url):
-                parts = map(
-                    urllib2.unquote, link[len(self.index_url):].split('/')
-                )
-                if len(parts)==2:
-                    # it's a package page, sanitize and index it
-                    pkg = safe_name(parts[0])
-                    ver = safe_version(parts[1])
-                    self.package_pages.setdefault(pkg.lower(),{})[link] = True
-        if url==self.index_url or 'Index of Packages</title>' in page:
-            # process an index page into the package-page index
-            for match in HREF.finditer(page):
-                scan( urlparse.urljoin(url, match.group(1)) )
-        else:
-            scan(url)   # ensure this page is in the page index
-            # process individual package page
-            for tag in ("<th>Home Page", "<th>Download URL"):
-                pos = page.find(tag)
-                if pos!=-1:
-                    match = HREF.search(page,pos)
-                    if match:
-                        # Process the found URL
-                        self.scan_url(urlparse.urljoin(url, match.group(1)))
-
-    def obtain(self,requirement):
-        self.find_packages(requirement)
-        for dist in self.get(requirement.key, ()):
-            if dist in requirement:
-                return dist
-
-    def download(self, spec, tmpdir):
-        """Locate and/or download `spec`, returning a local filename
-
-        `spec` may be a ``Requirement`` object, or a string containing a URL,
-        an existing local filename, or a package/version requirement spec
-        (i.e. the string form of a ``Requirement`` object).
-
-        If necessary, the requirement is searched for in the package index.
-        If the download is successful, the return value is a local file path,
-        and it is a subpath of `tmpdir` if the distribution had to be
-        downloaded.  If no matching distribution is found, return ``None``.
-        Various errors may be raised if a problem occurs during downloading.
-        """
-
-        if not isinstance(spec,Requirement):
-            scheme = URL_SCHEME(spec)
-            if scheme:
-                # It's a url, download it to tmpdir
-                return self._download_url(scheme.group(1), spec, tmpdir)
-
-            elif os.path.exists(spec):
-                # Existing file or directory, just return it
-                return spec
-            else:
-                try:
-                    spec = Requirement.parse(spec)
-                except ValueError:
-                    raise RuntimeError(
-                        "Not a URL, existing file, or requirement spec: %r" %
-                        (spec,)
-                    )
-
-        # process a Requirement
-        dist = self.best_match(spec,[])
-        if dist is not None:
-            return self.download(dist.path, tmpdir)
-
-        return None
-
-
-
-    dl_blocksize = 8192
-    
-    def _download_to(self, url, filename):
-        # Download the file
-        fp, tfp = None, None
-        try:
-            fp = self.open_url(url)
-            if isinstance(fp, urllib2.HTTPError):
-                raise RuntimeError(
-                    "Can't download %s: %s %s" % (url, fp.code,fp.msg)
-                )
-
-            headers = fp.info()
-            blocknum = 0
-            bs = self.dl_blocksize
-            size = -1
-
-            if "content-length" in headers:
-                size = int(headers["Content-Length"])
-                self.reporthook(url, filename, blocknum, bs, size)
-
-            tfp = open(filename,'wb')      
-            while True:
-                block = fp.read(bs)
-                if block:
-                    tfp.write(block)
-                    blocknum += 1
-                    self.reporthook(url, filename, blocknum, bs, size)
-                else:
-                    break
-            return headers
-
-        finally:
-            if fp: fp.close()
-            if tfp: tfp.close()
-
-    def reporthook(self, url, filename, blocknum, blksize, size):
-        pass    # no-op
-
-
-
-    def open_url(self, url):
-        try:
-            return urllib2.urlopen(url)
-        except urllib2.HTTPError, v:
-            return v
-        except urllib2.URLError, v:
-            raise RuntimeError("Download error: %s" % v.reason)
-
-
-    def _download_url(self, scheme, url, tmpdir):
-
-        # Determine download filename
-        #
-        name = filter(None,urlparse.urlparse(url)[2].split('/'))
-        if name:
-            name = name[-1]
-            while '..' in name:
-                name = name.replace('..','.').replace('\\','_')
-        else:
-            name = "__downloaded__"    # default if URL has no path contents
-
-        filename = os.path.join(tmpdir,name)
-
-        # Download the file
-        #
-        if scheme=='svn' or scheme.startswith('svn+'):
-            return self._download_svn(url, filename)
-        else:
-            headers = self._download_to(url, filename)
-            if 'html' in headers['content-type'].lower():
-                return self._download_html(url, headers, filename, tmpdir)
-            else:
-                return filename
-
-
-
-
-
-
-
-
-    def _download_html(self, url, headers, filename, tmpdir):
-        # Check for a sourceforge URL
-        sf_url = url.startswith('http://prdownloads.')
-        file = open(filename)
-        for line in file:
-            if line.strip():
-                # Check for a subversion index page
-                if re.search(r'<title>Revision \d+:', line):
-                    # it's a subversion index page:
-                    file.close()
-                    os.unlink(filename)
-                    return self._download_svn(url, filename)
-                # Check for a SourceForge header
-                elif sf_url:
-                    if re.search(r'^<HTML><HEAD>', line, re.I):
-                        continue    # skip first line
-                    elif re.search(r'<TITLE>Select a Mirror for File:',line):
-                        # Sourceforge mirror page
-                        page = file.read()
-                        file.close()
-                        os.unlink(filename)
-                        return self._download_sourceforge(url, page, tmpdir)
-                break   # not an index page
-        file.close()
-        raise RuntimeError("Unexpected HTML page found at "+url)
-
-
-    def _download_svn(self, url, filename):
-        os.system("svn checkout -q %s %s" % (url, filename))
-        return filename
-
 
+from setuptools.archive_util import unpack_archive
+from setuptools.package_index import PackageIndex
+from pkg_resources import *
 
 
 
@@ -326,37 +29,6 @@ class PackageIndex(AvailableDistributions):
 
 
 
-    def _download_sourceforge(self, source_url, sf_page, tmpdir):
-        """Download package from randomly-selected SourceForge mirror"""
-
-        mirror_regex = re.compile(r'HREF=(/.*?\?use_mirror=[^>]*)')
-        urls = [m.group(1) for m in mirror_regex.finditer(sf_page)]
-        if not urls:
-            raise RuntimeError(
-                "URL looks like a Sourceforge mirror page, but no URLs found"
-            )
-
-        import random
-        url = urlparse.urljoin(source_url, random.choice(urls))
-        f = self.open_url(url)
-        match = re.search(
-            r'<META HTTP-EQUIV="refresh" content=".*?URL=(.*?)"',
-            f.read()
-        )
-        f.close()
-
-        if match:
-            download_url = match.group(1)
-            scheme = URL_SCHEME(download_url)
-            return self._download_url(scheme.group(1), download_url, tmpdir)
-        else:
-            raise RuntimeError(
-                'No META HTTP-EQUIV="refresh" found in Sourceforge page at %s'
-                % url
-            )
-
-
-
 
 
 
@@ -572,8 +244,6 @@ class PthDistributions(AvailableDistributions):
 
 
 
-URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
-
 def main(argv, installer_type=Installer, index_type=PackageIndex):
 
     from optparse import OptionParser
@@ -613,6 +283,8 @@ def main(argv, installer_type=Installer, index_type=PackageIndex):
 
 
 
+
+
     def alloc_tmp():
         if options.tmpdir is None:
             return tempfile.mkdtemp(prefix="easy_install-")
author	pje <pje@6015fed2-1504-0410-9fe1-9d1591cc4771>	2005-06-12 03:44:07 +0000
committer	pje <pje@6015fed2-1504-0410-9fe1-9d1591cc4771>	2005-06-12 03:44:07 +0000
commit	2b883d084eb06985b7eaf85696e8c08fe659bf47 (patch)
tree	2bec42958fb6ab9976b8cf48bfdeafab0fdb9d99 /easy_install.py
parent	04db3af06ec50a07b66711938c310082cdf9c362 (diff)
download	python-setuptools-2b883d084eb06985b7eaf85696e8c08fe659bf47.tar.gz