diff options
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x | setuptools/package_index.py | 196 |
1 files changed, 118 insertions, 78 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py index e87504db..3e8d6818 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -37,7 +37,7 @@ PYPI_MD5 = re.compile( '<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)' 'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\\)' ) -URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match +URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() __all__ = [ @@ -52,6 +52,15 @@ _tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}" user_agent = _tmpl.format(py_major=sys.version[:3], **globals()) +def parse_requirement_arg(spec): + try: + return Requirement.parse(spec) + except ValueError: + raise DistutilsError( + "Not a URL, existing file, or requirement spec: %r" % (spec,) + ) + + def parse_bdist_wininst(name): """Return (base,pyversion) or (None,None) for possible .exe name""" @@ -62,41 +71,45 @@ def parse_bdist_wininst(name): if lower.endswith('.win32.exe'): base = name[:-10] plat = 'win32' - elif lower.startswith('.win32-py',-16): + elif lower.startswith('.win32-py', -16): py_ver = name[-7:-4] base = name[:-16] plat = 'win32' elif lower.endswith('.win-amd64.exe'): base = name[:-14] plat = 'win-amd64' - elif lower.startswith('.win-amd64-py',-20): + elif lower.startswith('.win-amd64-py', -20): py_ver = name[-7:-4] base = name[:-20] plat = 'win-amd64' - return base,py_ver,plat + return base, py_ver, plat def egg_info_for_url(url): parts = urllib.parse.urlparse(url) scheme, server, path, parameters, query, fragment = parts base = urllib.parse.unquote(path.split('/')[-1]) - if server=='sourceforge.net' and base=='download': # XXX Yuck + if server == 'sourceforge.net' and base == 'download': # XXX Yuck base = urllib.parse.unquote(path.split('/')[-2]) - if '#' in base: base, fragment = base.split('#',1) - return base,fragment + if '#' in base: + base, fragment = base.split('#', 1) + return base, fragment + def distros_for_url(url, metadata=None): """Yield egg or source distribution objects that might be found at a URL""" base, fragment = egg_info_for_url(url) - for dist in distros_for_location(url, base, metadata): yield dist + for dist in distros_for_location(url, base, metadata): + yield dist if fragment: match = EGG_FRAGMENT.match(fragment) if match: for dist in interpret_distro_name( - url, match.group(1), metadata, precedence = CHECKOUT_DIST + url, match.group(1), metadata, precedence=CHECKOUT_DIST ): yield dist + def distros_for_location(location, basename, metadata=None): """Yield egg or source distribution objects based on basename""" if basename.endswith('.egg.zip'): @@ -118,6 +131,7 @@ def distros_for_location(location, basename, metadata=None): return interpret_distro_name(location, basename, metadata) return [] # no extension matched + def distros_for_filename(filename, metadata=None): """Yield possible egg or source distribution objects based on a filename""" return distros_for_location( @@ -152,13 +166,14 @@ def interpret_distro_name( # it is a bdist_dumb, not an sdist -- bail out return - for p in range(1,len(parts)+1): + for p in range(1, len(parts) + 1): yield Distribution( location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]), - py_version=py_version, precedence = precedence, - platform = platform + py_version=py_version, precedence=precedence, + platform=platform ) + # From Python 2.7 docs def unique_everseen(iterable, key=None): "List unique elements, preserving order. Remember all elements ever seen." @@ -177,6 +192,7 @@ def unique_everseen(iterable, key=None): seen_add(k) yield element + def unique_values(func): """ Wrap a function returning an iterable such that the resulting iterable @@ -187,9 +203,11 @@ def unique_values(func): return unique_everseen(func(*args, **kwargs)) return wrapper + REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I) # this line is here to fix emacs' cruddy broken syntax highlighting + @unique_values def find_external_links(url, page): """Find rel="homepage" and rel="download" links in `page`, yielding URLs""" @@ -203,8 +221,8 @@ def find_external_links(url, page): for tag in ("<th>Home Page", "<th>Download URL"): pos = page.find(tag) - if pos!=-1: - match = HREF.search(page,pos) + if pos != -1: + match = HREF.search(page, pos) if match: yield urllib.parse.urljoin(url, htmldecode(match.group(1))) @@ -213,6 +231,7 @@ class ContentChecker(object): """ A null content checker that defines the interface for checking content """ + def feed(self, block): """ Feed a block of data to the hash. @@ -232,6 +251,7 @@ class ContentChecker(object): """ return + class HashChecker(ContentChecker): pattern = re.compile( r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)=' @@ -272,16 +292,22 @@ class PackageIndex(Environment): self, index_url="https://pypi.python.org/simple", hosts=('*',), ca_bundle=None, verify_ssl=True, *args, **kw ): - Environment.__init__(self,*args,**kw) - self.index_url = index_url + "/"[:not index_url.endswith('/')] + Environment.__init__(self, *args, **kw) + self.index_url = index_url + "/" [:not index_url.endswith('/')] self.scanned_urls = {} self.fetched_urls = {} self.package_pages = {} - self.allows = re.compile('|'.join(map(translate,hosts))).match + self.allows = re.compile('|'.join(map(translate, hosts))).match self.to_scan = [] - if verify_ssl and ssl_support.is_available and (ca_bundle or ssl_support.find_ca_bundle()): + use_ssl = ( + verify_ssl + and ssl_support.is_available + and (ca_bundle or ssl_support.find_ca_bundle()) + ) + if use_ssl: self.opener = ssl_support.opener_for(ca_bundle) - else: self.opener = urllib.request.urlopen + else: + self.opener = urllib.request.urlopen def process_url(self, url, retrieve=False): """Evaluate a URL as a possible download, and maybe retrieve it""" @@ -308,8 +334,10 @@ class PackageIndex(Environment): self.info("Reading %s", url) self.fetched_urls[url] = True # prevent multiple fetch attempts - f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url) - if f is None: return + tmpl = "Download error on %s: %%s -- Some packages may not be found!" + f = self.open_url(url, tmpl % url) + if f is None: + return self.fetched_urls[f.url] = True if 'html' not in f.headers.get('content-type', '').lower(): f.close() # not html, we can't process it @@ -317,7 +345,7 @@ class PackageIndex(Environment): base = f.url # handle redirects page = f.read() - if not isinstance(page, str): # We are in Python 3 and got bytes. We want str. + if not isinstance(page, str): # We are in Python 3 and got bytes. We want str. if isinstance(f, urllib.error.HTTPError): # Errors have no charset, assume latin1: charset = 'latin-1' @@ -328,7 +356,7 @@ class PackageIndex(Environment): for match in HREF.finditer(page): link = urllib.parse.urljoin(base, htmldecode(match.group(1))) self.process_url(link) - if url.startswith(self.index_url) and getattr(f,'code',None)!=404: + if url.startswith(self.index_url) and getattr(f, 'code', None) != 404: page = self.process_index(url, page) def process_filename(self, fn, nested=False): @@ -340,7 +368,7 @@ class PackageIndex(Environment): if os.path.isdir(fn) and not nested: path = os.path.realpath(fn) for item in os.listdir(path): - self.process_filename(os.path.join(path,item), True) + self.process_filename(os.path.join(path, item), True) dists = distros_for_filename(fn) if dists: @@ -349,7 +377,8 @@ class PackageIndex(Environment): def url_ok(self, url, fatal=False): s = URL_SCHEME(url) - if (s and s.group(1).lower()=='file') or self.allows(urllib.parse.urlparse(url)[1]): + is_file = s and s.group(1).lower() == 'file' + if is_file or self.allows(urllib.parse.urlparse(url)[1]): return True msg = ("\nNote: Bypassing %s (disallowed host; see " "http://bit.ly/1dg9ijs for details).\n") @@ -384,7 +413,7 @@ class PackageIndex(Environment): dist.precedence = SOURCE_DIST self.add(dist) - def process_index(self,url,page): + def process_index(self, url, page): """Process the contents of a PyPI page""" def scan(link): # Process a URL to see if it's for a package page @@ -392,11 +421,11 @@ class PackageIndex(Environment): parts = list(map( urllib.parse.unquote, link[len(self.index_url):].split('/') )) - if len(parts)==2 and '#' not in parts[1]: + if len(parts) == 2 and '#' not in parts[1]: # it's a package page, sanitize and index it pkg = safe_name(parts[0]) ver = safe_version(parts[1]) - self.package_pages.setdefault(pkg.lower(),{})[link] = True + self.package_pages.setdefault(pkg.lower(), {})[link] = True return to_filename(pkg), to_filename(ver) return None, None @@ -415,13 +444,13 @@ class PackageIndex(Environment): base, frag = egg_info_for_url(new_url) if base.endswith('.py') and not frag: if ver: - new_url+='#egg=%s-%s' % (pkg,ver) + new_url += '#egg=%s-%s' % (pkg, ver) else: self.need_version_info(url) self.scan_url(new_url) return PYPI_MD5.sub( - lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1,3,2), page + lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page ) else: return "" # no sense double-scanning non-package pages @@ -434,24 +463,25 @@ class PackageIndex(Environment): def scan_all(self, msg=None, *args): if self.index_url not in self.fetched_urls: - if msg: self.warn(msg,*args) + if msg: + self.warn(msg, *args) self.info( "Scanning index of all packages (this may take a while)" ) self.scan_url(self.index_url) def find_packages(self, requirement): - self.scan_url(self.index_url + requirement.unsafe_name+'/') + self.scan_url(self.index_url + requirement.unsafe_name + '/') if not self.package_pages.get(requirement.key): # Fall back to safe version of the name - self.scan_url(self.index_url + requirement.project_name+'/') + self.scan_url(self.index_url + requirement.project_name + '/') if not self.package_pages.get(requirement.key): # We couldn't find the target package, so search the index page too self.not_found_in_index(requirement) - for url in list(self.package_pages.get(requirement.key,())): + for url in list(self.package_pages.get(requirement.key, ())): # scan each page that might be related to the desired package self.scan_url(url) @@ -462,7 +492,7 @@ class PackageIndex(Environment): if dist in requirement: return dist self.debug("%s does not match %s", requirement, dist) - return super(PackageIndex, self).obtain(requirement,installer) + return super(PackageIndex, self).obtain(requirement, installer) def check_hash(self, checker, filename, tfp): """ @@ -527,27 +557,21 @@ class PackageIndex(Environment): of `tmpdir`, and the local filename is returned. Various errors may be raised if a problem occurs during downloading. """ - if not isinstance(spec,Requirement): + if not isinstance(spec, Requirement): scheme = URL_SCHEME(spec) if scheme: # It's a url, download it to tmpdir found = self._download_url(scheme.group(1), spec, tmpdir) base, fragment = egg_info_for_url(spec) if base.endswith('.py'): - found = self.gen_setup(found,fragment,tmpdir) + found = self.gen_setup(found, fragment, tmpdir) return found elif os.path.exists(spec): # Existing file or directory, just return it return spec else: - try: - spec = Requirement.parse(spec) - except ValueError: - raise DistutilsError( - "Not a URL, existing file, or requirement spec: %r" % - (spec,) - ) - return getattr(self.fetch_distribution(spec, tmpdir),'location',None) + spec = parse_requirement_arg(spec) + return getattr(self.fetch_distribution(spec, tmpdir), 'location', None) def fetch_distribution( self, requirement, tmpdir, force_scan=False, source=False, @@ -581,22 +605,24 @@ class PackageIndex(Environment): for dist in env[req.key]: - if dist.precedence==DEVELOP_DIST and not develop_ok: + if dist.precedence == DEVELOP_DIST and not develop_ok: if dist not in skipped: - self.warn("Skipping development or system egg: %s",dist) + self.warn("Skipping development or system egg: %s", dist) skipped[dist] = 1 continue - if dist in req and (dist.precedence<=SOURCE_DIST or not source): - return dist + if dist in req and (dist.precedence <= SOURCE_DIST or not source): + dist.download_location = self.download(dist.location, tmpdir) + if os.path.exists(dist.download_location): + return dist if force_scan: self.prescan() self.find_packages(requirement) dist = find(requirement) - if local_index is not None: - dist = dist or find(requirement, local_index) + if not dist and local_index is not None: + dist = find(requirement, local_index) if dist is None: if self.to_scan is not None: @@ -609,13 +635,13 @@ class PackageIndex(Environment): if dist is None: self.warn( - "No local packages or download links found for %s%s", + "No local packages or working download links found for %s%s", (source and "a source distribution of " or ""), requirement, ) else: self.info("Best match: %s", dist) - return dist.clone(location=self.download(dist.location, tmpdir)) + return dist.clone(location=dist.download_location) def fetch(self, requirement, tmpdir, force_scan=False, source=False): """Obtain a file suitable for fulfilling `requirement` @@ -625,7 +651,7 @@ class PackageIndex(Environment): ``location`` of the downloaded distribution instead of a distribution object. """ - dist = self.fetch_distribution(requirement,tmpdir,force_scan,source) + dist = self.fetch_distribution(requirement, tmpdir, force_scan, source) if dist is not None: return dist.location return None @@ -637,7 +663,7 @@ class PackageIndex(Environment): interpret_distro_name(filename, match.group(1), None) if d.version ] or [] - if len(dists)==1: # unambiguous ``#egg`` fragment + if len(dists) == 1: # unambiguous ``#egg`` fragment basename = os.path.basename(filename) # Make sure the file has been downloaded to the temp dir. @@ -646,7 +672,7 @@ class PackageIndex(Environment): from setuptools.command.easy_install import samefile if not samefile(filename, dst): shutil.copy2(filename, dst) - filename=dst + filename = dst with open(os.path.join(tmpdir, 'setup.py'), 'w') as file: file.write( @@ -663,7 +689,7 @@ class PackageIndex(Environment): raise DistutilsError( "Can't unambiguously interpret project/version identifier %r; " "any dashes in the name or version should be escaped using " - "underscores. %r" % (fragment,dists) + "underscores. %r" % (fragment, dists) ) else: raise DistutilsError( @@ -672,6 +698,7 @@ class PackageIndex(Environment): ) dl_blocksize = 8192 + def _download_to(self, url, filename): self.info("Downloading %s", url) # Download the file @@ -681,7 +708,7 @@ class PackageIndex(Environment): fp = self.open_url(strip_fragment(url)) if isinstance(fp, urllib.error.HTTPError): raise DistutilsError( - "Can't download %s: %s %s" % (url, fp.code,fp.msg) + "Can't download %s: %s %s" % (url, fp.code, fp.msg) ) headers = fp.info() blocknum = 0 @@ -692,7 +719,7 @@ class PackageIndex(Environment): sizes = get_all_headers(headers, 'Content-Length') size = max(map(int, sizes)) self.reporthook(url, filename, blocknum, bs, size) - with open(filename,'wb') as tfp: + with open(filename, 'wb') as tfp: while True: block = fp.read(bs) if block: @@ -705,7 +732,8 @@ class PackageIndex(Environment): self.check_hash(checker, filename, tfp) return headers finally: - if fp: fp.close() + if fp: + fp.close() def reporthook(self, url, filename, blocknum, blksize, size): pass # no-op @@ -751,24 +779,24 @@ class PackageIndex(Environment): name, fragment = egg_info_for_url(url) if name: while '..' in name: - name = name.replace('..','.').replace('\\','_') + name = name.replace('..', '.').replace('\\', '_') else: name = "__downloaded__" # default if URL has no path contents if name.endswith('.egg.zip'): name = name[:-4] # strip the extra .zip before download - filename = os.path.join(tmpdir,name) + filename = os.path.join(tmpdir, name) # Download the file # - if scheme=='svn' or scheme.startswith('svn+'): + if scheme == 'svn' or scheme.startswith('svn+'): return self._download_svn(url, filename) - elif scheme=='git' or scheme.startswith('git+'): + elif scheme == 'git' or scheme.startswith('git+'): return self._download_git(url, filename) elif scheme.startswith('hg+'): return self._download_hg(url, filename) - elif scheme=='file': + elif scheme == 'file': return urllib.request.url2pathname(urllib.parse.urlparse(url)[2]) else: self.url_ok(url, True) # raises error if not allowed @@ -779,7 +807,7 @@ class PackageIndex(Environment): def _attempt_download(self, url, filename): headers = self._download_to(url, filename) - if 'html' in headers.get('content-type','').lower(): + if 'html' in headers.get('content-type', '').lower(): return self._download_html(url, headers, filename) else: return filename @@ -797,22 +825,22 @@ class PackageIndex(Environment): break # not an index page file.close() os.unlink(filename) - raise DistutilsError("Unexpected HTML page found at "+url) + raise DistutilsError("Unexpected HTML page found at " + url) def _download_svn(self, url, filename): - url = url.split('#',1)[0] # remove any fragment for svn's sake + url = url.split('#', 1)[0] # remove any fragment for svn's sake creds = '' if url.lower().startswith('svn:') and '@' in url: scheme, netloc, path, p, q, f = urllib.parse.urlparse(url) if not netloc and path.startswith('//') and '/' in path[2:]: - netloc, path = path[2:].split('/',1) + netloc, path = path[2:].split('/', 1) auth, host = splituser(netloc) if auth: if ':' in auth: - user, pw = auth.split(':',1) + user, pw = auth.split(':', 1) creds = " --username=%s --password=%s" % (user, pw) else: - creds = " --username="+auth + creds = " --username=" + auth netloc = host parts = scheme, netloc, url, p, q, f url = urllib.parse.urlunparse(parts) @@ -827,7 +855,7 @@ class PackageIndex(Environment): scheme = scheme.split('+', 1)[-1] # Some fragment identification fails - path = path.split('#',1)[0] + path = path.split('#', 1)[0] rev = None if '@' in path: @@ -839,7 +867,7 @@ class PackageIndex(Environment): return url, rev def _download_git(self, url, filename): - filename = filename.split('#',1)[0] + filename = filename.split('#', 1)[0] url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) self.info("Doing git clone from %s to %s", url, filename) @@ -855,7 +883,7 @@ class PackageIndex(Environment): return filename def _download_hg(self, url, filename): - filename = filename.split('#',1)[0] + filename = filename.split('#', 1)[0] url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) self.info("Doing hg clone from %s to %s", url, filename) @@ -879,16 +907,20 @@ class PackageIndex(Environment): def warn(self, msg, *args): log.warn(msg, *args) + # This pattern matches a character entity reference (a decimal numeric # references, a hexadecimal numeric reference, or a named reference). entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub + def uchr(c): if not isinstance(c, int): return c - if c>255: return six.unichr(c) + if c > 255: + return six.unichr(c) return chr(c) + def decode_entity(match): what = match.group(1) if what.startswith('#x'): @@ -899,10 +931,12 @@ def decode_entity(match): what = six.moves.html_entities.name2codepoint.get(what, match.group(0)) return uchr(what) + def htmldecode(text): """Decode HTML entities in the given text.""" return entity_sub(decode_entity, text) + def socket_timeout(timeout=15): def _socket_timeout(func): def _socket_timeout(*args, **kwargs): @@ -915,6 +949,7 @@ def socket_timeout(timeout=15): return _socket_timeout return _socket_timeout + def _encode_auth(auth): """ A function compatible with Python 2.3-3.3 that will encode @@ -935,12 +970,14 @@ def _encode_auth(auth): # convert back to a string encoded = encoded_bytes.decode() # strip the trailing carriage return - return encoded.replace('\n','') + return encoded.replace('\n', '') + class Credential(object): """ A username/password pair. Use like a namedtuple. """ + def __init__(self, username, password): self.username = username self.password = password @@ -952,6 +989,7 @@ class Credential(object): def __str__(self): return '%(username)s:%(password)s' % vars(self) + class PyPIConfig(configparser.RawConfigParser): def __init__(self): @@ -1011,7 +1049,7 @@ def open_with_auth(url, opener=urllib.request.urlopen): if cred: auth = str(cred) info = cred.username, url - log.info('Authenticating as %s for %s (from .pypirc)' % info) + log.info('Authenticating as %s for %s (from .pypirc)', *info) if auth: auth = "Basic " + _encode_auth(auth) @@ -1029,12 +1067,13 @@ def open_with_auth(url, opener=urllib.request.urlopen): # Put authentication info back into request URL if same host, # so that links found on the page will work s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url) - if s2==scheme and h2==host: + if s2 == scheme and h2 == host: parts = s2, netloc, path2, param2, query2, frag2 fp.url = urllib.parse.urlunparse(parts) return fp + # adding a timeout to avoid freezing package_index open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth) @@ -1042,6 +1081,7 @@ open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth) def fix_sf_url(url): return url # backward compatibility + def local_open(url): """Read a local path, with special support for directories""" scheme, server, path, param, query, frag = urllib.parse.urlparse(url) |