summaryrefslogtreecommitdiff
path: root/setuptools/package_index.py
diff options
context:
space:
mode:
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-xsetuptools/package_index.py196
1 files changed, 118 insertions, 78 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index e87504db..3e8d6818 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -37,7 +37,7 @@ PYPI_MD5 = re.compile(
'<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)'
'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
)
-URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match
+URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
__all__ = [
@@ -52,6 +52,15 @@ _tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
user_agent = _tmpl.format(py_major=sys.version[:3], **globals())
+def parse_requirement_arg(spec):
+ try:
+ return Requirement.parse(spec)
+ except ValueError:
+ raise DistutilsError(
+ "Not a URL, existing file, or requirement spec: %r" % (spec,)
+ )
+
+
def parse_bdist_wininst(name):
"""Return (base,pyversion) or (None,None) for possible .exe name"""
@@ -62,41 +71,45 @@ def parse_bdist_wininst(name):
if lower.endswith('.win32.exe'):
base = name[:-10]
plat = 'win32'
- elif lower.startswith('.win32-py',-16):
+ elif lower.startswith('.win32-py', -16):
py_ver = name[-7:-4]
base = name[:-16]
plat = 'win32'
elif lower.endswith('.win-amd64.exe'):
base = name[:-14]
plat = 'win-amd64'
- elif lower.startswith('.win-amd64-py',-20):
+ elif lower.startswith('.win-amd64-py', -20):
py_ver = name[-7:-4]
base = name[:-20]
plat = 'win-amd64'
- return base,py_ver,plat
+ return base, py_ver, plat
def egg_info_for_url(url):
parts = urllib.parse.urlparse(url)
scheme, server, path, parameters, query, fragment = parts
base = urllib.parse.unquote(path.split('/')[-1])
- if server=='sourceforge.net' and base=='download': # XXX Yuck
+ if server == 'sourceforge.net' and base == 'download': # XXX Yuck
base = urllib.parse.unquote(path.split('/')[-2])
- if '#' in base: base, fragment = base.split('#',1)
- return base,fragment
+ if '#' in base:
+ base, fragment = base.split('#', 1)
+ return base, fragment
+
def distros_for_url(url, metadata=None):
"""Yield egg or source distribution objects that might be found at a URL"""
base, fragment = egg_info_for_url(url)
- for dist in distros_for_location(url, base, metadata): yield dist
+ for dist in distros_for_location(url, base, metadata):
+ yield dist
if fragment:
match = EGG_FRAGMENT.match(fragment)
if match:
for dist in interpret_distro_name(
- url, match.group(1), metadata, precedence = CHECKOUT_DIST
+ url, match.group(1), metadata, precedence=CHECKOUT_DIST
):
yield dist
+
def distros_for_location(location, basename, metadata=None):
"""Yield egg or source distribution objects based on basename"""
if basename.endswith('.egg.zip'):
@@ -118,6 +131,7 @@ def distros_for_location(location, basename, metadata=None):
return interpret_distro_name(location, basename, metadata)
return [] # no extension matched
+
def distros_for_filename(filename, metadata=None):
"""Yield possible egg or source distribution objects based on a filename"""
return distros_for_location(
@@ -152,13 +166,14 @@ def interpret_distro_name(
# it is a bdist_dumb, not an sdist -- bail out
return
- for p in range(1,len(parts)+1):
+ for p in range(1, len(parts) + 1):
yield Distribution(
location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
- py_version=py_version, precedence = precedence,
- platform = platform
+ py_version=py_version, precedence=precedence,
+ platform=platform
)
+
# From Python 2.7 docs
def unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
@@ -177,6 +192,7 @@ def unique_everseen(iterable, key=None):
seen_add(k)
yield element
+
def unique_values(func):
"""
Wrap a function returning an iterable such that the resulting iterable
@@ -187,9 +203,11 @@ def unique_values(func):
return unique_everseen(func(*args, **kwargs))
return wrapper
+
REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting
+
@unique_values
def find_external_links(url, page):
"""Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
@@ -203,8 +221,8 @@ def find_external_links(url, page):
for tag in ("<th>Home Page", "<th>Download URL"):
pos = page.find(tag)
- if pos!=-1:
- match = HREF.search(page,pos)
+ if pos != -1:
+ match = HREF.search(page, pos)
if match:
yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
@@ -213,6 +231,7 @@ class ContentChecker(object):
"""
A null content checker that defines the interface for checking content
"""
+
def feed(self, block):
"""
Feed a block of data to the hash.
@@ -232,6 +251,7 @@ class ContentChecker(object):
"""
return
+
class HashChecker(ContentChecker):
pattern = re.compile(
r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
@@ -272,16 +292,22 @@ class PackageIndex(Environment):
self, index_url="https://pypi.python.org/simple", hosts=('*',),
ca_bundle=None, verify_ssl=True, *args, **kw
):
- Environment.__init__(self,*args,**kw)
- self.index_url = index_url + "/"[:not index_url.endswith('/')]
+ Environment.__init__(self, *args, **kw)
+ self.index_url = index_url + "/" [:not index_url.endswith('/')]
self.scanned_urls = {}
self.fetched_urls = {}
self.package_pages = {}
- self.allows = re.compile('|'.join(map(translate,hosts))).match
+ self.allows = re.compile('|'.join(map(translate, hosts))).match
self.to_scan = []
- if verify_ssl and ssl_support.is_available and (ca_bundle or ssl_support.find_ca_bundle()):
+ use_ssl = (
+ verify_ssl
+ and ssl_support.is_available
+ and (ca_bundle or ssl_support.find_ca_bundle())
+ )
+ if use_ssl:
self.opener = ssl_support.opener_for(ca_bundle)
- else: self.opener = urllib.request.urlopen
+ else:
+ self.opener = urllib.request.urlopen
def process_url(self, url, retrieve=False):
"""Evaluate a URL as a possible download, and maybe retrieve it"""
@@ -308,8 +334,10 @@ class PackageIndex(Environment):
self.info("Reading %s", url)
self.fetched_urls[url] = True # prevent multiple fetch attempts
- f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
- if f is None: return
+ tmpl = "Download error on %s: %%s -- Some packages may not be found!"
+ f = self.open_url(url, tmpl % url)
+ if f is None:
+ return
self.fetched_urls[f.url] = True
if 'html' not in f.headers.get('content-type', '').lower():
f.close() # not html, we can't process it
@@ -317,7 +345,7 @@ class PackageIndex(Environment):
base = f.url # handle redirects
page = f.read()
- if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
+ if not isinstance(page, str): # We are in Python 3 and got bytes. We want str.
if isinstance(f, urllib.error.HTTPError):
# Errors have no charset, assume latin1:
charset = 'latin-1'
@@ -328,7 +356,7 @@ class PackageIndex(Environment):
for match in HREF.finditer(page):
link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
self.process_url(link)
- if url.startswith(self.index_url) and getattr(f,'code',None)!=404:
+ if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
page = self.process_index(url, page)
def process_filename(self, fn, nested=False):
@@ -340,7 +368,7 @@ class PackageIndex(Environment):
if os.path.isdir(fn) and not nested:
path = os.path.realpath(fn)
for item in os.listdir(path):
- self.process_filename(os.path.join(path,item), True)
+ self.process_filename(os.path.join(path, item), True)
dists = distros_for_filename(fn)
if dists:
@@ -349,7 +377,8 @@ class PackageIndex(Environment):
def url_ok(self, url, fatal=False):
s = URL_SCHEME(url)
- if (s and s.group(1).lower()=='file') or self.allows(urllib.parse.urlparse(url)[1]):
+ is_file = s and s.group(1).lower() == 'file'
+ if is_file or self.allows(urllib.parse.urlparse(url)[1]):
return True
msg = ("\nNote: Bypassing %s (disallowed host; see "
"http://bit.ly/1dg9ijs for details).\n")
@@ -384,7 +413,7 @@ class PackageIndex(Environment):
dist.precedence = SOURCE_DIST
self.add(dist)
- def process_index(self,url,page):
+ def process_index(self, url, page):
"""Process the contents of a PyPI page"""
def scan(link):
# Process a URL to see if it's for a package page
@@ -392,11 +421,11 @@ class PackageIndex(Environment):
parts = list(map(
urllib.parse.unquote, link[len(self.index_url):].split('/')
))
- if len(parts)==2 and '#' not in parts[1]:
+ if len(parts) == 2 and '#' not in parts[1]:
# it's a package page, sanitize and index it
pkg = safe_name(parts[0])
ver = safe_version(parts[1])
- self.package_pages.setdefault(pkg.lower(),{})[link] = True
+ self.package_pages.setdefault(pkg.lower(), {})[link] = True
return to_filename(pkg), to_filename(ver)
return None, None
@@ -415,13 +444,13 @@ class PackageIndex(Environment):
base, frag = egg_info_for_url(new_url)
if base.endswith('.py') and not frag:
if ver:
- new_url+='#egg=%s-%s' % (pkg,ver)
+ new_url += '#egg=%s-%s' % (pkg, ver)
else:
self.need_version_info(url)
self.scan_url(new_url)
return PYPI_MD5.sub(
- lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1,3,2), page
+ lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
)
else:
return "" # no sense double-scanning non-package pages
@@ -434,24 +463,25 @@ class PackageIndex(Environment):
def scan_all(self, msg=None, *args):
if self.index_url not in self.fetched_urls:
- if msg: self.warn(msg,*args)
+ if msg:
+ self.warn(msg, *args)
self.info(
"Scanning index of all packages (this may take a while)"
)
self.scan_url(self.index_url)
def find_packages(self, requirement):
- self.scan_url(self.index_url + requirement.unsafe_name+'/')
+ self.scan_url(self.index_url + requirement.unsafe_name + '/')
if not self.package_pages.get(requirement.key):
# Fall back to safe version of the name
- self.scan_url(self.index_url + requirement.project_name+'/')
+ self.scan_url(self.index_url + requirement.project_name + '/')
if not self.package_pages.get(requirement.key):
# We couldn't find the target package, so search the index page too
self.not_found_in_index(requirement)
- for url in list(self.package_pages.get(requirement.key,())):
+ for url in list(self.package_pages.get(requirement.key, ())):
# scan each page that might be related to the desired package
self.scan_url(url)
@@ -462,7 +492,7 @@ class PackageIndex(Environment):
if dist in requirement:
return dist
self.debug("%s does not match %s", requirement, dist)
- return super(PackageIndex, self).obtain(requirement,installer)
+ return super(PackageIndex, self).obtain(requirement, installer)
def check_hash(self, checker, filename, tfp):
"""
@@ -527,27 +557,21 @@ class PackageIndex(Environment):
of `tmpdir`, and the local filename is returned. Various errors may be
raised if a problem occurs during downloading.
"""
- if not isinstance(spec,Requirement):
+ if not isinstance(spec, Requirement):
scheme = URL_SCHEME(spec)
if scheme:
# It's a url, download it to tmpdir
found = self._download_url(scheme.group(1), spec, tmpdir)
base, fragment = egg_info_for_url(spec)
if base.endswith('.py'):
- found = self.gen_setup(found,fragment,tmpdir)
+ found = self.gen_setup(found, fragment, tmpdir)
return found
elif os.path.exists(spec):
# Existing file or directory, just return it
return spec
else:
- try:
- spec = Requirement.parse(spec)
- except ValueError:
- raise DistutilsError(
- "Not a URL, existing file, or requirement spec: %r" %
- (spec,)
- )
- return getattr(self.fetch_distribution(spec, tmpdir),'location',None)
+ spec = parse_requirement_arg(spec)
+ return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
def fetch_distribution(
self, requirement, tmpdir, force_scan=False, source=False,
@@ -581,22 +605,24 @@ class PackageIndex(Environment):
for dist in env[req.key]:
- if dist.precedence==DEVELOP_DIST and not develop_ok:
+ if dist.precedence == DEVELOP_DIST and not develop_ok:
if dist not in skipped:
- self.warn("Skipping development or system egg: %s",dist)
+ self.warn("Skipping development or system egg: %s", dist)
skipped[dist] = 1
continue
- if dist in req and (dist.precedence<=SOURCE_DIST or not source):
- return dist
+ if dist in req and (dist.precedence <= SOURCE_DIST or not source):
+ dist.download_location = self.download(dist.location, tmpdir)
+ if os.path.exists(dist.download_location):
+ return dist
if force_scan:
self.prescan()
self.find_packages(requirement)
dist = find(requirement)
- if local_index is not None:
- dist = dist or find(requirement, local_index)
+ if not dist and local_index is not None:
+ dist = find(requirement, local_index)
if dist is None:
if self.to_scan is not None:
@@ -609,13 +635,13 @@ class PackageIndex(Environment):
if dist is None:
self.warn(
- "No local packages or download links found for %s%s",
+ "No local packages or working download links found for %s%s",
(source and "a source distribution of " or ""),
requirement,
)
else:
self.info("Best match: %s", dist)
- return dist.clone(location=self.download(dist.location, tmpdir))
+ return dist.clone(location=dist.download_location)
def fetch(self, requirement, tmpdir, force_scan=False, source=False):
"""Obtain a file suitable for fulfilling `requirement`
@@ -625,7 +651,7 @@ class PackageIndex(Environment):
``location`` of the downloaded distribution instead of a distribution
object.
"""
- dist = self.fetch_distribution(requirement,tmpdir,force_scan,source)
+ dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
if dist is not None:
return dist.location
return None
@@ -637,7 +663,7 @@ class PackageIndex(Environment):
interpret_distro_name(filename, match.group(1), None) if d.version
] or []
- if len(dists)==1: # unambiguous ``#egg`` fragment
+ if len(dists) == 1: # unambiguous ``#egg`` fragment
basename = os.path.basename(filename)
# Make sure the file has been downloaded to the temp dir.
@@ -646,7 +672,7 @@ class PackageIndex(Environment):
from setuptools.command.easy_install import samefile
if not samefile(filename, dst):
shutil.copy2(filename, dst)
- filename=dst
+ filename = dst
with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
file.write(
@@ -663,7 +689,7 @@ class PackageIndex(Environment):
raise DistutilsError(
"Can't unambiguously interpret project/version identifier %r; "
"any dashes in the name or version should be escaped using "
- "underscores. %r" % (fragment,dists)
+ "underscores. %r" % (fragment, dists)
)
else:
raise DistutilsError(
@@ -672,6 +698,7 @@ class PackageIndex(Environment):
)
dl_blocksize = 8192
+
def _download_to(self, url, filename):
self.info("Downloading %s", url)
# Download the file
@@ -681,7 +708,7 @@ class PackageIndex(Environment):
fp = self.open_url(strip_fragment(url))
if isinstance(fp, urllib.error.HTTPError):
raise DistutilsError(
- "Can't download %s: %s %s" % (url, fp.code,fp.msg)
+ "Can't download %s: %s %s" % (url, fp.code, fp.msg)
)
headers = fp.info()
blocknum = 0
@@ -692,7 +719,7 @@ class PackageIndex(Environment):
sizes = get_all_headers(headers, 'Content-Length')
size = max(map(int, sizes))
self.reporthook(url, filename, blocknum, bs, size)
- with open(filename,'wb') as tfp:
+ with open(filename, 'wb') as tfp:
while True:
block = fp.read(bs)
if block:
@@ -705,7 +732,8 @@ class PackageIndex(Environment):
self.check_hash(checker, filename, tfp)
return headers
finally:
- if fp: fp.close()
+ if fp:
+ fp.close()
def reporthook(self, url, filename, blocknum, blksize, size):
pass # no-op
@@ -751,24 +779,24 @@ class PackageIndex(Environment):
name, fragment = egg_info_for_url(url)
if name:
while '..' in name:
- name = name.replace('..','.').replace('\\','_')
+ name = name.replace('..', '.').replace('\\', '_')
else:
name = "__downloaded__" # default if URL has no path contents
if name.endswith('.egg.zip'):
name = name[:-4] # strip the extra .zip before download
- filename = os.path.join(tmpdir,name)
+ filename = os.path.join(tmpdir, name)
# Download the file
#
- if scheme=='svn' or scheme.startswith('svn+'):
+ if scheme == 'svn' or scheme.startswith('svn+'):
return self._download_svn(url, filename)
- elif scheme=='git' or scheme.startswith('git+'):
+ elif scheme == 'git' or scheme.startswith('git+'):
return self._download_git(url, filename)
elif scheme.startswith('hg+'):
return self._download_hg(url, filename)
- elif scheme=='file':
+ elif scheme == 'file':
return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
else:
self.url_ok(url, True) # raises error if not allowed
@@ -779,7 +807,7 @@ class PackageIndex(Environment):
def _attempt_download(self, url, filename):
headers = self._download_to(url, filename)
- if 'html' in headers.get('content-type','').lower():
+ if 'html' in headers.get('content-type', '').lower():
return self._download_html(url, headers, filename)
else:
return filename
@@ -797,22 +825,22 @@ class PackageIndex(Environment):
break # not an index page
file.close()
os.unlink(filename)
- raise DistutilsError("Unexpected HTML page found at "+url)
+ raise DistutilsError("Unexpected HTML page found at " + url)
def _download_svn(self, url, filename):
- url = url.split('#',1)[0] # remove any fragment for svn's sake
+ url = url.split('#', 1)[0] # remove any fragment for svn's sake
creds = ''
if url.lower().startswith('svn:') and '@' in url:
scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
if not netloc and path.startswith('//') and '/' in path[2:]:
- netloc, path = path[2:].split('/',1)
+ netloc, path = path[2:].split('/', 1)
auth, host = splituser(netloc)
if auth:
if ':' in auth:
- user, pw = auth.split(':',1)
+ user, pw = auth.split(':', 1)
creds = " --username=%s --password=%s" % (user, pw)
else:
- creds = " --username="+auth
+ creds = " --username=" + auth
netloc = host
parts = scheme, netloc, url, p, q, f
url = urllib.parse.urlunparse(parts)
@@ -827,7 +855,7 @@ class PackageIndex(Environment):
scheme = scheme.split('+', 1)[-1]
# Some fragment identification fails
- path = path.split('#',1)[0]
+ path = path.split('#', 1)[0]
rev = None
if '@' in path:
@@ -839,7 +867,7 @@ class PackageIndex(Environment):
return url, rev
def _download_git(self, url, filename):
- filename = filename.split('#',1)[0]
+ filename = filename.split('#', 1)[0]
url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
self.info("Doing git clone from %s to %s", url, filename)
@@ -855,7 +883,7 @@ class PackageIndex(Environment):
return filename
def _download_hg(self, url, filename):
- filename = filename.split('#',1)[0]
+ filename = filename.split('#', 1)[0]
url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
self.info("Doing hg clone from %s to %s", url, filename)
@@ -879,16 +907,20 @@ class PackageIndex(Environment):
def warn(self, msg, *args):
log.warn(msg, *args)
+
# This pattern matches a character entity reference (a decimal numeric
# references, a hexadecimal numeric reference, or a named reference).
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
+
def uchr(c):
if not isinstance(c, int):
return c
- if c>255: return six.unichr(c)
+ if c > 255:
+ return six.unichr(c)
return chr(c)
+
def decode_entity(match):
what = match.group(1)
if what.startswith('#x'):
@@ -899,10 +931,12 @@ def decode_entity(match):
what = six.moves.html_entities.name2codepoint.get(what, match.group(0))
return uchr(what)
+
def htmldecode(text):
"""Decode HTML entities in the given text."""
return entity_sub(decode_entity, text)
+
def socket_timeout(timeout=15):
def _socket_timeout(func):
def _socket_timeout(*args, **kwargs):
@@ -915,6 +949,7 @@ def socket_timeout(timeout=15):
return _socket_timeout
return _socket_timeout
+
def _encode_auth(auth):
"""
A function compatible with Python 2.3-3.3 that will encode
@@ -935,12 +970,14 @@ def _encode_auth(auth):
# convert back to a string
encoded = encoded_bytes.decode()
# strip the trailing carriage return
- return encoded.replace('\n','')
+ return encoded.replace('\n', '')
+
class Credential(object):
"""
A username/password pair. Use like a namedtuple.
"""
+
def __init__(self, username, password):
self.username = username
self.password = password
@@ -952,6 +989,7 @@ class Credential(object):
def __str__(self):
return '%(username)s:%(password)s' % vars(self)
+
class PyPIConfig(configparser.RawConfigParser):
def __init__(self):
@@ -1011,7 +1049,7 @@ def open_with_auth(url, opener=urllib.request.urlopen):
if cred:
auth = str(cred)
info = cred.username, url
- log.info('Authenticating as %s for %s (from .pypirc)' % info)
+ log.info('Authenticating as %s for %s (from .pypirc)', *info)
if auth:
auth = "Basic " + _encode_auth(auth)
@@ -1029,12 +1067,13 @@ def open_with_auth(url, opener=urllib.request.urlopen):
# Put authentication info back into request URL if same host,
# so that links found on the page will work
s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
- if s2==scheme and h2==host:
+ if s2 == scheme and h2 == host:
parts = s2, netloc, path2, param2, query2, frag2
fp.url = urllib.parse.urlunparse(parts)
return fp
+
# adding a timeout to avoid freezing package_index
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
@@ -1042,6 +1081,7 @@ open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
def fix_sf_url(url):
return url # backward compatibility
+
def local_open(url):
"""Read a local path, with special support for directories"""
scheme, server, path, param, query, frag = urllib.parse.urlparse(url)