diff options
author | Sviatoslav Sydorenko <wk@sydorenko.org.ua> | 2020-12-31 18:04:46 +0100 |
---|---|---|
committer | Sviatoslav Sydorenko <wk@sydorenko.org.ua> | 2020-12-31 18:38:49 +0100 |
commit | c225c4c0f64bf044f2f82693df097ad07f9c12bd (patch) | |
tree | 689cc4f2aa36bdb781b1f2acb07fb915bc6cadb9 | |
parent | 699afd09f252025ff412c3be101d78576ce0fe60 (diff) | |
download | python-setuptools-git-c225c4c0f64bf044f2f82693df097ad07f9c12bd.tar.gz |
Simplify `PackageIndex.process_index`
-rw-r--r-- | setuptools/package_index.py | 68 |
1 files changed, 36 insertions, 32 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 3979b131..713391af 100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -428,49 +428,53 @@ class PackageIndex(Environment): dist.precedence = SOURCE_DIST self.add(dist) + def _scan(self, link): + # Process a URL to see if it's for a package page + NO_MATCH_SENTINEL = None, None + if not link.startswith(self.index_url): + return NO_MATCH_SENTINEL + + parts = list(map( + urllib.parse.unquote, link[len(self.index_url):].split('/') + )) + if len(parts) != 2 or '#' in parts[1]: + return NO_MATCH_SENTINEL + + # it's a package page, sanitize and index it + pkg = safe_name(parts[0]) + ver = safe_version(parts[1]) + self.package_pages.setdefault(pkg.lower(), {})[link] = True + return to_filename(pkg), to_filename(ver) + def process_index(self, url, page): """Process the contents of a PyPI page""" - def scan(link): - # Process a URL to see if it's for a package page - if link.startswith(self.index_url): - parts = list(map( - urllib.parse.unquote, link[len(self.index_url):].split('/') - )) - if len(parts) == 2 and '#' not in parts[1]: - # it's a package page, sanitize and index it - pkg = safe_name(parts[0]) - ver = safe_version(parts[1]) - self.package_pages.setdefault(pkg.lower(), {})[link] = True - return to_filename(pkg), to_filename(ver) - return None, None - # process an index page into the package-page index for match in HREF.finditer(page): try: - scan(urllib.parse.urljoin(url, htmldecode(match.group(1)))) + self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1)))) except ValueError: pass - pkg, ver = scan(url) # ensure this page is in the page index - if pkg: - # process individual package page - for new_url in find_external_links(url, page): - # Process the found URL - base, frag = egg_info_for_url(new_url) - if base.endswith('.py') and not frag: - if ver: - new_url += '#egg=%s-%s' % (pkg, ver) - else: - self.need_version_info(url) - self.scan_url(new_url) - - return PYPI_MD5.sub( - lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page - ) - else: + pkg, ver = self._scan(url) # ensure this page is in the page index + if not pkg: return "" # no sense double-scanning non-package pages + # process individual package page + for new_url in find_external_links(url, page): + # Process the found URL + base, frag = egg_info_for_url(new_url) + if base.endswith('.py') and not frag: + if ver: + new_url += '#egg=%s-%s' % (pkg, ver) + else: + self.need_version_info(url) + self.scan_url(new_url) + + return PYPI_MD5.sub( + lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page + ) + def need_version_info(self, url): self.scan_all( "Page at %s links to .py file(s) without version info; an index " |