summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSviatoslav Sydorenko <wk@sydorenko.org.ua>2020-12-31 18:04:46 +0100
committerSviatoslav Sydorenko <wk@sydorenko.org.ua>2020-12-31 18:38:49 +0100
commitc225c4c0f64bf044f2f82693df097ad07f9c12bd (patch)
tree689cc4f2aa36bdb781b1f2acb07fb915bc6cadb9
parent699afd09f252025ff412c3be101d78576ce0fe60 (diff)
downloadpython-setuptools-git-c225c4c0f64bf044f2f82693df097ad07f9c12bd.tar.gz
Simplify `PackageIndex.process_index`
-rw-r--r--setuptools/package_index.py68
1 files changed, 36 insertions, 32 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 3979b131..713391af 100644
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -428,49 +428,53 @@ class PackageIndex(Environment):
dist.precedence = SOURCE_DIST
self.add(dist)
+ def _scan(self, link):
+ # Process a URL to see if it's for a package page
+ NO_MATCH_SENTINEL = None, None
+ if not link.startswith(self.index_url):
+ return NO_MATCH_SENTINEL
+
+ parts = list(map(
+ urllib.parse.unquote, link[len(self.index_url):].split('/')
+ ))
+ if len(parts) != 2 or '#' in parts[1]:
+ return NO_MATCH_SENTINEL
+
+ # it's a package page, sanitize and index it
+ pkg = safe_name(parts[0])
+ ver = safe_version(parts[1])
+ self.package_pages.setdefault(pkg.lower(), {})[link] = True
+ return to_filename(pkg), to_filename(ver)
+
def process_index(self, url, page):
"""Process the contents of a PyPI page"""
- def scan(link):
- # Process a URL to see if it's for a package page
- if link.startswith(self.index_url):
- parts = list(map(
- urllib.parse.unquote, link[len(self.index_url):].split('/')
- ))
- if len(parts) == 2 and '#' not in parts[1]:
- # it's a package page, sanitize and index it
- pkg = safe_name(parts[0])
- ver = safe_version(parts[1])
- self.package_pages.setdefault(pkg.lower(), {})[link] = True
- return to_filename(pkg), to_filename(ver)
- return None, None
-
# process an index page into the package-page index
for match in HREF.finditer(page):
try:
- scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
+ self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
except ValueError:
pass
- pkg, ver = scan(url) # ensure this page is in the page index
- if pkg:
- # process individual package page
- for new_url in find_external_links(url, page):
- # Process the found URL
- base, frag = egg_info_for_url(new_url)
- if base.endswith('.py') and not frag:
- if ver:
- new_url += '#egg=%s-%s' % (pkg, ver)
- else:
- self.need_version_info(url)
- self.scan_url(new_url)
-
- return PYPI_MD5.sub(
- lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
- )
- else:
+ pkg, ver = self._scan(url) # ensure this page is in the page index
+ if not pkg:
return "" # no sense double-scanning non-package pages
+ # process individual package page
+ for new_url in find_external_links(url, page):
+ # Process the found URL
+ base, frag = egg_info_for_url(new_url)
+ if base.endswith('.py') and not frag:
+ if ver:
+ new_url += '#egg=%s-%s' % (pkg, ver)
+ else:
+ self.need_version_info(url)
+ self.scan_url(new_url)
+
+ return PYPI_MD5.sub(
+ lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
+ )
+
def need_version_info(self, url):
self.scan_all(
"Page at %s links to .py file(s) without version info; an index "