Simplify `PackageIndex.process_index`

author: Sviatoslav Sydorenko <wk@sydorenko.org.ua> 2020-12-31 18:04:46 +0100
committer: Sviatoslav Sydorenko <wk@sydorenko.org.ua> 2020-12-31 18:38:49 +0100
commit: c225c4c0f64bf044f2f82693df097ad07f9c12bd (patch)
tree: 689cc4f2aa36bdb781b1f2acb07fb915bc6cadb9
parent: 699afd09f252025ff412c3be101d78576ce0fe60 (diff)
download: python-setuptools-git-c225c4c0f64bf044f2f82693df097ad07f9c12bd.tar.gz
1 file changed, 36 insertions, 32 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 3979b131..713391af 100644
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -428,49 +428,53 @@ class PackageIndex(Environment):
             dist.precedence = SOURCE_DIST
             self.add(dist)
 
+    def _scan(self, link):
+        # Process a URL to see if it's for a package page
+        NO_MATCH_SENTINEL = None, None
+        if not link.startswith(self.index_url):
+            return NO_MATCH_SENTINEL
+
+        parts = list(map(
+            urllib.parse.unquote, link[len(self.index_url):].split('/')
+        ))
+        if len(parts) != 2 or '#' in parts[1]:
+            return NO_MATCH_SENTINEL
+
+        # it's a package page, sanitize and index it
+        pkg = safe_name(parts[0])
+        ver = safe_version(parts[1])
+        self.package_pages.setdefault(pkg.lower(), {})[link] = True
+        return to_filename(pkg), to_filename(ver)
+
     def process_index(self, url, page):
         """Process the contents of a PyPI page"""
 
-        def scan(link):
-            # Process a URL to see if it's for a package page
-            if link.startswith(self.index_url):
-                parts = list(map(
-                    urllib.parse.unquote, link[len(self.index_url):].split('/')
-                ))
-                if len(parts) == 2 and '#' not in parts[1]:
-                    # it's a package page, sanitize and index it
-                    pkg = safe_name(parts[0])
-                    ver = safe_version(parts[1])
-                    self.package_pages.setdefault(pkg.lower(), {})[link] = True
-                    return to_filename(pkg), to_filename(ver)
-            return None, None
-
         # process an index page into the package-page index
         for match in HREF.finditer(page):
             try:
-                scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
+                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
             except ValueError:
                 pass
 
-        pkg, ver = scan(url)  # ensure this page is in the page index
-        if pkg:
-            # process individual package page
-            for new_url in find_external_links(url, page):
-                # Process the found URL
-                base, frag = egg_info_for_url(new_url)
-                if base.endswith('.py') and not frag:
-                    if ver:
-                        new_url += '#egg=%s-%s' % (pkg, ver)
-                    else:
-                        self.need_version_info(url)
-                self.scan_url(new_url)
-
-            return PYPI_MD5.sub(
-                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
-            )
-        else:
+        pkg, ver = self._scan(url)  # ensure this page is in the page index
+        if not pkg:
             return ""  # no sense double-scanning non-package pages
 
+        # process individual package page
+        for new_url in find_external_links(url, page):
+            # Process the found URL
+            base, frag = egg_info_for_url(new_url)
+            if base.endswith('.py') and not frag:
+                if ver:
+                    new_url += '#egg=%s-%s' % (pkg, ver)
+                else:
+                    self.need_version_info(url)
+            self.scan_url(new_url)
+
+        return PYPI_MD5.sub(
+            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
+        )
+
     def need_version_info(self, url):
         self.scan_all(
             "Page at %s links to .py file(s) without version info; an index "
author	Sviatoslav Sydorenko <wk@sydorenko.org.ua>	2020-12-31 18:04:46 +0100
committer	Sviatoslav Sydorenko <wk@sydorenko.org.ua>	2020-12-31 18:38:49 +0100
commit	c225c4c0f64bf044f2f82693df097ad07f9c12bd (patch)
tree	689cc4f2aa36bdb781b1f2acb07fb915bc6cadb9
parent	699afd09f252025ff412c3be101d78576ce0fe60 (diff)
download	python-setuptools-git-c225c4c0f64bf044f2f82693df097ad07f9c12bd.tar.gz