summary | refs | log | tree | commit | diff
path: root/setuptools/package_index.py
diff options
context:
space:
mode:
Diffstat (limited to 'setuptools/package_index.py')
-rwxr-xr-x setuptools/package_index.py 10
1 files changed, 8 insertions, 2 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index 0ee21e3b..4393c83a 100755
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -139,20 +139,26 @@ REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs

    `url` is the base address against which every link found in `page` is
    resolved with ``urlparse.urljoin``; `page` is the text that is searched.
    Each resolved URL is yielded at most once, in order of first appearance.

    Two sources of links are scanned:

    * tags matched by ``REL`` whose comma-separated ``rel`` value contains
      ``homepage`` or ``download`` -- every ``HREF`` match inside such a tag;
    * the first ``HREF`` match following the literal markers
      ``<th>Home Page`` and ``<th>Download URL``.
    """
    # URLs already yielded; consulted *and updated* so duplicates are skipped.
    seen = set()

    for rel_match in REL.finditer(page):
        tag, rel = rel_match.groups()
        # Build a real collection (not a lazy ``map``) so it can be tested
        # for membership twice, and strip via the method so both byte and
        # unicode strings are accepted.
        rels = set(r.strip() for r in rel.lower().split(','))
        if 'homepage' in rels or 'download' in rels:
            for href_match in HREF.finditer(tag):
                # Keep `url` untouched: it must remain the base for every
                # subsequent join, so the result goes in a separate name.
                link = urlparse.urljoin(url, htmldecode(href_match.group(1)))
                if link not in seen:
                    seen.add(link)
                    yield link

    for marker in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(marker)
        if pos != -1:
            href_match = HREF.search(page, pos)
            if href_match:
                link = urlparse.urljoin(url, htmldecode(href_match.group(1)))
                if link not in seen:
                    seen.add(link)
                    yield link
+
user_agent = "Python-urllib/%s distribute/%s" % (
sys.version[:3], require('distribute')[0].version