From 37e2f6f5395586e12eb57a4b6fd598b29744de0c Mon Sep 17 00:00:00 2001 From: Donald Stufft Date: Tue, 9 Jul 2013 22:30:25 -0400 Subject: Enable using any guaranteed hash as a hash function --HG-- extra : rebase_source : 1c5040c4a89dfcd4ec8cf2ad64825d5bc73ebe30 --- setuptools/package_index.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'setuptools/package_index.py') diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 61a66c6d..e29a142c 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -11,11 +11,8 @@ from setuptools.compat import (urllib2, httplib, StringIO, HTTPError, url2pathname, name2codepoint, unichr, urljoin) from setuptools.compat import filterfalse -try: - from hashlib import md5 -except ImportError: - from md5 import md5 from fnmatch import translate +from setuptools.py24compat import hashlib from setuptools.py24compat import wraps from setuptools.py27compat import get_all_headers @@ -28,6 +25,7 @@ PYPI_MD5 = re.compile( ) URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() +_HASH_RE = re.compile(r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)') __all__ = [ 'PackageIndex', 'distros_for_url', 'parse_bdist_wininst', @@ -387,15 +385,19 @@ class PackageIndex(Environment): - def check_md5(self, cs, info, filename, tfp): - if re.match('md5=[0-9a-f]{32}$', info): - self.debug("Validating md5 checksum for %s", filename) - if cs.hexdigest() != info[4:]: + def check_hash(self, cs, info, filename, tfp): + match = _HASH_RE.search(info) + if match: + hash_name = match.group(1) + hash_data = match.group(2) + self.debug("Validating %s checksum for %s", hash_name, filename) + if cs.hexdigest() != hash_data: tfp.close() os.unlink(filename) raise DistutilsError( - "MD5 validation failed for "+os.path.basename(filename)+ - "; possible download problem?" 
+ "%s validation failed for %s; " + "possible download problem?" % ( + hash_name, os.path.basename(filename)) ) def add_find_links(self, urls): @@ -598,16 +600,19 @@ class PackageIndex(Environment): def _download_to(self, url, filename): self.info("Downloading %s", url) # Download the file - fp, tfp, info = None, None, None + fp, tfp, cs, info = None, None, None, None try: if '#' in url: url, info = url.split('#', 1) + hmatch = _HASH_RE.search(info) + hash_name = hmatch.group(1) + hash_data = hmatch.group(2) + cs = hashlib.new(hash_name) fp = self.open_url(url) if isinstance(fp, HTTPError): raise DistutilsError( "Can't download %s: %s %s" % (url, fp.code,fp.msg) ) - cs = md5() headers = fp.info() blocknum = 0 bs = self.dl_blocksize @@ -621,13 +626,14 @@ class PackageIndex(Environment): while True: block = fp.read(bs) if block: - cs.update(block) + if cs is not None: + cs.update(block) tfp.write(block) blocknum += 1 self.reporthook(url, filename, blocknum, bs, size) else: break - if info: self.check_md5(cs, info, filename, tfp) + if info: self.check_hash(cs, info, filename, tfp) return headers finally: if fp: fp.close() -- cgit v1.2.1 From 95bf90f3dae23f3d672450d94adfbcc55f9e252a Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 15 Jul 2013 12:06:54 -0400 Subject: Extracted hash-checking functionality into its own classes. Hashes are no longer checked when the proper pattern isn't detected. Fixes #42. 
--- setuptools/package_index.py | 99 +++++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 26 deletions(-) (limited to 'setuptools/package_index.py') diff --git a/setuptools/package_index.py b/setuptools/package_index.py index e29a142c..4c4a647d 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -25,7 +25,6 @@ PYPI_MD5 = re.compile( ) URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):',re.I).match EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() -_HASH_RE = re.compile(r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)') __all__ = [ 'PackageIndex', 'distros_for_url', 'parse_bdist_wininst', @@ -193,6 +192,61 @@ user_agent = "Python-urllib/%s setuptools/%s" % ( sys.version[:3], require('setuptools')[0].version ) +class ContentChecker(object): + """ + A null content checker that defines the interface for checking content + """ + def feed(self, block): + """ + Feed a block of data to the hash. + """ + return + + def check(self): + """ + Check the hash. Return False if validation fails. + """ + return True + + def report(self, reporter, template): + """ + Call reporter with information about the checker (hash name) + substituted into the template. 
+ """ + return + +class HashChecker(ContentChecker): + pattern = re.compile( + r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)=' + r'(?P<expected>[a-f0-9]+)' + ) + + def __init__(self, hash_name, expected): + self.hash = hashlib.new(hash_name) + self.expected = expected + + @classmethod + def from_url(cls, url): + "Construct a (possibly null) ContentChecker from a URL" + fragment = urlparse(url)[-1] + if not fragment: + return ContentChecker() + match = cls.pattern.search(fragment) + if not match: + return ContentChecker() + return cls(**match.groupdict()) + + def feed(self, block): + self.hash.update(block) + + def check(self): + return self.hash.hexdigest() == self.expected + + def report(self, reporter, template): + msg = template % self.hash.name + return reporter(msg) + + class PackageIndex(Environment): """A distribution index that scans web pages for download URLs""" @@ -385,20 +439,20 @@ class PackageIndex(Environment): - def check_hash(self, cs, info, filename, tfp): - match = _HASH_RE.search(info) - if match: - hash_name = match.group(1) - hash_data = match.group(2) - self.debug("Validating %s checksum for %s", hash_name, filename) - if cs.hexdigest() != hash_data: - tfp.close() - os.unlink(filename) - raise DistutilsError( - "%s validation failed for %s; " - "possible download problem?" % ( - hash_name, os.path.basename(filename)) - ) + def check_hash(self, checker, filename, tfp): + """ + checker is a ContentChecker + """ + checker.report(self.debug, + "Validating %%s checksum for %s" % filename) + if not checker.valid(): + tfp.close() + os.unlink(filename) + raise DistutilsError( + "%s validation failed for %s; " + "possible download problem?" 
% ( + checker.hash.name, os.path.basename(filename)) + ) def add_find_links(self, urls): """Add `urls` to the list that will be prescanned for searches""" @@ -600,14 +654,9 @@ class PackageIndex(Environment): def _download_to(self, url, filename): self.info("Downloading %s", url) # Download the file - fp, tfp, cs, info = None, None, None, None + fp, tfp, info = None, None, None try: - if '#' in url: - url, info = url.split('#', 1) - hmatch = _HASH_RE.search(info) - hash_name = hmatch.group(1) - hash_data = hmatch.group(2) - cs = hashlib.new(hash_name) + checker = HashChecker.from_url(url) fp = self.open_url(url) if isinstance(fp, HTTPError): raise DistutilsError( @@ -626,14 +675,13 @@ class PackageIndex(Environment): while True: block = fp.read(bs) if block: - if cs is not None: - cs.update(block) + checker.feed(block) tfp.write(block) blocknum += 1 self.reporthook(url, filename, blocknum, bs, size) else: break - if info: self.check_hash(cs, info, filename, tfp) + self.check_hash(checker, filename, tfp) return headers finally: if fp: fp.close() @@ -642,7 +690,6 @@ class PackageIndex(Environment): def reporthook(self, url, filename, blocknum, blksize, size): pass # no-op - def open_url(self, url, warning=None): if url.startswith('file:'): return local_open(url) -- cgit v1.2.1 From 915f05b7445af2c51b63d22e429d9a7397221518 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 15 Jul 2013 13:31:00 -0400 Subject: Use 'is_valid' instead of simply 'valid' or 'check', which are less clear about the purpose of the method. Fixes AttributeError introduced in 0.9.2. Fixes #42. 
--- setuptools/package_index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'setuptools/package_index.py') diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 4c4a647d..70aabd1b 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -202,7 +202,7 @@ class ContentChecker(object): """ return - def check(self): + def is_valid(self): """ Check the hash. Return False if validation fails. """ @@ -239,7 +239,7 @@ class HashChecker(ContentChecker): def feed(self, block): self.hash.update(block) - def check(self): + def is_valid(self): return self.hash.hexdigest() == self.expected def report(self, reporter, template): @@ -445,7 +445,7 @@ class PackageIndex(Environment): """ checker.report(self.debug, "Validating %%s checksum for %s" % filename) - if not checker.valid(): + if not checker.is_valid(): tfp.close() os.unlink(filename) raise DistutilsError( -- cgit v1.2.1 From 3e72e7f7eacca7db638f7230f93cf696d49c77bf Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 17 Jul 2013 11:28:44 -0400 Subject: Add compatibility for Python 2.4 when querying the hash name. Fixes #44 --- setuptools/package_index.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'setuptools/package_index.py') diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 70aabd1b..47f00c00 100755 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -242,8 +242,23 @@ class HashChecker(ContentChecker): def is_valid(self): return self.hash.hexdigest() == self.expected + def _get_hash_name(self): + """ + Python 2.4 implementation of MD5 doesn't supply a .name attribute + so provide that name. + + When Python 2.4 is no longer required, replace invocations of this + method with simply 'self.hash.name'. 
+ """ + try: + return self.hash.name + except AttributeError: + if 'md5' in str(type(self.hash)): + return 'md5' + raise + def report(self, reporter, template): - msg = template % self.hash.name + msg = template % self._get_hash_name() return reporter(msg) -- cgit v1.2.1