From 896fd3d8c7eec74766560559f095fe92e49ecfba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jurko=20Gospodneti=C4=87?= Date: Tue, 15 Apr 2014 17:31:55 +0200 Subject: quick-fix #168: avoid using stale cached zipped egg dist info in easy_install When installing a zipped egg into a Python environment with a same-named zipped egg already installed, the installation can fail with a zipimport.ZipImportError complaining about a 'bad local header' encountered in the new zip archive. This can occur if setuptools loads the original egg for some reason and the two zip archives have different content. Then if Python attempts to read a file from the new archive, it will expect it in a location pointed to by the original archive's directory. This will report an error if zipimport does not encounter the expected local file start header in the given location. The mismatch between the two archives can be reproduced by installing the same setuptools version (prior to this commit and after commit f40b810acc5f6494735c912a625d647dc2a3c582 that first introduced the requires.txt metadata information file into the setuptools project) twice from its sources - which can randomly fail due to the scenario described above. That will package the zipped egg archive twice, with each of the archives containing slightly different Python modules. In case this causes any of the compressed modules to have different size (easy_install.pyc is often the culprit here), then attempting to read any later file in the zip archive will fail (requires.txt package metadata file is often the culprit here). A similar scenario can be reproduced more consistently by manually modifying the setuptools easy_install.py source file before building the new egg, e.g. by adding some additional empty lines to its start. The underlying reason for this problem is setuptools using zipimporter instances with cached zip archive content directory information from the older zip archive, even after the old archive has been replaced. 
This patch cleans up only one such old zipimporter instance - one referenced via easy_install command's local_index attribute. That is the one that has been causing all the currently reported/reproduced installation failures. A clean solution needs to make certain there are no more zipimporter instances with stale archive content directory caches left behind after replacing a zipped egg archive with another. There are currently at least the following known potential sources for such stale zipimporter instances (all holding references to Distribution instances that can then hold a reference to a zipimporter related to their zipped egg archive): easy_install command attributes: local_index (Environment with a list of Distributions) package_index (PackageIndex with a list of Distributions) pth_file (PthDistributions with a list of Distributions) global pkg_resources.working_set object (holds a list of Distributions) imported module's __loader__ attribute (zipimporter instance) zipimport._zip_directory_cache sys.path_importer_cache Further debugging & development note: A complete list of all the currently active stale zipimporter instances can be read using CPython's gc module and its object reference introspection functionality (gc.get_objects() & gc.get_referrers()) from inside the uncache_zipdir() method in the setuptools easy_install.py module. That is the method called just after the old archive has been replaced by the new one and all the stale zipimporter instances were supposed to have been released. 
--HG-- extra : rebase_source : 041d2819881b8f7e5c4da333a387fc86d4f7b791 --- setuptools/command/easy_install.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'setuptools/command/easy_install.py') diff --git a/setuptools/command/easy_install.py b/setuptools/command/easy_install.py index de139f2f..10176874 100755 --- a/setuptools/command/easy_install.py +++ b/setuptools/command/easy_install.py @@ -646,6 +646,15 @@ Please make the appropriate changes for your system and try again. def process_distribution(self, requirement, dist, deps=True, *info): self.update_pth(dist) self.package_index.add(dist) + # First remove the dist from self.local_index, to avoid problems using + # old cached data in case its underlying file has been replaced. + # + # This is a quick-fix for a zipimporter caching issue in case the dist + # has been implemented as and already loaded from a zip file that got + # replaced later on. For more detailed information see setuptools issue + # #168 at 'http://bitbucket.org/pypa/setuptools/issue/168'. 
+ if dist in self.local_index[dist.key]: + self.local_index.remove(dist) self.local_index.add(dist) self.install_egg_scripts(dist) self.installed_projects[dist.key] = dist -- cgit v1.2.1 From cb4b1a9e751b10d63d91197934d1d8f8fff44be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jurko=20Gospodneti=C4=87?= Date: Tue, 15 Apr 2014 23:27:23 +0200 Subject: clean up easy_install.uncache_zipdir() code & comments --HG-- extra : rebase_source : 79778a670897cb92c17307f2535fcac6447e16b4 --- setuptools/command/easy_install.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'setuptools/command/easy_install.py') diff --git a/setuptools/command/easy_install.py b/setuptools/command/easy_install.py index 10176874..8c281590 100755 --- a/setuptools/command/easy_install.py +++ b/setuptools/command/easy_install.py @@ -1583,18 +1583,30 @@ def auto_chmod(func, arg, exc): reraise(et, (ev[0], ev[1] + (" %s %s" % (func,arg)))) def uncache_zipdir(path): - """Ensure that the importer caches dont have stale info for `path`""" - from zipimport import _zip_directory_cache as zdc - _uncache(path, zdc) + """ + Remove any globally cached zip file related data for `path` + + Stale zipimport.zipimporter objects need to be removed when a zip file is + replaced as they contain cached zip file directory information. If they are + asked to get data from their zip file, they will use that cached + information to calculate the data location in the zip file. This calculated + location may be incorrect for the replaced zip file, which may in turn + cause the read operation to either fail or return incorrect data. + + Note we have no way to clear any local caches from here. That is left up to + whomever is in charge of maintaining that cache. 
+ + """ + _uncache(path, zipimport._zip_directory_cache) _uncache(path, sys.path_importer_cache) def _uncache(path, cache): if path in cache: del cache[path] else: - path = normalize_path(path) + normalized_path = normalize_path(path) for p in cache: - if normalize_path(p)==path: + if normalize_path(p) == normalized_path: del cache[p] return -- cgit v1.2.1 From 92cbda9859d98267e36c836ca954ca884df9b07c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jurko=20Gospodneti=C4=87?= Date: Tue, 15 Apr 2014 23:35:44 +0200 Subject: make easy_install.uncache_zipdir() remove more stale zipimporter instances Since paths are case-insensitive on Windows, zipped egg modules may be loaded using different but equivalent paths. Importing each such different path causes a new zipimporter to be instantiated. Removing cached zipimporter instances must then not forget about removing those created for differently spelled paths to the same replaced egg. Other missed zipimporter instances are those used to access zipped eggs stored inside zipped eggs. When clearing zipimporter instances for a given path, we need to clear all the instances related to any of its subpaths as well. --HG-- extra : rebase_source : 86aeadd1e639fbc83d27a0c551fdc2b8a68a6f85 --- setuptools/command/easy_install.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'setuptools/command/easy_install.py') diff --git a/setuptools/command/easy_install.py b/setuptools/command/easy_install.py index 8c281590..d4bb2b90 100755 --- a/setuptools/command/easy_install.py +++ b/setuptools/command/easy_install.py @@ -1597,18 +1597,20 @@ def uncache_zipdir(path): whomever is in charge of maintaining that cache. 
""" - _uncache(path, zipimport._zip_directory_cache) - _uncache(path, sys.path_importer_cache) - -def _uncache(path, cache): - if path in cache: - del cache[path] - else: - normalized_path = normalize_path(path) - for p in cache: - if normalize_path(p) == normalized_path: - del cache[p] - return + normalized_path = normalize_path(path) + _uncache(normalized_path, zipimport._zip_directory_cache) + _uncache(normalized_path, sys.path_importer_cache) + +def _uncache(normalized_path, cache): + to_remove = [] + prefix_len = len(normalized_path) + for p in cache: + np = normalize_path(p) + if (np.startswith(normalized_path) and + np[prefix_len:prefix_len + 1] in (os.sep, '')): + to_remove.append(p) + for p in to_remove: + del cache[p] def is_python(text, filename=''): "Is this string a valid Python script?" -- cgit v1.2.1