summaryrefslogtreecommitdiff
path: root/coverage/files.py
diff options
context:
space:
mode:
Diffstat (limited to 'coverage/files.py')
-rw-r--r--coverage/files.py193
1 files changed, 177 insertions, 16 deletions
diff --git a/coverage/files.py b/coverage/files.py
index 9a8ac564..40af7bf7 100644
--- a/coverage/files.py
+++ b/coverage/files.py
@@ -1,22 +1,20 @@
"""File wrangling."""
-import fnmatch, os, sys
+from coverage.backward import to_string
+from coverage.misc import CoverageException
+import fnmatch, os, os.path, re, sys
class FileLocator(object):
"""Understand how filenames work."""
def __init__(self):
# The absolute path to our current directory.
- self.relative_dir = self.abs_file(os.curdir) + os.sep
+ self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep)
# Cache of results of calling the canonical_filename() method, to
# avoid duplicating work.
self.canonical_filename_cache = {}
- def abs_file(self, filename):
- """Return the absolute normalized form of `filename`."""
- return os.path.normcase(os.path.abspath(os.path.realpath(filename)))
-
def relative_filename(self, filename):
"""Return the relative form of `filename`.
@@ -24,8 +22,9 @@ class FileLocator(object):
`FileLocator` was constructed.
"""
- if filename.startswith(self.relative_dir):
- filename = filename.replace(self.relative_dir, "")
+ fnorm = os.path.normcase(filename)
+ if fnorm.startswith(self.relative_dir):
+ filename = filename[len(self.relative_dir):]
return filename
def canonical_filename(self, filename):
@@ -47,7 +46,7 @@ class FileLocator(object):
if os.path.exists(g):
f = g
break
- cf = self.abs_file(f)
+ cf = abs_file(f)
self.canonical_filename_cache[filename] = cf
return self.canonical_filename_cache[filename]
@@ -72,12 +71,76 @@ class FileLocator(object):
data = zi.get_data(parts[1])
except IOError:
continue
- if sys.version_info >= (3, 0):
- data = data.decode('utf8') # TODO: How to do this properly?
- return data
+ return to_string(data)
return None
+if sys.platform == 'win32':
+
+ def actual_path(path):
+ """Get the actual path of `path`, including the correct case."""
+ if path in actual_path.cache:
+ return actual_path.cache[path]
+
+ head, tail = os.path.split(path)
+ if not tail:
+ actpath = head
+ elif not head:
+ actpath = tail
+ else:
+ head = actual_path(head)
+ if head in actual_path.list_cache:
+ files = actual_path.list_cache[head]
+ else:
+ try:
+ files = os.listdir(head)
+ except OSError:
+ files = []
+ actual_path.list_cache[head] = files
+ normtail = os.path.normcase(tail)
+ for f in files:
+ if os.path.normcase(f) == normtail:
+ tail = f
+ break
+ actpath = os.path.join(head, tail)
+ actual_path.cache[path] = actpath
+ return actpath
+
+ actual_path.cache = {}
+ actual_path.list_cache = {}
+
+else:
+ def actual_path(filename):
+ """The actual path for non-Windows platforms."""
+ return filename
+
+def abs_file(filename):
+ """Return the absolute normalized form of `filename`."""
+ path = os.path.abspath(os.path.realpath(filename))
+ path = actual_path(path)
+ return path
+
+
+def prep_patterns(patterns):
+ """Prepare the file patterns for use in a `FnmatchMatcher`.
+
+ If a pattern starts with a wildcard, it is used as a pattern
+ as-is. If it does not start with a wildcard, then it is made
+ absolute with the current directory.
+
+ If `patterns` is None, an empty list is returned.
+
+ """
+ patterns = patterns or []
+ prepped = []
+ for p in patterns or []:
+ if p.startswith("*") or p.startswith("?"):
+ prepped.append(p)
+ else:
+ prepped.append(abs_file(p))
+ return prepped
+
+
class TreeMatcher(object):
"""A matcher for files in a tree."""
def __init__(self, directories):
@@ -119,14 +182,112 @@ class FnmatchMatcher(object):
return False
+def sep(s):
+ """Find the path separator used in this string, or os.sep if none."""
+ sep_match = re.search(r"[\\/]", s)
+ if sep_match:
+ the_sep = sep_match.group(0)
+ else:
+ the_sep = os.sep
+ return the_sep
+
+
+class PathAliases(object):
+ """A collection of aliases for paths.
+
+ When combining data files from remote machines, often the paths to source
+ code are different, for example, due to OS differences, or because of
+ serialized checkouts on continuous integration machines.
+
+ A `PathAliases` object tracks a list of pattern/result pairs, and can
+ map a path through those aliases to produce a unified path.
+
+ `locator` is a FileLocator that is used to canonicalize the results.
+
+ """
+ def __init__(self, locator=None):
+ self.aliases = []
+ self.locator = locator
+
+ def add(self, pattern, result):
+ """Add the `pattern`/`result` pair to the list of aliases.
+
+ `pattern` is an `fnmatch`-style pattern. `result` is a simple
+ string. When mapping paths, if a path starts with a match against
+ `pattern`, then that match is replaced with `result`. This models
+ isomorphic source trees being rooted at different places on two
+ different machines.
+
+ `pattern` can't end with a wildcard component, since that would
+ match an entire tree, and not just its root.
+
+ """
+ # The pattern can't end with a wildcard component.
+ pattern = pattern.rstrip(r"\/")
+ if pattern.endswith("*"):
+ raise CoverageException("Pattern must not end with wildcards.")
+ pattern_sep = sep(pattern)
+ pattern += pattern_sep
+
+ # Make a regex from the pattern. fnmatch always adds a \Z or $ to
+ # match the whole string, which we don't want.
+ regex_pat = fnmatch.translate(pattern).replace(r'\Z(', '(')
+ if regex_pat.endswith("$"):
+ regex_pat = regex_pat[:-1]
+ # We want */a/b.py to match on Windows to, so change slash to match
+ # either separator.
+ regex_pat = regex_pat.replace(r"\/", r"[\\/]")
+ # We want case-insensitive matching, so add that flag.
+ regex = re.compile(r"(?i)" + regex_pat)
+
+ # Normalize the result: it must end with a path separator.
+ result_sep = sep(result)
+ result = result.rstrip(r"\/") + result_sep
+ self.aliases.append((regex, result, pattern_sep, result_sep))
+
+ def map(self, path):
+ """Map `path` through the aliases.
+
+ `path` is checked against all of the patterns. The first pattern to
+ match is used to replace the root of the path with the result root.
+ Only one pattern is ever used. If no patterns match, `path` is
+ returned unchanged.
+
+ The separator style in the result is made to match that of the result
+ in the alias.
+
+ """
+ for regex, result, pattern_sep, result_sep in self.aliases:
+ m = regex.match(path)
+ if m:
+ new = path.replace(m.group(0), result)
+ if pattern_sep != result_sep:
+ new = new.replace(pattern_sep, result_sep)
+ if self.locator:
+ new = self.locator.canonical_filename(new)
+ return new
+ return path
+
+
def find_python_files(dirname):
- """Yield all of the importable Python files in `dirname`, recursively."""
- for dirpath, dirnames, filenames in os.walk(dirname, topdown=True):
- if '__init__.py' not in filenames:
+ """Yield all of the importable Python files in `dirname`, recursively.
+
+ To be importable, the files have to be in a directory with a __init__.py,
+ except for `dirname` itself, which isn't required to have one. The
+ assumption is that `dirname` was specified directly, so the user knows
+ best, but subdirectories are checked for a __init__.py to be sure we only
+ find the importable files.
+
+ """
+ for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dirname)):
+ if i > 0 and '__init__.py' not in filenames:
# If a directory doesn't have __init__.py, then it isn't
# importable and neither are its files
del dirnames[:]
continue
for filename in filenames:
- if fnmatch.fnmatch(filename, "*.py"):
+ # We're only interested in files that look like reasonable Python
+ # files: Must end with .py, and must not have certain funny
+ # characters that probably mean they are editor junk.
+ if re.match(r"^[^.#~!$@%^&*()+=,]+\.py$", filename):
yield os.path.join(dirpath, filename)