diff options
Diffstat (limited to 'coverage/files.py')
-rw-r--r-- | coverage/files.py | 193 |
1 files changed, 177 insertions, 16 deletions
diff --git a/coverage/files.py b/coverage/files.py index 9a8ac564..40af7bf7 100644 --- a/coverage/files.py +++ b/coverage/files.py @@ -1,22 +1,20 @@ """File wrangling.""" -import fnmatch, os, sys +from coverage.backward import to_string +from coverage.misc import CoverageException +import fnmatch, os, os.path, re, sys class FileLocator(object): """Understand how filenames work.""" def __init__(self): # The absolute path to our current directory. - self.relative_dir = self.abs_file(os.curdir) + os.sep + self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep) # Cache of results of calling the canonical_filename() method, to # avoid duplicating work. self.canonical_filename_cache = {} - def abs_file(self, filename): - """Return the absolute normalized form of `filename`.""" - return os.path.normcase(os.path.abspath(os.path.realpath(filename))) - def relative_filename(self, filename): """Return the relative form of `filename`. @@ -24,8 +22,9 @@ class FileLocator(object): `FileLocator` was constructed. """ - if filename.startswith(self.relative_dir): - filename = filename.replace(self.relative_dir, "") + fnorm = os.path.normcase(filename) + if fnorm.startswith(self.relative_dir): + filename = filename[len(self.relative_dir):] return filename def canonical_filename(self, filename): @@ -47,7 +46,7 @@ class FileLocator(object): if os.path.exists(g): f = g break - cf = self.abs_file(f) + cf = abs_file(f) self.canonical_filename_cache[filename] = cf return self.canonical_filename_cache[filename] @@ -72,12 +71,76 @@ class FileLocator(object): data = zi.get_data(parts[1]) except IOError: continue - if sys.version_info >= (3, 0): - data = data.decode('utf8') # TODO: How to do this properly? - return data + return to_string(data) return None +if sys.platform == 'win32': + + def actual_path(path): + """Get the actual path of `path`, including the correct case.""" + if path in actual_path.cache: + return actual_path.cache[path] + + head, tail = os.path.split(path) + if not tail: + actpath = head + elif not head: + actpath = tail + else: + head = actual_path(head) + if head in actual_path.list_cache: + files = actual_path.list_cache[head] + else: + try: + files = os.listdir(head) + except OSError: + files = [] + actual_path.list_cache[head] = files + normtail = os.path.normcase(tail) + for f in files: + if os.path.normcase(f) == normtail: + tail = f + break + actpath = os.path.join(head, tail) + actual_path.cache[path] = actpath + return actpath + + actual_path.cache = {} + actual_path.list_cache = {} + +else: + def actual_path(filename): + """The actual path for non-Windows platforms.""" + return filename + +def abs_file(filename): + """Return the absolute normalized form of `filename`.""" + path = os.path.abspath(os.path.realpath(filename)) + path = actual_path(path) + return path + + +def prep_patterns(patterns): + """Prepare the file patterns for use in a `FnmatchMatcher`. + + If a pattern starts with a wildcard, it is used as a pattern + as-is. If it does not start with a wildcard, then it is made + absolute with the current directory. + + If `patterns` is None, an empty list is returned. + + """ + patterns = patterns or [] + prepped = [] + for p in patterns or []: + if p.startswith("*") or p.startswith("?"): + prepped.append(p) + else: + prepped.append(abs_file(p)) + return prepped + + class TreeMatcher(object): """A matcher for files in a tree.""" def __init__(self, directories): @@ -119,14 +182,112 @@ class FnmatchMatcher(object): return False +def sep(s): + """Find the path separator used in this string, or os.sep if none.""" + sep_match = re.search(r"[\\/]", s) + if sep_match: + the_sep = sep_match.group(0) + else: + the_sep = os.sep + return the_sep + + +class PathAliases(object): + """A collection of aliases for paths. + + When combining data files from remote machines, often the paths to source + code are different, for example, due to OS differences, or because of + serialized checkouts on continuous integration machines. + + A `PathAliases` object tracks a list of pattern/result pairs, and can + map a path through those aliases to produce a unified path. + + `locator` is a FileLocator that is used to canonicalize the results. + + """ + def __init__(self, locator=None): + self.aliases = [] + self.locator = locator + + def add(self, pattern, result): + """Add the `pattern`/`result` pair to the list of aliases. + + `pattern` is an `fnmatch`-style pattern. `result` is a simple + string. When mapping paths, if a path starts with a match against + `pattern`, then that match is replaced with `result`. This models + isomorphic source trees being rooted at different places on two + different machines. + + `pattern` can't end with a wildcard component, since that would + match an entire tree, and not just its root. + + """ + # The pattern can't end with a wildcard component. + pattern = pattern.rstrip(r"\/") + if pattern.endswith("*"): + raise CoverageException("Pattern must not end with wildcards.") + pattern_sep = sep(pattern) + pattern += pattern_sep + + # Make a regex from the pattern. fnmatch always adds a \Z or $ to + # match the whole string, which we don't want. + regex_pat = fnmatch.translate(pattern).replace(r'\Z(', '(') + if regex_pat.endswith("$"): + regex_pat = regex_pat[:-1] + # We want */a/b.py to match on Windows to, so change slash to match + # either separator. + regex_pat = regex_pat.replace(r"\/", r"[\\/]") + # We want case-insensitive matching, so add that flag. + regex = re.compile(r"(?i)" + regex_pat) + + # Normalize the result: it must end with a path separator. + result_sep = sep(result) + result = result.rstrip(r"\/") + result_sep + self.aliases.append((regex, result, pattern_sep, result_sep)) + + def map(self, path): + """Map `path` through the aliases. + + `path` is checked against all of the patterns. The first pattern to + match is used to replace the root of the path with the result root. + Only one pattern is ever used. If no patterns match, `path` is + returned unchanged. + + The separator style in the result is made to match that of the result + in the alias. + + """ + for regex, result, pattern_sep, result_sep in self.aliases: + m = regex.match(path) + if m: + new = path.replace(m.group(0), result) + if pattern_sep != result_sep: + new = new.replace(pattern_sep, result_sep) + if self.locator: + new = self.locator.canonical_filename(new) + return new + return path + + def find_python_files(dirname): - """Yield all of the importable Python files in `dirname`, recursively.""" - for dirpath, dirnames, filenames in os.walk(dirname, topdown=True): - if '__init__.py' not in filenames: + """Yield all of the importable Python files in `dirname`, recursively. + + To be importable, the files have to be in a directory with a __init__.py, + except for `dirname` itself, which isn't required to have one. The + assumption is that `dirname` was specified directly, so the user knows + best, but subdirectories are checked for a __init__.py to be sure we only + find the importable files. + + """ + for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dirname)): + if i > 0 and '__init__.py' not in filenames: # If a directory doesn't have __init__.py, then it isn't # importable and neither are its files del dirnames[:] continue for filename in filenames: - if fnmatch.fnmatch(filename, "*.py"): + # We're only interested in files that look like reasonable Python + # files: Must end with .py, and must not have certain funny + # characters that probably mean they are editor junk. + if re.match(r"^[^.#~!$@%^&*()+=,]+\.py$", filename): yield os.path.join(dirpath, filename) |