diff options
author | Jason R. Coombs <jaraco@jaraco.com> | 2020-09-04 10:22:54 -0400 |
---|---|---|
committer | Jason R. Coombs <jaraco@jaraco.com> | 2020-09-04 10:22:54 -0400 |
commit | fc4d9828768ceebd2f9337481450c88376c013e9 (patch) | |
tree | 6dff622c50739ca19fdfbecf1008a42589d37b57 /setuptools/_distutils/filelist.py | |
parent | 7bb73a477de24069002516eb6eb1d755bed9d65b (diff) | |
parent | 03d36b9edb53e266a0b4b836e1e3178f989a0781 (diff) | |
download | python-setuptools-git-feature/implicit-bootstrap.tar.gz |
Merge branch 'master' into feature/implicit-bootstrapfeature/implicit-bootstrap
Diffstat (limited to 'setuptools/_distutils/filelist.py')
-rw-r--r-- | setuptools/_distutils/filelist.py | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/setuptools/_distutils/filelist.py b/setuptools/_distutils/filelist.py new file mode 100644 index 00000000..c92d5fdb --- /dev/null +++ b/setuptools/_distutils/filelist.py @@ -0,0 +1,327 @@ +"""distutils.filelist + +Provides the FileList class, used for poking about the filesystem +and building lists of files. +""" + +import os, re +import fnmatch +import functools +from distutils.util import convert_path +from distutils.errors import DistutilsTemplateError, DistutilsInternalError +from distutils import log + +class FileList: + """A list of files built by on exploring the filesystem and filtered by + applying various patterns to what we find there. + + Instance attributes: + dir + directory from which files will be taken -- only used if + 'allfiles' not supplied to constructor + files + list of filenames currently being built/filtered/manipulated + allfiles + complete list of files under consideration (ie. without any + filtering applied) + """ + + def __init__(self, warn=None, debug_print=None): + # ignore argument to FileList, but keep them for backwards + # compatibility + self.allfiles = None + self.files = [] + + def set_allfiles(self, allfiles): + self.allfiles = allfiles + + def findall(self, dir=os.curdir): + self.allfiles = findall(dir) + + def debug_print(self, msg): + """Print 'msg' to stdout if the global DEBUG (taken from the + DISTUTILS_DEBUG environment variable) flag is true. + """ + from distutils.debug import DEBUG + if DEBUG: + print(msg) + + # -- List-like methods --------------------------------------------- + + def append(self, item): + self.files.append(item) + + def extend(self, items): + self.files.extend(items) + + def sort(self): + # Not a strict lexical sort! + sortable_files = sorted(map(os.path.split, self.files)) + self.files = [] + for sort_tuple in sortable_files: + self.files.append(os.path.join(*sort_tuple)) + + + # -- Other miscellaneous utility methods --------------------------- + + def remove_duplicates(self): + # Assumes list has been sorted! + for i in range(len(self.files) - 1, 0, -1): + if self.files[i] == self.files[i - 1]: + del self.files[i] + + + # -- "File template" methods --------------------------------------- + + def _parse_template_line(self, line): + words = line.split() + action = words[0] + + patterns = dir = dir_pattern = None + + if action in ('include', 'exclude', + 'global-include', 'global-exclude'): + if len(words) < 2: + raise DistutilsTemplateError( + "'%s' expects <pattern1> <pattern2> ..." % action) + patterns = [convert_path(w) for w in words[1:]] + elif action in ('recursive-include', 'recursive-exclude'): + if len(words) < 3: + raise DistutilsTemplateError( + "'%s' expects <dir> <pattern1> <pattern2> ..." % action) + dir = convert_path(words[1]) + patterns = [convert_path(w) for w in words[2:]] + elif action in ('graft', 'prune'): + if len(words) != 2: + raise DistutilsTemplateError( + "'%s' expects a single <dir_pattern>" % action) + dir_pattern = convert_path(words[1]) + else: + raise DistutilsTemplateError("unknown action '%s'" % action) + + return (action, patterns, dir, dir_pattern) + + def process_template_line(self, line): + # Parse the line: split it up, make sure the right number of words + # is there, and return the relevant words. 'action' is always + # defined: it's the first word of the line. Which of the other + # three are defined depends on the action; it'll be either + # patterns, (dir and patterns), or (dir_pattern). + (action, patterns, dir, dir_pattern) = self._parse_template_line(line) + + # OK, now we know that the action is valid and we have the + # right number of words on the line for that action -- so we + # can proceed with minimal error-checking. + if action == 'include': + self.debug_print("include " + ' '.join(patterns)) + for pattern in patterns: + if not self.include_pattern(pattern, anchor=1): + log.warn("warning: no files found matching '%s'", + pattern) + + elif action == 'exclude': + self.debug_print("exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.exclude_pattern(pattern, anchor=1): + log.warn(("warning: no previously-included files " + "found matching '%s'"), pattern) + + elif action == 'global-include': + self.debug_print("global-include " + ' '.join(patterns)) + for pattern in patterns: + if not self.include_pattern(pattern, anchor=0): + log.warn(("warning: no files found matching '%s' " + "anywhere in distribution"), pattern) + + elif action == 'global-exclude': + self.debug_print("global-exclude " + ' '.join(patterns)) + for pattern in patterns: + if not self.exclude_pattern(pattern, anchor=0): + log.warn(("warning: no previously-included files matching " + "'%s' found anywhere in distribution"), + pattern) + + elif action == 'recursive-include': + self.debug_print("recursive-include %s %s" % + (dir, ' '.join(patterns))) + for pattern in patterns: + if not self.include_pattern(pattern, prefix=dir): + log.warn(("warning: no files found matching '%s' " + "under directory '%s'"), + pattern, dir) + + elif action == 'recursive-exclude': + self.debug_print("recursive-exclude %s %s" % + (dir, ' '.join(patterns))) + for pattern in patterns: + if not self.exclude_pattern(pattern, prefix=dir): + log.warn(("warning: no previously-included files matching " + "'%s' found under directory '%s'"), + pattern, dir) + + elif action == 'graft': + self.debug_print("graft " + dir_pattern) + if not self.include_pattern(None, prefix=dir_pattern): + log.warn("warning: no directories found matching '%s'", + dir_pattern) + + elif action == 'prune': + self.debug_print("prune " + dir_pattern) + if not self.exclude_pattern(None, prefix=dir_pattern): + log.warn(("no previously-included directories found " + "matching '%s'"), dir_pattern) + else: + raise DistutilsInternalError( + "this cannot happen: invalid action '%s'" % action) + + + # -- Filtering/selection methods ----------------------------------- + + def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0): + """Select strings (presumably filenames) from 'self.files' that + match 'pattern', a Unix-style wildcard (glob) pattern. Patterns + are not quite the same as implemented by the 'fnmatch' module: '*' + and '?' match non-special characters, where "special" is platform- + dependent: slash on Unix; colon, slash, and backslash on + DOS/Windows; and colon on Mac OS. + + If 'anchor' is true (the default), then the pattern match is more + stringent: "*.py" will match "foo.py" but not "foo/bar.py". If + 'anchor' is false, both of these will match. + + If 'prefix' is supplied, then only filenames starting with 'prefix' + (itself a pattern) and ending with 'pattern', with anything in between + them, will match. 'anchor' is ignored in this case. + + If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and + 'pattern' is assumed to be either a string containing a regex or a + regex object -- no translation is done, the regex is just compiled + and used as-is. + + Selected strings will be added to self.files. + + Return True if files are found, False otherwise. + """ + # XXX docstring lying about what the special chars are? + files_found = False + pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) + self.debug_print("include_pattern: applying regex r'%s'" % + pattern_re.pattern) + + # delayed loading of allfiles list + if self.allfiles is None: + self.findall() + + for name in self.allfiles: + if pattern_re.search(name): + self.debug_print(" adding " + name) + self.files.append(name) + files_found = True + return files_found + + + def exclude_pattern (self, pattern, + anchor=1, prefix=None, is_regex=0): + """Remove strings (presumably filenames) from 'files' that match + 'pattern'. Other parameters are the same as for + 'include_pattern()', above. + The list 'self.files' is modified in place. + Return True if files are found, False otherwise. + """ + files_found = False + pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) + self.debug_print("exclude_pattern: applying regex r'%s'" % + pattern_re.pattern) + for i in range(len(self.files)-1, -1, -1): + if pattern_re.search(self.files[i]): + self.debug_print(" removing " + self.files[i]) + del self.files[i] + files_found = True + return files_found + + +# ---------------------------------------------------------------------- +# Utility functions + +def _find_all_simple(path): + """ + Find all files under 'path' + """ + results = ( + os.path.join(base, file) + for base, dirs, files in os.walk(path, followlinks=True) + for file in files + ) + return filter(os.path.isfile, results) + + +def findall(dir=os.curdir): + """ + Find all files under 'dir' and return the list of full filenames. + Unless dir is '.', return full filenames with dir prepended. + """ + files = _find_all_simple(dir) + if dir == os.curdir: + make_rel = functools.partial(os.path.relpath, start=dir) + files = map(make_rel, files) + return list(files) + + +def glob_to_re(pattern): + """Translate a shell-like glob pattern to a regular expression; return + a string containing the regex. Differs from 'fnmatch.translate()' in + that '*' does not match "special characters" (which are + platform-specific). + """ + pattern_re = fnmatch.translate(pattern) + + # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which + # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, + # and by extension they shouldn't match such "special characters" under + # any OS. So change all non-escaped dots in the RE to match any + # character except the special characters (currently: just os.sep). + sep = os.sep + if os.sep == '\\': + # we're using a regex to manipulate a regex, so we need + # to escape the backslash twice + sep = r'\\\\' + escaped = r'\1[^%s]' % sep + pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re) + return pattern_re + + +def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0): + """Translate a shell-like wildcard pattern to a compiled regular + expression. Return the compiled regex. If 'is_regex' true, + then 'pattern' is directly compiled to a regex (if it's a string) + or just returned as-is (assumes it's a regex object). + """ + if is_regex: + if isinstance(pattern, str): + return re.compile(pattern) + else: + return pattern + + # ditch start and end characters + start, _, end = glob_to_re('_').partition('_') + + if pattern: + pattern_re = glob_to_re(pattern) + assert pattern_re.startswith(start) and pattern_re.endswith(end) + else: + pattern_re = '' + + if prefix is not None: + prefix_re = glob_to_re(prefix) + assert prefix_re.startswith(start) and prefix_re.endswith(end) + prefix_re = prefix_re[len(start): len(prefix_re) - len(end)] + sep = os.sep + if os.sep == '\\': + sep = r'\\' + pattern_re = pattern_re[len(start): len(pattern_re) - len(end)] + pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end) + else: # no prefix -- respect anchor flag + if anchor: + pattern_re = r'%s\A%s' % (start, pattern_re[len(start):]) + + return re.compile(pattern_re) |