diff options
author | Tim Peters <tim.peters@gmail.com> | 2003-04-25 07:11:48 +0000 |
---|---|---|
committer | Tim Peters <tim.peters@gmail.com> | 2003-04-25 07:11:48 +0000 |
commit | c4e09400422487857a665a5c69a4e2d07a909aed (patch) | |
tree | 3b31be0701fea9d0131c84929e53eb1ae0713b49 | |
parent | e7adda903500e3467f8d892c6ad46a73a19164b4 (diff) | |
download | cpython-git-c4e09400422487857a665a5c69a4e2d07a909aed.tar.gz |
New generator os.walk() does a bit more than os.path.walk() does, and
seems much easier to use. Code, docs, NEWS, and additions to test_os.py
(testing this sucker is a bitch!).
-rw-r--r-- | Doc/lib/libos.tex | 65 | ||||
-rw-r--r-- | Doc/lib/libposixpath.tex | 6 | ||||
-rw-r--r-- | Lib/os.py | 81 | ||||
-rw-r--r-- | Lib/test/test_os.py | 88 | ||||
-rw-r--r-- | Misc/NEWS | 7 |
5 files changed, 242 insertions, 5 deletions
diff --git a/Doc/lib/libos.tex b/Doc/lib/libos.tex index 9714036055..e0a43eb274 100644 --- a/Doc/lib/libos.tex +++ b/Doc/lib/libos.tex @@ -1050,6 +1050,71 @@ which is used to set the access and modified times, respectively. Availability: Macintosh, \UNIX, Windows. \end{funcdesc} +\begin{funcdesc}{walk}{top\optional{, topdown=True}} +\index{directory!walking} +\index{directory!traversal} + +\function{walk()} generates the file names in a directory tree. +For each directory in the tree rooted at directory \var{top} (including +\var{top} itself), it yields a 3-tuple +\code{(\var{dirpath}, \var{dirnames}, \var{filenames})}. + +\var{dirpath} is a string, the path to the directory. \var{dirnames} is +a list of the names of the subdirectories in \var{dirpath} +(excluding \code{'.'} and \code{'..'}). \var{filenames} is a list of +the names of the non-directory files in \var{dirpath}. Note that the +names in the lists contain no path components. To get a full +path (which begins with \var{top}) to a file or directory in +\var{dirpath}, do \code{os.path.join(\var{dirpath}, \var{name})}. + +If optional argument \var{topdown} is true or not specified, the triple +for a directory is generated before the triples for any of its +subdirectories (directories are generated top down). If \var{topdown} is +false, the triple for a directory is generated after the triples for all +of its subdirectories (directories are generated bottom up). + +When \var{topdown} is true, the caller can modify the \var{dirnames} list +in-place (e.g., via \keyword{del} or slice assignment), and +\function{walk()} will only recurse into the subdirectories whose names +remain in \var{dirnames}; this can be used to prune the search, +impose a specific order of visiting, or even to inform \function{walk()} +about directories the caller creates or renames before it resumes +\function{walk()} again. 
Modifying \var{dirnames} when \var{topdown} is +false is ineffective, because in bottom-up mode the directories in +\var{dirnames} are generated before \var{dirnames} itself is generated. + +\begin{notice} +If you pass a relative pathname, don't change the current working +directory between resumptions of \function{walk()}. \function{walk()} +never changes the current directory, and assumes that its caller +doesn't either. +\end{notice} + +\begin{notice} +On systems that support symbolic links, links to subdirectories appear +in \var{dirnames} lists, but \function{walk()} will not visit them +(infinite loops are hard to avoid when following symbolic links). +To visit linked directories, you can identify them with +\code{os.path.islink(\var{path})}, and invoke \function{walk(\var{path})} +on each directly. +\end{notice} + +This example displays the number of bytes taken by non-directory files +in each directory under the starting directory, except that it doesn't +look under any CVS subdirectory: + +\begin{verbatim} +import os +from os.path import join, getsize +for root, dirs, files in os.walk('python/Lib/email'): + print root, "consumes", + print sum([getsize(join(root, name)) for name in files]), + print "bytes in", len(files), "non-directory files" + if 'CVS' in dirs: + dirs.remove('CVS') # don't visit CVS directories +\end{verbatim} +\versionadded{2.3} +\end{funcdesc} \subsection{Process Management \label{os-process}} diff --git a/Doc/lib/libposixpath.tex b/Doc/lib/libposixpath.tex index 2b26954b4a..93a280938a 100644 --- a/Doc/lib/libposixpath.tex +++ b/Doc/lib/libposixpath.tex @@ -237,6 +237,12 @@ directories you must identify them with \code{os.path.isdir(\var{file})}, and invoke \function{walk()} as necessary. \end{notice} + +\begin{seealso} + \seemodule{os}{The newer \function{os.walk()} generator supplies similar + functionality and can be easier to use.} 
+\end{seealso} + \end{funcdesc} \begin{datadesc}{supports_unicode_filenames} @@ -26,6 +26,7 @@ import sys _names = sys.builtin_module_names +# Note: more names are added to __all__ later. __all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep", "defpath", "name", "path"] @@ -158,7 +159,7 @@ def removedirs(name): Super-rmdir; remove a leaf directory and empty all intermediate ones. Works like rmdir except that, if the leaf directory is successfully removed, directories corresponding to rightmost path - segments will be pruned way until either the whole path is + segments will be pruned away until either the whole path is consumed or an error occurs. Errors during this latter phase are ignored -- they generally mean that a directory was not empty. @@ -202,6 +203,84 @@ def renames(old, new): __all__.extend(["makedirs", "removedirs", "renames"]) +def walk(top, topdown=True): + """Directory tree generator. + + For each directory in the directory tree rooted at top (including top + itself, but excluding '.' and '..'), yields a 3-tuple + + dirpath, dirnames, filenames + + dirpath is a string, the path to the directory. dirnames is a list of + the names of the subdirectories in dirpath (excluding '.' and '..'). + filenames is a list of the names of the non-directory files in dirpath. + Note that the names in the lists are just names, with no path components. + To get a full path (which begins with top) to a file or directory in + dirpath, do os.path.join(dirpath, name). + + If optional arg 'topdown' is true or not specified, the triple for a + directory is generated before the triples for any of its subdirectories + (directories are generated top down). If topdown is false, the triple + for a directory is generated after the triples for all of its + subdirectories (directories are generated bottom up). 
+ + When topdown is true, the caller can modify the dirnames list in-place + (e.g., via del or slice assignment), and walk will only recurse into the + subdirectories whose names remain in dirnames; this can be used to prune + the search, or to impose a specific order of visiting. Modifying + dirnames when topdown is false is ineffective, since the directories in + dirnames have already been generated by the time dirnames itself is + generated. + + Caution: if you pass a relative pathname for top, don't change the + current working directory between resumptions of walk. walk never + changes the current directory, and assumes that the client doesn't + either. + + Example: + + from os.path import join, getsize + for root, dirs, files in walk('python/Lib/email'): + print root, "consumes", + print sum([getsize(join(root, name)) for name in files]), + print "bytes in", len(files), "non-directory files" + if 'CVS' in dirs: + dirs.remove('CVS') # don't visit CVS directories + """ + + from os.path import join, isdir, islink + + # We may not have read permission for top, in which case we can't + # get a list of the files the directory contains. os.path.walk + # always suppressed the exception then, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. That logic is copied here. + try: + # Note that listdir and error are globals in this module due + # to earlier import-*. 
+ names = listdir(top) + except error: + return + + dirs, nondirs = [], [] + for name in names: + if isdir(join(top, name)): + dirs.append(name) + else: + nondirs.append(name) + + if topdown: + yield top, dirs, nondirs + for name in dirs: + path = join(top, name) + if not islink(path): + for x in walk(path, topdown): + yield x + if not topdown: + yield top, dirs, nondirs + +__all__.append("walk") + # Make sure os.environ exists, at least try: environ diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 2956d73bce..cf67ef83ac 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -202,11 +202,93 @@ class EnvironTests(TestMappingProtocol): os.environ.clear() os.environ.update(self.__save) +class WalkTests(unittest.TestCase): + """Tests for os.walk().""" + + def test_traversal(self): + import os + from os.path import join + + # Build: + # TESTFN/ a file kid and two directory kids + # tmp1 + # SUB1/ a file kid and a directory kid + # tmp2 + # SUB11/ no kids + # SUB2/ just a file kid + # tmp3 + sub1_path = join(TESTFN, "SUB1") + sub11_path = join(sub1_path, "SUB11") + sub2_path = join(TESTFN, "SUB2") + tmp1_path = join(TESTFN, "tmp1") + tmp2_path = join(sub1_path, "tmp2") + tmp3_path = join(sub2_path, "tmp3") + + # Create stuff. + os.makedirs(sub11_path) + os.makedirs(sub2_path) + for path in tmp1_path, tmp2_path, tmp3_path: + f = file(path, "w") + f.write("I'm " + path + " and proud of it. Blame test_os.\n") + f.close() + + # Walk top-down. + all = list(os.walk(TESTFN)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. 
+ # Not flipped: TESTFN, SUB1, SUB11, SUB2 + # flipped: TESTFN, SUB2, SUB1, SUB11 + flipped = all[0][1][0] != "SUB1" + all[0][1].sort() + self.assertEqual(all[0], (TESTFN, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[1 + flipped], (sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 + flipped], (sub11_path, [], [])) + self.assertEqual(all[3 - 2 * flipped], (sub2_path, [], ["tmp3"])) + + # Prune the search. + all = [] + for root, dirs, files in os.walk(TESTFN): + all.append((root, dirs, files)) + # Don't descend into SUB1. + if 'SUB1' in dirs: + # Note that this also mutates the dirs we appended to all! + dirs.remove('SUB1') + self.assertEqual(len(all), 2) + self.assertEqual(all[0], (TESTFN, ["SUB2"], ["tmp1"])) + self.assertEqual(all[1], (sub2_path, [], ["tmp3"])) + + # Walk bottom-up. + all = list(os.walk(TESTFN, topdown=False)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: SUB11, SUB1, SUB2, TESTFN + # flipped: SUB2, SUB11, SUB1, TESTFN + flipped = all[3][1][0] != "SUB1" + all[3][1].sort() + self.assertEqual(all[3], (TESTFN, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[flipped], (sub11_path, [], [])) + self.assertEqual(all[flipped + 1], (sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 - 2 * flipped], (sub2_path, [], ["tmp3"])) + + # Tear everything down. This is a decent use for bottom-up on + # Windows, which doesn't have a recursive delete command. The + # (not so) subtlety is that rmdir will fail unless the dir's + # kids are removed first, so bottom up is essential. 
+ for root, dirs, files in os.walk(TESTFN, topdown=False): + for name in files: + os.remove(join(root, name)) + for name in dirs: + os.rmdir(join(root, name)) + os.rmdir(TESTFN) + def test_main(): suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(TemporaryFileTests)) - suite.addTest(unittest.makeSuite(StatAttributeTests)) - suite.addTest(unittest.makeSuite(EnvironTests)) + for cls in (TemporaryFileTests, + StatAttributeTests, + EnvironTests, + WalkTests, + ): + suite.addTest(unittest.makeSuite(cls)) run_suite(suite) if __name__ == "__main__": @@ -127,7 +127,7 @@ Extension modules Subsumed the times() function into repeat(). Added chain() and cycle(). -- The rotor module is now deprecated; the encryption algorithm it uses +- The rotor module is now deprecated; the encryption algorithm it uses is not believed to be secure, and including crypto code with Python has implications for exporting and importing it in various countries. @@ -139,6 +139,11 @@ Extension modules Library ------- +- New generator function os.walk() is an easy-to-use alternative to + os.path.walk(). See os module docs for details. os.path.walk() + isn't deprecated at this time, but may become deprecated in a + future release. + - Added new module "platform" which provides a wide range of tools for querying platform dependent features. |