summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Doc/lib/libos.tex 65
-rw-r--r-- Doc/lib/libposixpath.tex 6
-rw-r--r-- Lib/os.py 81
-rw-r--r-- Lib/test/test_os.py 88
-rw-r--r-- Misc/NEWS 7
5 files changed, 242 insertions, 5 deletions
diff --git a/Doc/lib/libos.tex b/Doc/lib/libos.tex
index 9714036055..e0a43eb274 100644
--- a/Doc/lib/libos.tex
+++ b/Doc/lib/libos.tex
@@ -1050,6 +1050,71 @@ which is used to set the access and modified times, respectively.
Availability: Macintosh, \UNIX, Windows.
\end{funcdesc}
+\begin{funcdesc}{walk}{top\optional{, topdown=True}}
+\index{directory!walking}
+\index{directory!traversal}
+
+\function{walk()} generates the file names in a directory tree.
+For each directory in the tree rooted at directory \var{top} (including
+\var{top} itself), it yields a 3-tuple
+\code{(\var{dirpath}, \var{dirnames}, \var{filenames})}.
+
+\var{dirpath} is a string, the path to the directory. \var{dirnames} is
+a list of the names of the subdirectories in \var{dirpath}
+(excluding \code{'.'} and \code{'..'}). \var{filenames} is a list of
+the names of the non-directory files in \var{dirpath}. Note that the
+names in the lists contain no path components. To get a full
+path (which begins with \var{top}) to a file or directory in
+\var{dirpath}, do \code{os.path.join(\var{dirpath}, \var{name})}.
+
+If optional argument \var{topdown} is true or not specified, the triple
+for a directory is generated before the triples for any of its
+subdirectories (directories are generated top down). If \var{topdown} is
+false, the triple for a directory is generated after the triples for all
+of its subdirectories (directories are generated bottom up).
+
+When \var{topdown} is true, the caller can modify the \var{dirnames} list
+in-place (e.g., via \keyword{del} or slice assignment), and
+\function{walk()} will only recurse into the subdirectories whose names
+remain in \var{dirnames}; this can be used to prune the search,
+impose a specific order of visiting, or even to inform \function{walk()}
+about directories the caller creates or renames before it resumes
+\function{walk()} again. Modifying \var{dirnames} when \var{topdown} is
+false is ineffective, because in bottom-up mode the directories in
+\var{dirnames} are generated before \var{dirnames} itself is generated.
+
+\begin{notice}
+If you pass a relative pathname, don't change the current working
+directory between resumptions of \function{walk()}. \function{walk()}
+never changes the current directory, and assumes that its caller
+doesn't either.
+\end{notice}
+
+\begin{notice}
+On systems that support symbolic links, links to subdirectories appear
+in \var{dirnames} lists, but \function{walk()} will not visit them
+(infinite loops are hard to avoid when following symbolic links).
+To visit linked directories, you can identify them with
+\code{os.path.islink(\var{path})}, and invoke \function{walk(\var{path})}
+on each directly.
+\end{notice}
+
+This example displays the number of bytes taken by non-directory files
+in each directory under the starting directory, except that it doesn't
+look under any CVS subdirectory:
+
+\begin{verbatim}
+import os
+from os.path import join, getsize
+for root, dirs, files in os.walk('python/Lib/email'):
+ print root, "consumes",
+ print sum([getsize(join(root, name)) for name in files]),
+ print "bytes in", len(files), "non-directory files"
+ if 'CVS' in dirs:
+ dirs.remove('CVS') # don't visit CVS directories
+\end{verbatim}
+\versionadded{2.3}
+\end{funcdesc}
\subsection{Process Management \label{os-process}}
diff --git a/Doc/lib/libposixpath.tex b/Doc/lib/libposixpath.tex
index 2b26954b4a..93a280938a 100644
--- a/Doc/lib/libposixpath.tex
+++ b/Doc/lib/libposixpath.tex
@@ -237,6 +237,12 @@ directories you must identify them with
\code{os.path.isdir(\var{file})}, and invoke \function{walk()} as
necessary.
\end{notice}
+
+\begin{seealso}
+ \seemodule{os}{The newer \function{os.walk()} generator supplies similar
+ functionality and can be easier to use.}
+\end{seealso}
+
\end{funcdesc}
\begin{datadesc}{supports_unicode_filenames}
diff --git a/Lib/os.py b/Lib/os.py
index 358c8c6269..69d1a44c85 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -26,6 +26,7 @@ import sys
_names = sys.builtin_module_names
+# Note: more names are added to __all__ later.
__all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep",
"defpath", "name", "path"]
@@ -158,7 +159,7 @@ def removedirs(name):
Super-rmdir; remove a leaf directory and empty all intermediate
ones. Works like rmdir except that, if the leaf directory is
successfully removed, directories corresponding to rightmost path
- segments will be pruned way until either the whole path is
+ segments will be pruned away until either the whole path is
consumed or an error occurs. Errors during this latter phase are
ignored -- they generally mean that a directory was not empty.
@@ -202,6 +203,84 @@ def renames(old, new):
__all__.extend(["makedirs", "removedirs", "renames"])
+def walk(top, topdown=True):
+ """Directory tree generator.
+
+ For each directory in the directory tree rooted at top (including top
+ itself, but excluding '.' and '..'), yields a 3-tuple
+
+ dirpath, dirnames, filenames
+
+ dirpath is a string, the path to the directory. dirnames is a list of
+ the names of the subdirectories in dirpath (excluding '.' and '..').
+ filenames is a list of the names of the non-directory files in dirpath.
+ Note that the names in the lists are just names, with no path components.
+ To get a full path (which begins with top) to a file or directory in
+ dirpath, do os.path.join(dirpath, name).
+
+ If optional arg 'topdown' is true or not specified, the triple for a
+ directory is generated before the triples for any of its subdirectories
+ (directories are generated top down). If topdown is false, the triple
+ for a directory is generated after the triples for all of its
+ subdirectories (directories are generated bottom up).
+
+ When topdown is true, the caller can modify the dirnames list in-place
+ (e.g., via del or slice assignment), and walk will only recurse into the
+ subdirectories whose names remain in dirnames; this can be used to prune
+ the search, or to impose a specific order of visiting. Modifying
+ dirnames when topdown is false is ineffective, since the directories in
+ dirnames have already been generated by the time dirnames itself is
+ generated.
+
+ Caution: if you pass a relative pathname for top, don't change the
+ current working directory between resumptions of walk. walk never
+ changes the current directory, and assumes that the client doesn't
+ either.
+
+ Example:
+
+ from os.path import join, getsize
+ for root, dirs, files in walk('python/Lib/email'):
+ print root, "consumes",
+ print sum([getsize(join(root, name)) for name in files]),
+ print "bytes in", len(files), "non-directory files"
+ if 'CVS' in dirs:
+ dirs.remove('CVS') # don't visit CVS directories
+ """
+
+ from os.path import join, isdir, islink
+
+ # We may not have read permission for top, in which case we can't
+ # get a list of the files the directory contains. os.path.walk
+ # always suppressed the exception then, rather than blow up for a
+ # minor reason when (say) a thousand readable directories are still
+ # left to visit. That logic is copied here.
+ try:
+ # Note that listdir and error are globals in this module due
+ # to earlier import-*.
+ names = listdir(top)
+ except error:
+ return
+
+ dirs, nondirs = [], []
+ for name in names:
+ if isdir(join(top, name)):
+ dirs.append(name)
+ else:
+ nondirs.append(name)
+
+ if topdown:
+ yield top, dirs, nondirs
+ for name in dirs:
+ path = join(top, name)
+ if not islink(path):
+ for x in walk(path, topdown):
+ yield x
+ if not topdown:
+ yield top, dirs, nondirs
+
+__all__.append("walk")
+
# Make sure os.environ exists, at least
try:
environ
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 2956d73bce..cf67ef83ac 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -202,11 +202,93 @@ class EnvironTests(TestMappingProtocol):
os.environ.clear()
os.environ.update(self.__save)
+class WalkTests(unittest.TestCase):
+ """Tests for os.walk()."""
+
+ def test_traversal(self):
+ import os
+ from os.path import join
+
+ # Build:
+ # TESTFN/ a file kid and two directory kids
+ # tmp1
+ # SUB1/ a file kid and a directory kid
+ # tmp2
+ # SUB11/ no kids
+ # SUB2/ just a file kid
+ # tmp3
+ sub1_path = join(TESTFN, "SUB1")
+ sub11_path = join(sub1_path, "SUB11")
+ sub2_path = join(TESTFN, "SUB2")
+ tmp1_path = join(TESTFN, "tmp1")
+ tmp2_path = join(sub1_path, "tmp2")
+ tmp3_path = join(sub2_path, "tmp3")
+
+ # Create stuff.
+ os.makedirs(sub11_path)
+ os.makedirs(sub2_path)
+ for path in tmp1_path, tmp2_path, tmp3_path:
+ f = file(path, "w")
+ f.write("I'm " + path + " and proud of it. Blame test_os.\n")
+ f.close()
+
+ # Walk top-down.
+ all = list(os.walk(TESTFN))
+ self.assertEqual(len(all), 4)
+ # We can't know which order SUB1 and SUB2 will appear in.
+ # Not flipped: TESTFN, SUB1, SUB11, SUB2
+ # flipped: TESTFN, SUB2, SUB1, SUB11
+ flipped = all[0][1][0] != "SUB1"
+ all[0][1].sort()
+ self.assertEqual(all[0], (TESTFN, ["SUB1", "SUB2"], ["tmp1"]))
+ self.assertEqual(all[1 + flipped], (sub1_path, ["SUB11"], ["tmp2"]))
+ self.assertEqual(all[2 + flipped], (sub11_path, [], []))
+ self.assertEqual(all[3 - 2 * flipped], (sub2_path, [], ["tmp3"]))
+
+ # Prune the search.
+ all = []
+ for root, dirs, files in os.walk(TESTFN):
+ all.append((root, dirs, files))
+ # Don't descend into SUB1.
+ if 'SUB1' in dirs:
+ # Note that this also mutates the dirs we appended to all!
+ dirs.remove('SUB1')
+ self.assertEqual(len(all), 2)
+ self.assertEqual(all[0], (TESTFN, ["SUB2"], ["tmp1"]))
+ self.assertEqual(all[1], (sub2_path, [], ["tmp3"]))
+
+ # Walk bottom-up.
+ all = list(os.walk(TESTFN, topdown=False))
+ self.assertEqual(len(all), 4)
+ # We can't know which order SUB1 and SUB2 will appear in.
+ # Not flipped: SUB11, SUB1, SUB2, TESTFN
+ # flipped: SUB2, SUB11, SUB1, TESTFN
+ flipped = all[3][1][0] != "SUB1"
+ all[3][1].sort()
+ self.assertEqual(all[3], (TESTFN, ["SUB1", "SUB2"], ["tmp1"]))
+ self.assertEqual(all[flipped], (sub11_path, [], []))
+ self.assertEqual(all[flipped + 1], (sub1_path, ["SUB11"], ["tmp2"]))
+ self.assertEqual(all[2 - 2 * flipped], (sub2_path, [], ["tmp3"]))
+
+ # Tear everything down. This is a decent use for bottom-up on
+ # Windows, which doesn't have a recursive delete command. The
+ # (not so) subtlety is that rmdir will fail unless the dir's
+ # kids are removed first, so bottom up is essential.
+ for root, dirs, files in os.walk(TESTFN, topdown=False):
+ for name in files:
+ os.remove(join(root, name))
+ for name in dirs:
+ os.rmdir(join(root, name))
+ os.rmdir(TESTFN)
+
def test_main():
suite = unittest.TestSuite()
- suite.addTest(unittest.makeSuite(TemporaryFileTests))
- suite.addTest(unittest.makeSuite(StatAttributeTests))
- suite.addTest(unittest.makeSuite(EnvironTests))
+ for cls in (TemporaryFileTests,
+ StatAttributeTests,
+ EnvironTests,
+ WalkTests,
+ ):
+ suite.addTest(unittest.makeSuite(cls))
run_suite(suite)
if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index f3fef11753..a6eb469a70 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -127,7 +127,7 @@ Extension modules
Subsumed the times() function into repeat().
Added chain() and cycle().
-- The rotor module is now deprecated; the encryption algorithm it uses
+- The rotor module is now deprecated; the encryption algorithm it uses
is not believed to be secure, and including crypto code with Python
has implications for exporting and importing it in various countries.
@@ -139,6 +139,11 @@ Extension modules
Library
-------
+- New generator function os.walk() is an easy-to-use alternative to
+ os.path.walk(). See os module docs for details. os.path.walk()
+ isn't deprecated at this time, but may become deprecated in a
+ future release.
+
- Added new module "platform" which provides a wide range of tools
for querying platform dependent features.