diff options
author | Lars Gustäbel <lars@gustaebel.de> | 2011-02-23 11:42:22 +0000 |
---|---|---|
committer | Lars Gustäbel <lars@gustaebel.de> | 2011-02-23 11:42:22 +0000 |
commit | dd071045e776e1c3e8cf6750a2fd1d0958bf19b3 (patch) | |
tree | 3afb00727522ffb897602ec1ae5d2a9ccfd3dce4 /Lib/test/test_tarfile.py | |
parent | 3eeee833915b96a15c60eafc317bb6822af2084c (diff) | |
download | cpython-git-dd071045e776e1c3e8cf6750a2fd1d0958bf19b3.tar.gz |
Issue #11224: Improved sparse file read support (r85916) introduced a
regression in _FileInFile which is used in file-like objects returned
by TarFile.extractfile(). The inefficient design of the
_FileInFile.read() method causes various dramatic side-effects and
errors:
- The data segment of a file member is read completely into memory
  every(!) time a small block is accessed. This is not only slow
  but may cause unexpected MemoryErrors with very large files.
- Reading members from compressed tar archives is even slower
  because of the excessive backwards seeking which is done when the
  same data segment is read over and over again.
- As a backwards seek on a TarFile opened in stream mode is not
  possible, using extractfile() fails with a StreamError.
Diffstat (limited to 'Lib/test/test_tarfile.py')
-rw-r--r-- | Lib/test/test_tarfile.py | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 94ef61c0ce..68e094d5db 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -419,6 +419,22 @@ class StreamReadTest(CommonReadTest):
 
     mode="r|"
 
+    def test_read_through(self):
+        # Issue #11224: A poorly designed _FileInFile.read() method
+        # caused seeking errors with stream tar files.
+        for tarinfo in self.tar:
+            if not tarinfo.isreg():
+                continue
+            fobj = self.tar.extractfile(tarinfo)
+            while True:
+                try:
+                    buf = fobj.read(512)
+                except tarfile.StreamError:
+                    self.fail("simple read-through using TarFile.extractfile() failed")
+                if not buf:
+                    break
+            fobj.close()
+
     def test_fileobj_regular_file(self):
         tarinfo = self.tar.next() # get "regtype" (can't use getmember)
         fobj = self.tar.extractfile(tarinfo)