summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Lib/tarfile.py87
-rw-r--r--Lib/test/test_tarfile.py85
-rw-r--r--Misc/NEWS3
3 files changed, 143 insertions, 32 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 29e12f0216..f48b477bb6 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -195,7 +195,7 @@ def nti(s):
try:
n = int(nts(s) or "0", 8)
except ValueError:
- raise HeaderError("invalid header")
+ raise InvalidHeaderError("invalid header")
else:
n = 0L
for i in xrange(len(s) - 1):
@@ -346,8 +346,23 @@ class StreamError(TarError):
"""Exception for unsupported operations on stream-like TarFiles."""
pass
class HeaderError(TarError):
+ """Base exception for header errors."""
+ pass
+class EmptyHeaderError(HeaderError):
+ """Exception for empty headers."""
+ pass
+class TruncatedHeaderError(HeaderError):
+ """Exception for truncated headers."""
+ pass
+class EOFHeaderError(HeaderError):
+ """Exception for end of file headers."""
+ pass
+class InvalidHeaderError(HeaderError):
"""Exception for invalid headers."""
pass
+class SubsequentHeaderError(HeaderError):
+ """Exception for missing and invalid extended headers."""
+ pass
#---------------------------
# internal stream interface
@@ -1179,14 +1194,16 @@ class TarInfo(object):
def frombuf(cls, buf):
"""Construct a TarInfo object from a 512 byte string buffer.
"""
+ if len(buf) == 0:
+ raise EmptyHeaderError("empty header")
if len(buf) != BLOCKSIZE:
- raise HeaderError("truncated header")
+ raise TruncatedHeaderError("truncated header")
if buf.count(NUL) == BLOCKSIZE:
- raise HeaderError("empty header")
+ raise EOFHeaderError("end of file header")
chksum = nti(buf[148:156])
if chksum not in calc_chksums(buf):
- raise HeaderError("bad checksum")
+ raise InvalidHeaderError("bad checksum")
obj = cls()
obj.buf = buf
@@ -1225,8 +1242,6 @@ class TarInfo(object):
tarfile.
"""
buf = tarfile.fileobj.read(BLOCKSIZE)
- if not buf:
- return
obj = cls.frombuf(buf)
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
return obj._proc_member(tarfile)
@@ -1279,9 +1294,10 @@ class TarInfo(object):
buf = tarfile.fileobj.read(self._block(self.size))
# Fetch the next header and process it.
- next = self.fromtarfile(tarfile)
- if next is None:
- raise HeaderError("missing subsequent header")
+ try:
+ next = self.fromtarfile(tarfile)
+ except HeaderError:
+ raise SubsequentHeaderError("missing or bad subsequent header")
# Patch the TarInfo object from the next header with
# the longname information.
@@ -1386,12 +1402,12 @@ class TarInfo(object):
pos += length
# Fetch the next header.
- next = self.fromtarfile(tarfile)
+ try:
+ next = self.fromtarfile(tarfile)
+ except HeaderError:
+ raise SubsequentHeaderError("missing or bad subsequent header")
if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
- if next is None:
- raise HeaderError("missing subsequent header")
-
# Patch the TarInfo object with the extended header info.
next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
next.offset = self.offset
@@ -1565,12 +1581,16 @@ class TarFile(object):
if self.mode == "a":
# Move to the end of the archive,
# before the first empty block.
- self.firstmember = None
while True:
- if self.next() is None:
- if self.offset > 0:
- self.fileobj.seek(- BLOCKSIZE, 1)
+ self.fileobj.seek(self.offset)
+ try:
+ tarinfo = self.tarinfo.fromtarfile(self)
+ self.members.append(tarinfo)
+ except EOFHeaderError:
+ self.fileobj.seek(self.offset)
break
+ except HeaderError, e:
+ raise ReadError(str(e))
if self.mode in "aw":
self._loaded = True
@@ -1735,7 +1755,7 @@ class TarFile(object):
try:
t = cls.taropen(name, mode, fileobj, **kwargs)
- except IOError:
+ except (IOError, EOFError):
raise ReadError("not a bzip2 file")
t._extfileobj = False
return t
@@ -2307,24 +2327,37 @@ class TarFile(object):
# Read the next block.
self.fileobj.seek(self.offset)
+ tarinfo = None
while True:
try:
tarinfo = self.tarinfo.fromtarfile(self)
- if tarinfo is None:
- return
- self.members.append(tarinfo)
-
- except HeaderError, e:
+ except EOFHeaderError, e:
if self.ignore_zeros:
self._dbg(2, "0x%X: %s" % (self.offset, e))
self.offset += BLOCKSIZE
continue
- else:
- if self.offset == 0:
- raise ReadError(str(e))
- return None
+ except InvalidHeaderError, e:
+ if self.ignore_zeros:
+ self._dbg(2, "0x%X: %s" % (self.offset, e))
+ self.offset += BLOCKSIZE
+ continue
+ elif self.offset == 0:
+ raise ReadError(str(e))
+ except EmptyHeaderError:
+ if self.offset == 0:
+ raise ReadError("empty file")
+ except TruncatedHeaderError, e:
+ if self.offset == 0:
+ raise ReadError(str(e))
+ except SubsequentHeaderError, e:
+ raise ReadError(str(e))
break
+ if tarinfo is not None:
+ self.members.append(tarinfo)
+ else:
+ self._loaded = True
+
return tarinfo
#--------------------------------------------------------------------------
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 74a8dac713..20339dd0a7 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -136,7 +136,53 @@ class UstarReadTest(ReadTest):
fobj.close()
-class MiscReadTest(ReadTest):
+class CommonReadTest(ReadTest):
+
+ def test_empty_tarfile(self):
+ # Test for issue6123: Allow opening empty archives.
+ # This test checks if tarfile.open() is able to open an empty tar
+ # archive successfully. Note that an empty tar archive is not the
+ # same as an empty file!
+ tarfile.open(tmpname, self.mode.replace("r", "w")).close()
+ try:
+ tar = tarfile.open(tmpname, self.mode)
+ tar.getnames()
+ except tarfile.ReadError:
+ self.fail("tarfile.open() failed on empty archive")
+ self.assertListEqual(tar.getmembers(), [])
+
+ def test_null_tarfile(self):
+ # Test for issue6123: Allow opening empty archives.
+ # This test guarantees that tarfile.open() does not treat an empty
+ # file as an empty tar archive.
+ open(tmpname, "wb").close()
+ self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, self.mode)
+ self.assertRaises(tarfile.ReadError, tarfile.open, tmpname)
+
+ def test_ignore_zeros(self):
+ # Test TarFile's ignore_zeros option.
+ if self.mode.endswith(":gz"):
+ _open = gzip.GzipFile
+ elif self.mode.endswith(":bz2"):
+ _open = bz2.BZ2File
+ else:
+ _open = open
+
+ for char in ('\0', 'a'):
+ # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
+ # are ignored correctly.
+ fobj = _open(tmpname, "wb")
+ fobj.write(char * 1024)
+ fobj.write(tarfile.TarInfo("foo").tobuf())
+ fobj.close()
+
+ tar = tarfile.open(tmpname, mode="r", ignore_zeros=True)
+ self.assertListEqual(tar.getnames(), ["foo"],
+ "ignore_zeros=True should have skipped the %r-blocks" % char)
+ tar.close()
+
+
+class MiscReadTest(CommonReadTest):
def test_no_name_argument(self):
fobj = open(self.tarname, "rb")
@@ -265,7 +311,7 @@ class MiscReadTest(ReadTest):
tar.close()
-class StreamReadTest(ReadTest):
+class StreamReadTest(CommonReadTest):
mode="r|"
@@ -1111,12 +1157,12 @@ class AppendTest(unittest.TestCase):
self._test()
def test_empty(self):
- open(self.tarname, "w").close()
+ tarfile.open(self.tarname, "w:").close()
self._add_testfile()
self._test()
def test_empty_fileobj(self):
- fobj = StringIO.StringIO()
+ fobj = StringIO.StringIO("\0" * 1024)
self._add_testfile(fobj)
fobj.seek(0)
self._test(fileobj=fobj)
@@ -1146,6 +1192,29 @@ class AppendTest(unittest.TestCase):
self._create_testtar("w:bz2")
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
+ # Append mode is supposed to fail if the tarfile to append to
+ # does not end with a zero block.
+ def _test_error(self, data):
+ open(self.tarname, "wb").write(data)
+ self.assertRaises(tarfile.ReadError, self._add_testfile)
+
+ def test_null(self):
+ self._test_error("")
+
+ def test_incomplete(self):
+ self._test_error("\0" * 13)
+
+ def test_premature_eof(self):
+ data = tarfile.TarInfo("foo").tobuf()
+ self._test_error(data)
+
+ def test_trailing_garbage(self):
+ data = tarfile.TarInfo("foo").tobuf()
+ self._test_error(data + "\0" * 13)
+
+ def test_invalid(self):
+ self._test_error("a" * 512)
+
class LimitsTest(unittest.TestCase):
@@ -1247,10 +1316,16 @@ class Bz2PartialReadTest(unittest.TestCase):
raise AssertionError("infinite loop detected in tarfile.open()")
self.hit_eof = self.pos == self.len
return StringIO.StringIO.read(self, n)
+ def seek(self, *args):
+ self.hit_eof = False
+ return StringIO.StringIO.seek(self, *args)
data = bz2.compress(tarfile.TarInfo("foo").tobuf())
for x in range(len(data) + 1):
- tarfile.open(fileobj=MyStringIO(data[:x]), mode=mode)
+ try:
+ tarfile.open(fileobj=MyStringIO(data[:x]), mode=mode)
+ except tarfile.ReadError:
+ pass # we have no interest in ReadErrors
def test_partial_input(self):
self._test_partial_input("r")
diff --git a/Misc/NEWS b/Misc/NEWS
index c24f0679e2..d6616402f7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -476,6 +476,9 @@ Core and Builtins
Library
-------
+- Issue #6123: tarfile now opens empty archives correctly and consistently
+ raises ReadError on empty files.
+
- Issue #7354: distutils.tests.test_msvc9compiler - dragfullwindows can
be 2.