summaryrefslogtreecommitdiff
path: root/Lib/tarfile.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/tarfile.py')
-rwxr-xr-xLib/tarfile.py236
1 files changed, 157 insertions, 79 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index f6d7f79390..5f1a979ad0 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -38,6 +38,7 @@ __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
#---------
# Imports
#---------
+from builtins import open as bltn_open
import sys
import os
import io
@@ -56,17 +57,15 @@ except ImportError:
# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
- # WindowsError (1314) will be raised if the caller does not hold the
+ # OSError (winerror=1314) will be raised if the caller does not hold the
# SeCreateSymbolicLinkPrivilege privilege
- symlink_exception += (WindowsError,)
+ symlink_exception += (OSError,)
except NameError:
pass
# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
-from builtins import open as _open # Since 'open' is TarFile.open
-
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
@@ -140,30 +139,6 @@ PAX_NUMBER_FIELDS = {
}
#---------------------------------------------------------
-# Bits used in the mode field, values in octal.
-#---------------------------------------------------------
-S_IFLNK = 0o120000 # symbolic link
-S_IFREG = 0o100000 # regular file
-S_IFBLK = 0o060000 # block device
-S_IFDIR = 0o040000 # directory
-S_IFCHR = 0o020000 # character device
-S_IFIFO = 0o010000 # fifo
-
-TSUID = 0o4000 # set UID on execution
-TSGID = 0o2000 # set GID on execution
-TSVTX = 0o1000 # reserved
-
-TUREAD = 0o400 # read by owner
-TUWRITE = 0o200 # write by owner
-TUEXEC = 0o100 # execute/search by owner
-TGREAD = 0o040 # read by group
-TGWRITE = 0o020 # write by group
-TGEXEC = 0o010 # execute/search by group
-TOREAD = 0o004 # read by other
-TOWRITE = 0o002 # write by other
-TOEXEC = 0o001 # execute/search by other
-
-#---------------------------------------------------------
# initialization
#---------------------------------------------------------
if os.name in ("nt", "ce"):
@@ -203,7 +178,8 @@ def nti(s):
n = -(256 ** (len(s) - 1) - n)
else:
try:
- n = int(nts(s, "ascii", "strict") or "0", 8)
+ s = nts(s, "ascii", "strict")
+ n = int(s.strip() or "0", 8)
except ValueError:
raise InvalidHeaderError("invalid header")
return n
@@ -220,7 +196,7 @@ def itn(n, digits=8, format=DEFAULT_FORMAT):
# A 0o200 byte indicates a positive number, a 0o377 byte a negative
# number.
if 0 <= n < 8 ** (digits - 1):
- s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
+ s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL
elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
if n >= 0:
s = bytearray([0o200])
@@ -249,7 +225,7 @@ def calc_chksums(buf):
signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
return unsigned_chksum, signed_chksum
-def copyfileobj(src, dst, length=None):
+def copyfileobj(src, dst, length=None, exception=OSError):
"""Copy length bytes from fileobj src to fileobj dst.
If length is None, copy the entire content.
"""
@@ -264,13 +240,13 @@ def copyfileobj(src, dst, length=None):
for b in range(blocks):
buf = src.read(BUFSIZE)
if len(buf) < BUFSIZE:
- raise IOError("end of file reached")
+ raise exception("unexpected end of data")
dst.write(buf)
if remainder != 0:
buf = src.read(remainder)
if len(buf) < remainder:
- raise IOError("end of file reached")
+ raise exception("unexpected end of data")
dst.write(buf)
return
@@ -405,7 +381,7 @@ class _Stream:
if mode == "r":
self.dbuf = b""
self.cmp = bz2.BZ2Decompressor()
- self.exception = IOError
+ self.exception = OSError
else:
self.cmp = bz2.BZ2Compressor()
@@ -474,26 +450,26 @@ class _Stream:
if self.closed:
return
- if self.mode == "w" and self.comptype != "tar":
- self.buf += self.cmp.flush()
-
- if self.mode == "w" and self.buf:
- self.fileobj.write(self.buf)
- self.buf = b""
- if self.comptype == "gz":
- # The native zlib crc is an unsigned 32-bit integer, but
- # the Python wrapper implicitly casts that to a signed C
- # long. So, on a 32-bit box self.crc may "look negative",
- # while the same crc on a 64-bit box may "look positive".
- # To avoid irksome warnings from the `struct` module, force
- # it to look positive on all boxes.
- self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
- self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
-
- if not self._extfileobj:
- self.fileobj.close()
-
self.closed = True
+ try:
+ if self.mode == "w" and self.comptype != "tar":
+ self.buf += self.cmp.flush()
+
+ if self.mode == "w" and self.buf:
+ self.fileobj.write(self.buf)
+ self.buf = b""
+ if self.comptype == "gz":
+ # The native zlib crc is an unsigned 32-bit integer, but
+ # the Python wrapper implicitly casts that to a signed C
+ # long. So, on a 32-bit box self.crc may "look negative",
+ # while the same crc on a 64-bit box may "look positive".
+ # To avoid irksome warnings from the `struct` module, force
+ # it to look positive on all boxes.
+ self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
+ self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
+ finally:
+ if not self._extfileobj:
+ self.fileobj.close()
def _init_read_gz(self):
"""Initialize for reading a gzip compressed fileobj.
@@ -714,7 +690,10 @@ class _FileInFile(object):
length = min(size, stop - self.position)
if data:
self.fileobj.seek(offset + (self.position - start))
- buf += self.fileobj.read(length)
+ b = self.fileobj.read(length)
+ if len(b) != length:
+ raise ReadError("unexpected end of data")
+ buf += b
else:
buf += NUL * length
size -= length
@@ -1449,7 +1428,8 @@ class TarFile(object):
fileobj = bltn_open(name, self._mode)
self._extfileobj = False
else:
- if name is None and hasattr(fileobj, "name"):
+ if (name is None and hasattr(fileobj, "name") and
+ isinstance(fileobj.name, (str, bytes))):
name = fileobj.name
if hasattr(fileobj, "mode"):
self._mode = fileobj.mode
@@ -1672,7 +1652,7 @@ class TarFile(object):
try:
t = cls.taropen(name, mode, fileobj, **kwargs)
- except (IOError, EOFError):
+ except (OSError, EOFError):
fileobj.close()
if mode == 'r':
raise ReadError("not a bzip2 file")
@@ -1729,18 +1709,19 @@ class TarFile(object):
if self.closed:
return
- if self.mode in "aw":
- self.fileobj.write(NUL * (BLOCKSIZE * 2))
- self.offset += (BLOCKSIZE * 2)
- # fill up the end with zero-blocks
- # (like option -b20 for tar does)
- blocks, remainder = divmod(self.offset, RECORDSIZE)
- if remainder > 0:
- self.fileobj.write(NUL * (RECORDSIZE - remainder))
-
- if not self._extfileobj:
- self.fileobj.close()
self.closed = True
+ try:
+ if self.mode in "aw":
+ self.fileobj.write(NUL * (BLOCKSIZE * 2))
+ self.offset += (BLOCKSIZE * 2)
+ # fill up the end with zero-blocks
+ # (like option -b20 for tar does)
+ blocks, remainder = divmod(self.offset, RECORDSIZE)
+ if remainder > 0:
+ self.fileobj.write(NUL * (RECORDSIZE - remainder))
+ finally:
+ if not self._extfileobj:
+ self.fileobj.close()
def getmember(self, name):
"""Return a TarInfo object for member `name'. If `name' can not be
@@ -2042,7 +2023,7 @@ class TarFile(object):
try:
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
set_attrs=set_attrs)
- except EnvironmentError as e:
+ except OSError as e:
if self.errorlevel > 0:
raise
else:
@@ -2154,9 +2135,9 @@ class TarFile(object):
if tarinfo.sparse is not None:
for offset, size in tarinfo.sparse:
target.seek(offset)
- copyfileobj(source, target, size)
+ copyfileobj(source, target, size, ReadError)
else:
- copyfileobj(source, target, tarinfo.size)
+ copyfileobj(source, target, tarinfo.size, ReadError)
target.seek(tarinfo.size)
target.truncate()
@@ -2231,9 +2212,8 @@ class TarFile(object):
if tarinfo.issym() and hasattr(os, "lchown"):
os.lchown(targetpath, u, g)
else:
- if sys.platform != "os2emx":
- os.chown(targetpath, u, g)
- except EnvironmentError as e:
+ os.chown(targetpath, u, g)
+ except OSError as e:
raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
@@ -2242,7 +2222,7 @@ class TarFile(object):
if hasattr(os, 'chmod'):
try:
os.chmod(targetpath, tarinfo.mode)
- except EnvironmentError as e:
+ except OSError as e:
raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
@@ -2252,7 +2232,7 @@ class TarFile(object):
return
try:
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
- except EnvironmentError as e:
+ except OSError as e:
raise ExtractError("could not change modification time")
#--------------------------------------------------------------------------
@@ -2267,8 +2247,13 @@ class TarFile(object):
self.firstmember = None
return m
+ # Advance the file pointer.
+ if self.offset != self.fileobj.tell():
+ self.fileobj.seek(self.offset - 1)
+ if not self.fileobj.read(1):
+ raise ReadError("unexpected end of data")
+
# Read the next block.
- self.fileobj.seek(self.offset)
tarinfo = None
while True:
try:
@@ -2343,9 +2328,9 @@ class TarFile(object):
corresponds to TarFile's mode.
"""
if self.closed:
- raise IOError("%s is closed" % self.__class__.__name__)
+ raise OSError("%s is closed" % self.__class__.__name__)
if mode is not None and self.mode not in mode:
- raise IOError("bad operation for mode %r" % self.mode)
+ raise OSError("bad operation for mode %r" % self.mode)
def _find_link_target(self, tarinfo):
"""Find the target member of a symlink or hardlink member in the
@@ -2447,5 +2432,98 @@ def is_tarfile(name):
except TarError:
return False
-bltn_open = open
open = TarFile.open
+
+
+def main():
+ import argparse
+
+ description = 'A simple command line interface for tarfile module.'
+ parser = argparse.ArgumentParser(description=description)
+ parser.add_argument('-v', '--verbose', action='store_true', default=False,
+ help='Verbose output')
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument('-l', '--list', metavar='<tarfile>',
+ help='Show listing of a tarfile')
+ group.add_argument('-e', '--extract', nargs='+',
+ metavar=('<tarfile>', '<output_dir>'),
+ help='Extract tarfile into target dir')
+ group.add_argument('-c', '--create', nargs='+',
+ metavar=('<name>', '<file>'),
+ help='Create tarfile from sources')
+ group.add_argument('-t', '--test', metavar='<tarfile>',
+ help='Test if a tarfile is valid')
+ args = parser.parse_args()
+
+ if args.test:
+ src = args.test
+ if is_tarfile(src):
+ with open(src, 'r') as tar:
+ tar.getmembers()
+ print(tar.getmembers(), file=sys.stderr)
+ if args.verbose:
+ print('{!r} is a tar archive.'.format(src))
+ else:
+ parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
+
+ elif args.list:
+ src = args.list
+ if is_tarfile(src):
+ with TarFile.open(src, 'r:*') as tf:
+ tf.list(verbose=args.verbose)
+ else:
+ parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
+
+ elif args.extract:
+ if len(args.extract) == 1:
+ src = args.extract[0]
+ curdir = os.curdir
+ elif len(args.extract) == 2:
+ src, curdir = args.extract
+ else:
+ parser.exit(1, parser.format_help())
+
+ if is_tarfile(src):
+ with TarFile.open(src, 'r:*') as tf:
+ tf.extractall(path=curdir)
+ if args.verbose:
+ if curdir == '.':
+ msg = '{!r} file is extracted.'.format(src)
+ else:
+ msg = ('{!r} file is extracted '
+ 'into {!r} directory.').format(src, curdir)
+ print(msg)
+ else:
+ parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
+
+ elif args.create:
+ tar_name = args.create.pop(0)
+ _, ext = os.path.splitext(tar_name)
+ compressions = {
+ # gz
+ '.gz': 'gz',
+ '.tgz': 'gz',
+ # xz
+ '.xz': 'xz',
+ '.txz': 'xz',
+ # bz2
+ '.bz2': 'bz2',
+ '.tbz': 'bz2',
+ '.tbz2': 'bz2',
+ '.tb2': 'bz2',
+ }
+ tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
+ tar_files = args.create
+
+ with TarFile.open(tar_name, tar_mode) as tf:
+ for file_name in tar_files:
+ tf.add(file_name)
+
+ if args.verbose:
+ print('{!r} file created.'.format(tar_name))
+
+ else:
+ parser.exit(1, parser.format_help())
+
+if __name__ == '__main__':
+ main()