diff options
Diffstat (limited to 'Lib/zipfile.py')
| -rw-r--r-- | Lib/zipfile.py | 410 | 
1 file changed, 234 insertions, 176 deletions
| diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 56a2479fb3..8dd064a2c6 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -371,7 +371,7 @@ class ZipInfo (object):              result.append(' filemode=%r' % stat.filemode(hi))          if lo:              result.append(' external_attr=%#x' % lo) -        isdir = self.filename[-1:] == '/' +        isdir = self.is_dir()          if not isdir or self.file_size:              result.append(' file_size=%r' % self.file_size)          if ((not isdir or self.compress_size) and @@ -469,6 +469,42 @@ class ZipInfo (object):              extra = extra[ln+4:] +    @classmethod +    def from_file(cls, filename, arcname=None): +        """Construct an appropriate ZipInfo for a file on the filesystem. + +        filename should be the path to a file or directory on the filesystem. + +        arcname is the name which it will have within the archive (by default, +        this will be the same as filename, but without a drive letter and with +        leading path separators removed). 
+        """ +        st = os.stat(filename) +        isdir = stat.S_ISDIR(st.st_mode) +        mtime = time.localtime(st.st_mtime) +        date_time = mtime[0:6] +        # Create ZipInfo instance to store file information +        if arcname is None: +            arcname = filename +        arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) +        while arcname[0] in (os.sep, os.altsep): +            arcname = arcname[1:] +        if isdir: +            arcname += '/' +        zinfo = cls(arcname, date_time) +        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes +        if isdir: +            zinfo.file_size = 0 +            zinfo.external_attr |= 0x10  # MS-DOS directory flag +        else: +            zinfo.file_size = st.st_size + +        return zinfo + +    def is_dir(self): +        """Return True if this archive member is a directory.""" +        return self.filename[-1] == '/' +  class _ZipDecrypter:      """Class to handle decryption of files stored within a ZIP archive. @@ -651,14 +687,19 @@ def _get_decompressor(compress_type):  class _SharedFile: -    def __init__(self, file, pos, close, lock): +    def __init__(self, file, pos, close, lock, writing):          self._file = file          self._pos = pos          self._close = close          self._lock = lock +        self._writing = writing      def read(self, n=-1):          with self._lock: +            if self._writing(): +                raise RuntimeError("Can't read from the ZIP file while there " +                        "is an open writing handle on it. " +                        "Close the writing handle before trying to read.")              self._file.seek(self._pos)              data = self._file.read(n)              self._pos = self._file.tell() @@ -702,9 +743,6 @@ class ZipExtFile(io.BufferedIOBase):      # Read from compressed files in 4k blocks.      MIN_READ_SIZE = 4096 -    # Search for universal newlines or line chunks. 
-    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') -      def __init__(self, fileobj, mode, zipinfo, decrypter=None,                   close_fileobj=False):          self._fileobj = fileobj @@ -721,7 +759,6 @@ class ZipExtFile(io.BufferedIOBase):          self._readbuffer = b''          self._offset = 0 -        self._universal = 'U' in mode          self.newlines = None          # Adjust read size for encrypted files since the first 12 bytes @@ -758,7 +795,7 @@ class ZipExtFile(io.BufferedIOBase):          If limit is specified, at most limit bytes will be read.          """ -        if not self._universal and limit < 0: +        if limit < 0:              # Shortcut common case - newline found in buffer.              i = self._readbuffer.find(b'\n', self._offset) + 1              if i > 0: @@ -766,41 +803,7 @@ class ZipExtFile(io.BufferedIOBase):                  self._offset = i                  return line -        if not self._universal: -            return io.BufferedIOBase.readline(self, limit) - -        line = b'' -        while limit < 0 or len(line) < limit: -            readahead = self.peek(2) -            if readahead == b'': -                return line - -            # -            # Search for universal newlines or line chunks. -            # -            # The pattern returns either a line chunk or a newline, but not -            # both. Combined with peek(2), we are assured that the sequence -            # '\r\n' is always retrieved completely and never split into -            # separate newlines - '\r', '\n' due to coincidental readaheads. 
-            # -            match = self.PATTERN.search(readahead) -            newline = match.group('newline') -            if newline is not None: -                if self.newlines is None: -                    self.newlines = [] -                if newline not in self.newlines: -                    self.newlines.append(newline) -                self._offset += len(newline) -                return line + b'\n' - -            chunk = match.group('chunk') -            if limit >= 0: -                chunk = chunk[: limit - len(line)] - -            self._offset += len(chunk) -            line += chunk - -        return line +        return io.BufferedIOBase.readline(self, limit)      def peek(self, n=1):          """Returns buffered bytes without advancing the position.""" @@ -958,6 +961,76 @@ class ZipExtFile(io.BufferedIOBase):              super().close() +class _ZipWriteFile(io.BufferedIOBase): +    def __init__(self, zf, zinfo, zip64): +        self._zinfo = zinfo +        self._zip64 = zip64 +        self._zipfile = zf +        self._compressor = _get_compressor(zinfo.compress_type) +        self._file_size = 0 +        self._compress_size = 0 +        self._crc = 0 + +    @property +    def _fileobj(self): +        return self._zipfile.fp + +    def writable(self): +        return True + +    def write(self, data): +        nbytes = len(data) +        self._file_size += nbytes +        self._crc = crc32(data, self._crc) +        if self._compressor: +            data = self._compressor.compress(data) +            self._compress_size += len(data) +        self._fileobj.write(data) +        return nbytes + +    def close(self): +        super().close() +        # Flush any data from the compressor, and update header info +        if self._compressor: +            buf = self._compressor.flush() +            self._compress_size += len(buf) +            self._fileobj.write(buf) +            self._zinfo.compress_size = self._compress_size +        else: +         
   self._zinfo.compress_size = self._file_size +        self._zinfo.CRC = self._crc +        self._zinfo.file_size = self._file_size + +        # Write updated header info +        if self._zinfo.flag_bits & 0x08: +            # Write CRC and file sizes after the file data +            fmt = '<LQQ' if self._zip64 else '<LLL' +            self._fileobj.write(struct.pack(fmt, self._zinfo.CRC, +                self._zinfo.compress_size, self._zinfo.file_size)) +            self._zipfile.start_dir = self._fileobj.tell() +        else: +            if not self._zip64: +                if self._file_size > ZIP64_LIMIT: +                    raise RuntimeError('File size unexpectedly exceeded ZIP64 ' +                                       'limit') +                if self._compress_size > ZIP64_LIMIT: +                    raise RuntimeError('Compressed size unexpectedly exceeded ' +                                       'ZIP64 limit') +            # Seek backwards and write file header (which will now include +            # correct CRC and file sizes) + +            # Preserve current position in file +            self._zipfile.start_dir = self._fileobj.tell() +            self._fileobj.seek(self._zinfo.header_offset) +            self._fileobj.write(self._zinfo.FileHeader(self._zip64)) +            self._fileobj.seek(self._zipfile.start_dir) + +        self._zipfile._writing = False + +        # Successfully written: Add file to our caches +        self._zipfile.filelist.append(self._zinfo) +        self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo +  class ZipFile:      """ Class with methods to open, read, write, close, list zip files. 
@@ -1020,6 +1093,7 @@ class ZipFile:          self._fileRefCnt = 1          self._lock = threading.RLock()          self._seekable = True +        self._writing = False          try:              if mode == 'r': @@ -1232,30 +1306,55 @@ class ZipFile:          with self.open(name, "r", pwd) as fp:              return fp.read() -    def open(self, name, mode="r", pwd=None): -        """Return file-like object for 'name'.""" -        if mode not in ("r", "U", "rU"): -            raise RuntimeError('open() requires mode "r", "U", or "rU"') -        if 'U' in mode: -            import warnings -            warnings.warn("'U' mode is deprecated", -                          DeprecationWarning, 2) +    def open(self, name, mode="r", pwd=None, *, force_zip64=False): +        """Return file-like object for 'name'. + +        name is a string for the file name within the ZIP file, or a ZipInfo +        object. + +        mode should be 'r' to read a file already in the ZIP file, or 'w' to +        write to a file newly added to the archive. + +        pwd is the password to decrypt files (only used for reading). + +        When writing, if the file size is not known in advance but may exceed +        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large +        files.  If the size is known in advance, it is best to pass a ZipInfo +        instance for name, with zinfo.file_size set. 
+        """ +        if mode not in {"r", "w"}: +            raise RuntimeError('open() requires mode "r" or "w"')          if pwd and not isinstance(pwd, bytes):              raise TypeError("pwd: expected bytes, got %s" % type(pwd)) +        if pwd and (mode == "w"): +            raise ValueError("pwd is only supported for reading files")          if not self.fp:              raise RuntimeError( -                "Attempt to read ZIP archive that was already closed") +                "Attempt to use ZIP archive that was already closed")          # Make sure we have an info object          if isinstance(name, ZipInfo):              # 'name' is already an info object              zinfo = name +        elif mode == 'w': +            zinfo = ZipInfo(name) +            zinfo.compress_type = self.compression          else:              # Get info object for name              zinfo = self.getinfo(name) +        if mode == 'w': +            return self._open_to_write(zinfo, force_zip64=force_zip64) + +        if self._writing: +            raise RuntimeError("Can't read from the ZIP file while there " +                    "is an open writing handle on it. " +                    "Close the writing handle before trying to read.") + +        # Open for reading:          self._fileRefCnt += 1 -        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock) +        zef_file = _SharedFile(self.fp, zinfo.header_offset, +                               self._fpclose, self._lock, lambda: self._writing)          try:              # Skip the file header:              fheader = zef_file.read(sizeFileHeader) @@ -1320,6 +1419,49 @@ class ZipFile:              zef_file.close()              raise +    def _open_to_write(self, zinfo, force_zip64=False): +        if force_zip64 and not self._allowZip64: +            raise ValueError( +                "force_zip64 is True, but allowZip64 was False when opening " +                "the ZIP file." 
+            ) +        if self._writing: +            raise RuntimeError("Can't write to the ZIP file while there is " +                               "another write handle open on it. " +                               "Close the first handle before opening another.") + +        # Sizes and CRC are overwritten with correct data after processing the file +        if not hasattr(zinfo, 'file_size'): +            zinfo.file_size = 0 +        zinfo.compress_size = 0 +        zinfo.CRC = 0 + +        zinfo.flag_bits = 0x00 +        if zinfo.compress_type == ZIP_LZMA: +            # Compressed data includes an end-of-stream (EOS) marker +            zinfo.flag_bits |= 0x02 +        if not self._seekable: +            zinfo.flag_bits |= 0x08 + +        if not zinfo.external_attr: +            zinfo.external_attr = 0o600 << 16  # permissions: ?rw------- + +        # Compressed size can be larger than uncompressed size +        zip64 = self._allowZip64 and \ +                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) + +        if self._seekable: +            self.fp.seek(self.start_dir) +        zinfo.header_offset = self.fp.tell() + +        self._writecheck(zinfo) +        self._didModify = True + +        self.fp.write(zinfo.FileHeader(zip64)) + +        self._writing = True +        return _ZipWriteFile(self, zinfo, zip64) +      def extract(self, member, path=None, pwd=None):          """Extract a member from the archive to the current working directory,             using its full name. 
Its file information is extracted as accurately @@ -1389,7 +1531,7 @@ class ZipFile:          if upperdirs and not os.path.exists(upperdirs):              os.makedirs(upperdirs) -        if member.filename[-1] == '/': +        if member.is_dir():              if not os.path.isdir(targetpath):                  os.mkdir(targetpath)              return targetpath @@ -1429,103 +1571,41 @@ class ZipFile:          if not self.fp:              raise RuntimeError(                  "Attempt to write to ZIP archive that was already closed") +        if self._writing: +            raise RuntimeError( +                "Can't write to ZIP archive while an open writing handle exists" +            ) -        st = os.stat(filename) -        isdir = stat.S_ISDIR(st.st_mode) -        mtime = time.localtime(st.st_mtime) -        date_time = mtime[0:6] -        # Create ZipInfo instance to store file information -        if arcname is None: -            arcname = filename -        arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) -        while arcname[0] in (os.sep, os.altsep): -            arcname = arcname[1:] -        if isdir: -            arcname += '/' -        zinfo = ZipInfo(arcname, date_time) -        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes -        if isdir: -            zinfo.compress_type = ZIP_STORED -        elif compress_type is None: -            zinfo.compress_type = self.compression +        zinfo = ZipInfo.from_file(filename, arcname) + +        if zinfo.is_dir(): +            zinfo.compress_size = 0 +            zinfo.CRC = 0          else: -            zinfo.compress_type = compress_type +            if compress_type is not None: +                zinfo.compress_type = compress_type +            else: +                zinfo.compress_type = self.compression -        zinfo.file_size = st.st_size -        zinfo.flag_bits = 0x00 -        with self._lock: -            if self._seekable: -                self.fp.seek(self.start_dir) 
-            zinfo.header_offset = self.fp.tell()    # Start of header bytes -            if zinfo.compress_type == ZIP_LZMA: +        if zinfo.is_dir(): +            with self._lock: +                if self._seekable: +                    self.fp.seek(self.start_dir) +                zinfo.header_offset = self.fp.tell()  # Start of header bytes +                if zinfo.compress_type == ZIP_LZMA:                  # Compressed data includes an end-of-stream (EOS) marker -                zinfo.flag_bits |= 0x02 +                    zinfo.flag_bits |= 0x02 -            self._writecheck(zinfo) -            self._didModify = True +                self._writecheck(zinfo) +                self._didModify = True -            if isdir: -                zinfo.file_size = 0 -                zinfo.compress_size = 0 -                zinfo.CRC = 0 -                zinfo.external_attr |= 0x10  # MS-DOS directory flag                  self.filelist.append(zinfo)                  self.NameToInfo[zinfo.filename] = zinfo                  self.fp.write(zinfo.FileHeader(False))                  self.start_dir = self.fp.tell() -                return - -            cmpr = _get_compressor(zinfo.compress_type) -            if not self._seekable: -                zinfo.flag_bits |= 0x08 -            with open(filename, "rb") as fp: -                # Must overwrite CRC and sizes with correct data later -                zinfo.CRC = CRC = 0 -                zinfo.compress_size = compress_size = 0 -                # Compressed size can be larger than uncompressed size -                zip64 = self._allowZip64 and \ -                    zinfo.file_size * 1.05 > ZIP64_LIMIT -                self.fp.write(zinfo.FileHeader(zip64)) -                file_size = 0 -                while 1: -                    buf = fp.read(1024 * 8) -                    if not buf: -                        break -                    file_size = file_size + len(buf) -                    CRC = crc32(buf, CRC) -     
               if cmpr: -                        buf = cmpr.compress(buf) -                        compress_size = compress_size + len(buf) -                    self.fp.write(buf) -            if cmpr: -                buf = cmpr.flush() -                compress_size = compress_size + len(buf) -                self.fp.write(buf) -                zinfo.compress_size = compress_size -            else: -                zinfo.compress_size = file_size -            zinfo.CRC = CRC -            zinfo.file_size = file_size -            if zinfo.flag_bits & 0x08: -                # Write CRC and file sizes after the file data -                fmt = '<LQQ' if zip64 else '<LLL' -                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, -                                          zinfo.file_size)) -                self.start_dir = self.fp.tell() -            else: -                if not zip64 and self._allowZip64: -                    if file_size > ZIP64_LIMIT: -                        raise RuntimeError('File size has increased during compressing') -                    if compress_size > ZIP64_LIMIT: -                        raise RuntimeError('Compressed size larger than uncompressed size') -                # Seek backwards and write file header (which will now include -                # correct CRC and file sizes) -                self.start_dir = self.fp.tell() # Preserve current position in file -                self.fp.seek(zinfo.header_offset) -                self.fp.write(zinfo.FileHeader(zip64)) -                self.fp.seek(self.start_dir) -            self.filelist.append(zinfo) -            self.NameToInfo[zinfo.filename] = zinfo +        else: +            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: +                shutil.copyfileobj(src, dest, 1024*8)      def writestr(self, zinfo_or_arcname, data, compress_type=None):          """Write a file into the archive.  
The contents is 'data', which @@ -1550,45 +1630,18 @@ class ZipFile:          if not self.fp:              raise RuntimeError(                  "Attempt to write to ZIP archive that was already closed") +        if self._writing: +            raise RuntimeError( +                "Can't write to ZIP archive while an open writing handle exists." +            ) + +        if compress_type is not None: +            zinfo.compress_type = compress_type          zinfo.file_size = len(data)            # Uncompressed size          with self._lock: -            if self._seekable: -                self.fp.seek(self.start_dir) -            zinfo.header_offset = self.fp.tell()    # Start of header data -            if compress_type is not None: -                zinfo.compress_type = compress_type -            zinfo.header_offset = self.fp.tell()    # Start of header data -            if compress_type is not None: -                zinfo.compress_type = compress_type -            if zinfo.compress_type == ZIP_LZMA: -                # Compressed data includes an end-of-stream (EOS) marker -                zinfo.flag_bits |= 0x02 - -            self._writecheck(zinfo) -            self._didModify = True -            zinfo.CRC = crc32(data)       # CRC-32 checksum -            co = _get_compressor(zinfo.compress_type) -            if co: -                data = co.compress(data) + co.flush() -                zinfo.compress_size = len(data)    # Compressed size -            else: -                zinfo.compress_size = zinfo.file_size -            zip64 = zinfo.file_size > ZIP64_LIMIT or \ -                zinfo.compress_size > ZIP64_LIMIT -            if zip64 and not self._allowZip64: -                raise LargeZipFile("Filesize would require ZIP64 extensions") -            self.fp.write(zinfo.FileHeader(zip64)) -            self.fp.write(data) -            if zinfo.flag_bits & 0x08: -                # Write CRC and file sizes after the file data -                fmt = '<LQQ' if 
zip64 else '<LLL' -                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, -                                          zinfo.file_size)) -            self.fp.flush() -            self.start_dir = self.fp.tell() -            self.filelist.append(zinfo) -            self.NameToInfo[zinfo.filename] = zinfo +            with self.open(zinfo, mode='w') as dest: +                dest.write(data)      def __del__(self):          """Call the "close()" method in case the user forgot.""" @@ -1600,6 +1653,11 @@ class ZipFile:          if self.fp is None:              return +        if self._writing: +            raise RuntimeError("Can't close the ZIP file while there is " +                               "an open writing handle on it. " +                               "Close the writing handle before closing the zip.") +          try:              if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records                  with self._lock: | 
