diff options
Diffstat (limited to 'Lib/zipfile.py')
| -rw-r--r-- | Lib/zipfile.py | 410 | 
1 file changed, 234 insertions, 176 deletions
| diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 56a2479fb3..8dd064a2c6 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -371,7 +371,7 @@ class ZipInfo (object):              result.append(' filemode=%r' % stat.filemode(hi))          if lo:              result.append(' external_attr=%#x' % lo) -        isdir = self.filename[-1:] == '/' +        isdir = self.is_dir()          if not isdir or self.file_size:              result.append(' file_size=%r' % self.file_size)          if ((not isdir or self.compress_size) and @@ -469,6 +469,42 @@ class ZipInfo (object):              extra = extra[ln+4:] +    @classmethod +    def from_file(cls, filename, arcname=None): +        """Construct an appropriate ZipInfo for a file on the filesystem. + +        filename should be the path to a file or directory on the filesystem. + +        arcname is the name which it will have within the archive (by default, +        this will be the same as filename, but without a drive letter and with +        leading path separators removed). 
+        """ +        st = os.stat(filename) +        isdir = stat.S_ISDIR(st.st_mode) +        mtime = time.localtime(st.st_mtime) +        date_time = mtime[0:6] +        # Create ZipInfo instance to store file information +        if arcname is None: +            arcname = filename +        arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) +        while arcname[0] in (os.sep, os.altsep): +            arcname = arcname[1:] +        if isdir: +            arcname += '/' +        zinfo = cls(arcname, date_time) +        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes +        if isdir: +            zinfo.file_size = 0 +            zinfo.external_attr |= 0x10  # MS-DOS directory flag +        else: +            zinfo.file_size = st.st_size + +        return zinfo + +    def is_dir(self): +        """Return True if this archive member is a directory.""" +        return self.filename[-1] == '/' +  class _ZipDecrypter:      """Class to handle decryption of files stored within a ZIP archive. @@ -651,14 +687,19 @@ def _get_decompressor(compress_type):  class _SharedFile: -    def __init__(self, file, pos, close, lock): +    def __init__(self, file, pos, close, lock, writing):          self._file = file          self._pos = pos          self._close = close          self._lock = lock +        self._writing = writing      def read(self, n=-1):          with self._lock: +            if self._writing(): +                raise RuntimeError("Can't read from the ZIP file while there " +                        "is an open writing handle on it. " +                        "Close the writing handle before trying to read.")              self._file.seek(self._pos)              data = self._file.read(n)              self._pos = self._file.tell() @@ -702,9 +743,6 @@ class ZipExtFile(io.BufferedIOBase):      # Read from compressed files in 4k blocks.      MIN_READ_SIZE = 4096 -    # Search for universal newlines or line chunks. 
-    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') -      def __init__(self, fileobj, mode, zipinfo, decrypter=None,                   close_fileobj=False):          self._fileobj = fileobj @@ -721,7 +759,6 @@ class ZipExtFile(io.BufferedIOBase):          self._readbuffer = b''          self._offset = 0 -        self._universal = 'U' in mode          self.newlines = None          # Adjust read size for encrypted files since the first 12 bytes @@ -758,7 +795,7 @@ class ZipExtFile(io.BufferedIOBase):          If limit is specified, at most limit bytes will be read.          """ -        if not self._universal and limit < 0: +        if limit < 0:              # Shortcut common case - newline found in buffer.              i = self._readbuffer.find(b'\n', self._offset) + 1              if i > 0: @@ -766,41 +803,7 @@ class ZipExtFile(io.BufferedIOBase):                  self._offset = i                  return line -        if not self._universal: -            return io.BufferedIOBase.readline(self, limit) - -        line = b'' -        while limit < 0 or len(line) < limit: -            readahead = self.peek(2) -            if readahead == b'': -                return line - -            # -            # Search for universal newlines or line chunks. -            # -            # The pattern returns either a line chunk or a newline, but not -            # both. Combined with peek(2), we are assured that the sequence -            # '\r\n' is always retrieved completely and never split into -            # separate newlines - '\r', '\n' due to coincidental readaheads. 
-            # -            match = self.PATTERN.search(readahead) -            newline = match.group('newline') -            if newline is not None: -                if self.newlines is None: -                    self.newlines = [] -                if newline not in self.newlines: -                    self.newlines.append(newline) -                self._offset += len(newline) -                return line + b'\n' - -            chunk = match.group('chunk') -            if limit >= 0: -                chunk = chunk[: limit - len(line)] - -            self._offset += len(chunk) -            line += chunk - -        return line +        return io.BufferedIOBase.readline(self, limit)      def peek(self, n=1):          """Returns buffered bytes without advancing the position.""" @@ -958,6 +961,76 @@ class ZipExtFile(io.BufferedIOBase):              super().close() +class _ZipWriteFile(io.BufferedIOBase): +    def __init__(self, zf, zinfo, zip64): +        self._zinfo = zinfo +        self._zip64 = zip64 +        self._zipfile = zf +        self._compressor = _get_compressor(zinfo.compress_type) +        self._file_size = 0 +        self._compress_size = 0 +        self._crc = 0 + +    @property +    def _fileobj(self): +        return self._zipfile.fp + +    def writable(self): +        return True + +    def write(self, data): +        nbytes = len(data) +        self._file_size += nbytes +        self._crc = crc32(data, self._crc) +        if self._compressor: +            data = self._compressor.compress(data) +            self._compress_size += len(data) +        self._fileobj.write(data) +        return nbytes + +    def close(self): +        super().close() +        # Flush any data from the compressor, and update header info +        if self._compressor: +            buf = self._compressor.flush() +            self._compress_size += len(buf) +            self._fileobj.write(buf) +            self._zinfo.compress_size = self._compress_size +        else: +         
   self._zinfo.compress_size = self._file_size +        self._zinfo.CRC = self._crc +        self._zinfo.file_size = self._file_size + +        # Write updated header info +        if self._zinfo.flag_bits & 0x08: +            # Write CRC and file sizes after the file data +            fmt = '<LQQ' if self._zip64 else '<LLL' +            self._fileobj.write(struct.pack(fmt, self._zinfo.CRC, +                self._zinfo.compress_size, self._zinfo.file_size)) +            self._zipfile.start_dir = self._fileobj.tell() +        else: +            if not self._zip64: +                if self._file_size > ZIP64_LIMIT: +                    raise RuntimeError('File size unexpectedly exceeded ZIP64 ' +                                       'limit') +                if self._compress_size > ZIP64_LIMIT: +                    raise RuntimeError('Compressed size unexpectedly exceeded ' +                                       'ZIP64 limit') +            # Seek backwards and write file header (which will now include +            # correct CRC and file sizes) + +            # Preserve current position in file +            self._zipfile.start_dir = self._fileobj.tell() +            self._fileobj.seek(self._zinfo.header_offset) +            self._fileobj.write(self._zinfo.FileHeader(self._zip64)) +            self._fileobj.seek(self._zipfile.start_dir) + +        self._zipfile._writing = False + +        # Successfully written: Add file to our caches +        self._zipfile.filelist.append(self._zinfo) +        self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo +  class ZipFile:      """ Class with methods to open, read, write, close, list zip files. 
@@ -1020,6 +1093,7 @@ class ZipFile:          self._fileRefCnt = 1          self._lock = threading.RLock()          self._seekable = True +        self._writing = False          try:              if mode == 'r': @@ -1232,30 +1306,55 @@ class ZipFile:          with self.open(name, "r", pwd) as fp:              return fp.read() -    def open(self, name, mode="r", pwd=None): -        """Return file-like object for 'name'.""" -        if mode not in ("r", "U", "rU"): -            raise RuntimeError('open() requires mode "r", "U", or "rU"') -        if 'U' in mode: -            import warnings -            warnings.warn("'U' mode is deprecated", -                          DeprecationWarning, 2) +    def open(self, name, mode="r", pwd=None, *, force_zip64=False): +        """Return file-like object for 'name'. + +        name is a string for the file name within the ZIP file, or a ZipInfo +        object. + +        mode should be 'r' to read a file already in the ZIP file, or 'w' to +        write to a file newly added to the archive. + +        pwd is the password to decrypt files (only used for reading). + +        When writing, if the file size is not known in advance but may exceed +        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large +        files.  If the size is known in advance, it is best to pass a ZipInfo +        instance for name, with zinfo.file_size set. 
+        """ +        if mode not in {"r", "w"}: +            raise RuntimeError('open() requires mode "r" or "w"')          if pwd and not isinstance(pwd, bytes):              raise TypeError("pwd: expected bytes, got %s" % type(pwd)) +        if pwd and (mode == "w"): +            raise ValueError("pwd is only supported for reading files")          if not self.fp:              raise RuntimeError( -                "Attempt to read ZIP archive that was already closed") +                "Attempt to use ZIP archive that was already closed")          # Make sure we have an info object          if isinstance(name, ZipInfo):              # 'name' is already an info object              zinfo = name +        elif mode == 'w': +            zinfo = ZipInfo(name) +            zinfo.compress_type = self.compression          else:              # Get info object for name              zinfo = self.getinfo(name) +        if mode == 'w': +            return self._open_to_write(zinfo, force_zip64=force_zip64) + +        if self._writing: +            raise RuntimeError("Can't read from the ZIP file while there " +                    "is an open writing handle on it. " +                    "Close the writing handle before trying to read.") + +        # Open for reading:          self._fileRefCnt += 1 -        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock) +        zef_file = _SharedFile(self.fp, zinfo.header_offset, +                               self._fpclose, self._lock, lambda: self._writing)          try:              # Skip the file header:              fheader = zef_file.read(sizeFileHeader) @@ -1320,6 +1419,49 @@ class ZipFile:              zef_file.close()              raise +    def _open_to_write(self, zinfo, force_zip64=False): +        if force_zip64 and not self._allowZip64: +            raise ValueError( +                "force_zip64 is True, but allowZip64 was False when opening " +                "the ZIP file." 
+            ) +        if self._writing: +            raise RuntimeError("Can't write to the ZIP file while there is " +                               "another write handle open on it. " +                               "Close the first handle before opening another.") + +        # Sizes and CRC are overwritten with correct data after processing the file +        if not hasattr(zinfo, 'file_size'): +            zinfo.file_size = 0 +        zinfo.compress_size = 0 +        zinfo.CRC = 0 + +        zinfo.flag_bits = 0x00 +        if zinfo.compress_type == ZIP_LZMA: +            # Compressed data includes an end-of-stream (EOS) marker +            zinfo.flag_bits |= 0x02 +        if not self._seekable: +            zinfo.flag_bits |= 0x08 + +        if not zinfo.external_attr: +            zinfo.external_attr = 0o600 << 16  # permissions: ?rw------- + +        # Compressed size can be larger than uncompressed size +        zip64 = self._allowZip64 and \ +                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) + +        if self._seekable: +            self.fp.seek(self.start_dir) +        zinfo.header_offset = self.fp.tell() + +        self._writecheck(zinfo) +        self._didModify = True + +        self.fp.write(zinfo.FileHeader(zip64)) + +        self._writing = True +        return _ZipWriteFile(self, zinfo, zip64) +      def extract(self, member, path=None, pwd=None):          """Extract a member from the archive to the current working directory,             using its full name. 
Its file information is extracted as accurately @@ -1389,7 +1531,7 @@ class ZipFile:          if upperdirs and not os.path.exists(upperdirs):              os.makedirs(upperdirs) -        if member.filename[-1] == '/': +        if member.is_dir():              if not os.path.isdir(targetpath):                  os.mkdir(targetpath)              return targetpath @@ -1429,103 +1571,41 @@ class ZipFile:          if not self.fp:              raise RuntimeError(                  "Attempt to write to ZIP archive that was already closed") +        if self._writing: +            raise RuntimeError( +                "Can't write to ZIP archive while an open writing handle exists" +            ) -        st = os.stat(filename) -        isdir = stat.S_ISDIR(st.st_mode) -        mtime = time.localtime(st.st_mtime) -        date_time = mtime[0:6] -        # Create ZipInfo instance to store file information -        if arcname is None: -            arcname = filename -        arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) -        while arcname[0] in (os.sep, os.altsep): -            arcname = arcname[1:] -        if isdir: -            arcname += '/' -        zinfo = ZipInfo(arcname, date_time) -        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes -        if isdir: -            zinfo.compress_type = ZIP_STORED -        elif compress_type is None: -            zinfo.compress_type = self.compression +        zinfo = ZipInfo.from_file(filename, arcname) + +        if zinfo.is_dir(): +            zinfo.compress_size = 0 +            zinfo.CRC = 0          else: -            zinfo.compress_type = compress_type +            if compress_type is not None: +                zinfo.compress_type = compress_type +            else: +                zinfo.compress_type = self.compression -        zinfo.file_size = st.st_size -        zinfo.flag_bits = 0x00 -        with self._lock: -            if self._seekable: -                self.fp.seek(self.start_dir) 
-            zinfo.header_offset = self.fp.tell()    # Start of header bytes -            if zinfo.compress_type == ZIP_LZMA: +        if zinfo.is_dir(): +            with self._lock: +                if self._seekable: +                    self.fp.seek(self.start_dir) +                zinfo.header_offset = self.fp.tell()  # Start of header bytes +                if zinfo.compress_type == ZIP_LZMA:                  # Compressed data includes an end-of-stream (EOS) marker -                zinfo.flag_bits |= 0x02 +                    zinfo.flag_bits |= 0x02 -            self._writecheck(zinfo) -            self._didModify = True +                self._writecheck(zinfo) +                self._didModify = True -            if isdir: -                zinfo.file_size = 0 -                zinfo.compress_size = 0 -                zinfo.CRC = 0 -                zinfo.external_attr |= 0x10  # MS-DOS directory flag                  self.filelist.append(zinfo)                  self.NameToInfo[zinfo.filename] = zinfo                  self.fp.write(zinfo.FileHeader(False))                  self.start_dir = self.fp.tell() -                return - -            cmpr = _get_compressor(zinfo.compress_type) -            if not self._seekable: -                zinfo.flag_bits |= 0x08 -            with open(filename, "rb") as fp: -                # Must overwrite CRC and sizes with correct data later -                zinfo.CRC = CRC = 0 -                zinfo.compress_size = compress_size = 0 -                # Compressed size can be larger than uncompressed size -                zip64 = self._allowZip64 and \ -                    zinfo.file_size * 1.05 > ZIP64_LIMIT -                self.fp.write(zinfo.FileHeader(zip64)) -                file_size = 0 -                while 1: -                    buf = fp.read(1024 * 8) -                    if not buf: -                        break -                    file_size = file_size + len(buf) -                    CRC = crc32(buf, CRC) -     
               if cmpr: -                        buf = cmpr.compress(buf) -                        compress_size = compress_size + len(buf) -                    self.fp.write(buf) -            if cmpr: -                buf = cmpr.flush() -                compress_size = compress_size + len(buf) -                self.fp.write(buf) -                zinfo.compress_size = compress_size -            else: -                zinfo.compress_size = file_size -            zinfo.CRC = CRC -            zinfo.file_size = file_size -            if zinfo.flag_bits & 0x08: -                # Write CRC and file sizes after the file data -                fmt = '<LQQ' if zip64 else '<LLL' -                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, -                                          zinfo.file_size)) -                self.start_dir = self.fp.tell() -            else: -                if not zip64 and self._allowZip64: -                    if file_size > ZIP64_LIMIT: -                        raise RuntimeError('File size has increased during compressing') -                    if compress_size > ZIP64_LIMIT: -                        raise RuntimeError('Compressed size larger than uncompressed size') -                # Seek backwards and write file header (which will now include -                # correct CRC and file sizes) -                self.start_dir = self.fp.tell() # Preserve current position in file -                self.fp.seek(zinfo.header_offset) -                self.fp.write(zinfo.FileHeader(zip64)) -                self.fp.seek(self.start_dir) -            self.filelist.append(zinfo) -            self.NameToInfo[zinfo.filename] = zinfo +        else: +            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: +                shutil.copyfileobj(src, dest, 1024*8)      def writestr(self, zinfo_or_arcname, data, compress_type=None):          """Write a file into the archive.  
The contents is 'data', which @@ -1550,45 +1630,18 @@ class ZipFile:          if not self.fp:              raise RuntimeError(                  "Attempt to write to ZIP archive that was already closed") +        if self._writing: +            raise RuntimeError( +                "Can't write to ZIP archive while an open writing handle exists." +            ) + +        if compress_type is not None: +            zinfo.compress_type = compress_type          zinfo.file_size = len(data)            # Uncompressed size          with self._lock: -            if self._seekable: -                self.fp.seek(self.start_dir) -            zinfo.header_offset = self.fp.tell()    # Start of header data -            if compress_type is not None: -                zinfo.compress_type = compress_type -            zinfo.header_offset = self.fp.tell()    # Start of header data -            if compress_type is not None: -                zinfo.compress_type = compress_type -            if zinfo.compress_type == ZIP_LZMA: -                # Compressed data includes an end-of-stream (EOS) marker -                zinfo.flag_bits |= 0x02 - -            self._writecheck(zinfo) -            self._didModify = True -            zinfo.CRC = crc32(data)       # CRC-32 checksum -            co = _get_compressor(zinfo.compress_type) -            if co: -                data = co.compress(data) + co.flush() -                zinfo.compress_size = len(data)    # Compressed size -            else: -                zinfo.compress_size = zinfo.file_size -            zip64 = zinfo.file_size > ZIP64_LIMIT or \ -                zinfo.compress_size > ZIP64_LIMIT -            if zip64 and not self._allowZip64: -                raise LargeZipFile("Filesize would require ZIP64 extensions") -            self.fp.write(zinfo.FileHeader(zip64)) -            self.fp.write(data) -            if zinfo.flag_bits & 0x08: -                # Write CRC and file sizes after the file data -                fmt = '<LQQ' if 
zip64 else '<LLL' -                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, -                                          zinfo.file_size)) -            self.fp.flush() -            self.start_dir = self.fp.tell() -            self.filelist.append(zinfo) -            self.NameToInfo[zinfo.filename] = zinfo +            with self.open(zinfo, mode='w') as dest: +                dest.write(data)      def __del__(self):          """Call the "close()" method in case the user forgot.""" @@ -1600,6 +1653,11 @@ class ZipFile:          if self.fp is None:              return +        if self._writing: +            raise RuntimeError("Can't close the ZIP file while there is " +                               "an open writing handle on it. " +                               "Close the writing handle before closing the zip.") +          try:              if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records                  with self._lock: | 
