diff options
author | Ronald Oussoren <ronaldoussoren@mac.com> | 2006-06-15 08:14:18 +0000 |
---|---|---|
committer | Ronald Oussoren <ronaldoussoren@mac.com> | 2006-06-15 08:14:18 +0000 |
commit | 143cefb8461c2c684e904723be45f84d2c998466 (patch) | |
tree | 0ce1d5bba4691e8292f4d4deb278aa5a57bdb589 /Lib/zipfile.py | |
parent | 0eac11826a6b9efed2f52b612e58046c584db573 (diff) | |
download | cpython-git-143cefb8461c2c684e904723be45f84d2c998466.tar.gz |
Patch #1446489 (zipfile: support for ZIP64)
Diffstat (limited to 'Lib/zipfile.py')
-rw-r--r-- | Lib/zipfile.py | 384 |
1 files changed, 331 insertions, 53 deletions
diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 168d24502d..2cdbc6fad4 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -1,7 +1,8 @@ -"Read and write ZIP files." - +""" +Read and write ZIP files. +""" import struct, os, time, sys -import binascii +import binascii, cStringIO try: import zlib # We may need its compression method @@ -9,12 +10,22 @@ except ImportError: zlib = None __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", - "ZipInfo", "ZipFile", "PyZipFile"] + "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] class BadZipfile(Exception): pass + + +class LargeZipFile(Exception): + """ + Raised when writing a zipfile, the zipfile requires ZIP64 extensions + and those extensions are disabled. + """ + error = BadZipfile # The exception raised by this module +ZIP64_LIMIT= (1 << 31) - 1 + # constants for Zip file compression methods ZIP_STORED = 0 ZIP_DEFLATED = 8 @@ -27,6 +38,11 @@ structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes stringCentralDir = "PK\001\002" # magic number for central directory structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes stringFileHeader = "PK\003\004" # magic number for file header +structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes +stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header +structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes +stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header + # indexes of entries in the central directory structure _CD_SIGNATURE = 0 @@ -75,6 +91,40 @@ def is_zipfile(filename): pass return False +def _EndRecData64(fpin, offset, endrec): + """ + Read the ZIP64 end-of-archive records and use that to update endrec + """ + locatorSize = struct.calcsize(structEndArchive64Locator) + fpin.seek(offset - locatorSize, 2) + data = fpin.read(locatorSize) + sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) + if sig != stringEndArchive64Locator: + return endrec + + if diskno != 0 or disks != 1: + raise BadZipfile("zipfiles that span multiple disks are not supported") + + # Assume no 'zip64 extensible data' + endArchiveSize = struct.calcsize(structEndArchive64) + fpin.seek(offset - locatorSize - endArchiveSize, 2) + data = fpin.read(endArchiveSize) + sig, sz, create_version, read_version, disk_num, disk_dir, \ + dircount, dircount2, dirsize, diroffset = \ + struct.unpack(structEndArchive64, data) + if sig != stringEndArchive64: + return endrec + + # Update the original endrec using data from the ZIP64 record + endrec[1] = disk_num + endrec[2] = disk_dir + endrec[3] = dircount + endrec[4] = dircount2 + endrec[5] = dirsize + endrec[6] = diroffset + return endrec + + def _EndRecData(fpin): """Return data from the "End of Central Directory" record, or None. @@ -88,6 +138,8 @@ def _EndRecData(fpin): endrec = list(endrec) endrec.append("") # Append the archive comment endrec.append(filesize - 22) # Append the record start offset + if endrec[-4] == -1 or endrec[-4] == 0xffffffff: + return _EndRecData64(fpin, -22, endrec) return endrec # Search the last END_BLOCK bytes of the file for the record signature. # The comment is appended to the ZIP file and has a 16 bit length. @@ -106,25 +158,50 @@ def _EndRecData(fpin): # Append the archive comment and start offset endrec.append(comment) endrec.append(filesize - END_BLOCK + start) + if endrec[-4] == -1 or endrec[-4] == 0xffffffff: + return _EndRecData64(fpin, - END_BLOCK + start, endrec) return endrec return # Error, return None -class ZipInfo: +class ZipInfo (object): """Class with attributes describing each file in the ZIP archive.""" + __slots__ = ( + 'orig_filename', + 'filename', + 'date_time', + 'compress_type', + 'comment', + 'extra', + 'create_system', + 'create_version', + 'extract_version', + 'reserved', + 'flag_bits', + 'volume', + 'internal_attr', + 'external_attr', + 'header_offset', + 'CRC', + 'compress_size', + 'file_size', + ) + def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): self.orig_filename = filename # Original file name in archive -# Terminate the file name at the first null byte. Null bytes in file -# names are used as tricks by viruses in archives. + + # Terminate the file name at the first null byte. Null bytes in file + # names are used as tricks by viruses in archives. null_byte = filename.find(chr(0)) if null_byte >= 0: filename = filename[0:null_byte] -# This is used to ensure paths in generated ZIP files always use -# forward slashes as the directory separator, as required by the -# ZIP format specification. - if os.sep != "/": + # This is used to ensure paths in generated ZIP files always use + # forward slashes as the directory separator, as required by the + # ZIP format specification. + if os.sep != "/" and os.sep in filename: filename = filename.replace(os.sep, "/") + self.filename = filename # Normalized file name self.date_time = date_time # year, month, day, hour, min, sec # Standard values: @@ -145,7 +222,6 @@ class ZipInfo: self.external_attr = 0 # External file attributes # Other attributes are set by class ZipFile: # header_offset Byte offset to the file header - # file_offset Byte offset to the start of the file data # CRC CRC-32 of the uncompressed file # compress_size Size of the compressed file # file_size Size of the uncompressed file @@ -162,29 +238,85 @@ class ZipInfo: CRC = self.CRC compress_size = self.compress_size file_size = self.file_size + + extra = self.extra + + if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: + # File is larger than what fits into a 4 byte integer, + # fall back to the ZIP64 extension + fmt = '<hhqq' + extra = extra + struct.pack(fmt, + 1, struct.calcsize(fmt)-4, file_size, compress_size) + file_size = 0xffffffff # -1 + compress_size = 0xffffffff # -1 + self.extract_version = max(45, self.extract_version) + self.create_version = max(45, self.extract_version) + header = struct.pack(structFileHeader, stringFileHeader, self.extract_version, self.reserved, self.flag_bits, self.compress_type, dostime, dosdate, CRC, compress_size, file_size, - len(self.filename), len(self.extra)) - return header + self.filename + self.extra + len(self.filename), len(extra)) + return header + self.filename + extra + + def _decodeExtra(self): + # Try to decode the extra field. + extra = self.extra + unpack = struct.unpack + while extra: + tp, ln = unpack('<hh', extra[:4]) + if tp == 1: + if ln >= 24: + counts = unpack('<qqq', extra[4:28]) + elif ln == 16: + counts = unpack('<qq', extra[4:20]) + elif ln == 8: + counts = unpack('<q', extra[4:12]) + elif ln == 0: + counts = () + else: + raise RuntimeError, "Corrupt extra field %s"%(ln,) + + idx = 0 + + # ZIP64 extension (large files and/or large archives) + if self.file_size == -1 or self.file_size == 0xFFFFFFFFL: + self.file_size = counts[idx] + idx += 1 + if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL: + self.compress_size = counts[idx] + idx += 1 + + if self.header_offset == -1 or self.header_offset == 0xffffffffL: + old = self.header_offset + self.header_offset = counts[idx] + idx+=1 + + extra = extra[ln+4:] + class ZipFile: """ Class with methods to open, read, write, close, list zip files. - z = ZipFile(file, mode="r", compression=ZIP_STORED) + z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True) file: Either the path to the file, or a file-like object. If it is a path, the file will be opened and closed by ZipFile. mode: The mode can be either read "r", write "w" or append "a". compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). + allowZip64: if True ZipFile will create files with ZIP64 extensions when + needed, otherwise it will raise an exception when this would + be necessary. + """ fp = None # Set here since __del__ checks it - def __init__(self, file, mode="r", compression=ZIP_STORED): + def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): """Open the ZIP file with mode read "r", write "w" or append "a".""" + self._allowZip64 = allowZip64 + self._didModify = False if compression == ZIP_STORED: pass elif compression == ZIP_DEFLATED: @@ -250,7 +382,10 @@ class ZipFile: offset_cd = endrec[6] # offset of central directory self.comment = endrec[8] # archive comment # endrec[9] is the offset of the "End of Central Dir" record - x = endrec[9] - size_cd + if endrec[9] > ZIP64_LIMIT: + x = endrec[9] - size_cd - 56 - 20 + else: + x = endrec[9] - size_cd # "concat" is zero, unless zip was concatenated to another file concat = x - offset_cd if self.debug > 2: @@ -258,6 +393,8 @@ class ZipFile: # self.start_dir: Position of start of central directory self.start_dir = offset_cd + concat fp.seek(self.start_dir, 0) + data = fp.read(size_cd) + fp = cStringIO.StringIO(data) total = 0 while total < size_cd: centdir = fp.read(46) @@ -275,8 +412,7 @@ class ZipFile: total = (total + centdir[_CD_FILENAME_LENGTH] + centdir[_CD_EXTRA_FIELD_LENGTH] + centdir[_CD_COMMENT_LENGTH]) - x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat - # file_offset must be computed below... + x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] (x.create_version, x.create_system, x.extract_version, x.reserved, x.flag_bits, x.compress_type, t, d, x.CRC, x.compress_size, x.file_size) = centdir[1:12] @@ -284,28 +420,14 @@ class ZipFile: # Convert date/time code to (year, month, day, hour, min, sec) x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) + + x._decodeExtra() + x.header_offset = x.header_offset + concat self.filelist.append(x) self.NameToInfo[x.filename] = x if self.debug > 2: print "total", total - for data in self.filelist: - fp.seek(data.header_offset, 0) - fheader = fp.read(30) - if fheader[0:4] != stringFileHeader: - raise BadZipfile, "Bad magic number for file header" - fheader = struct.unpack(structFileHeader, fheader) - # file_offset is computed here, since the extra field for - # the central directory and for the local file header - # refer to different fields, and they can have different - # lengths - data.file_offset = (data.header_offset + 30 - + fheader[_FH_FILENAME_LENGTH] - + fheader[_FH_EXTRA_FIELD_LENGTH]) - fname = fp.read(fheader[_FH_FILENAME_LENGTH]) - if fname != data.orig_filename: - raise RuntimeError, \ - 'File name in directory "%s" and header "%s" differ.' % ( - data.orig_filename, fname) + def namelist(self): """Return a list of file names in the archive.""" @@ -334,6 +456,7 @@ class ZipFile: except BadZipfile: return zinfo.filename + def getinfo(self, name): """Return the instance of ZipInfo given 'name'.""" return self.NameToInfo[name] @@ -347,7 +470,24 @@ class ZipFile: "Attempt to read ZIP archive that was already closed" zinfo = self.getinfo(name) filepos = self.fp.tell() - self.fp.seek(zinfo.file_offset, 0) + + self.fp.seek(zinfo.header_offset, 0) + + # Skip the file header: + fheader = self.fp.read(30) + if fheader[0:4] != stringFileHeader: + raise BadZipfile, "Bad magic number for file header" + + fheader = struct.unpack(structFileHeader, fheader) + fname = self.fp.read(fheader[_FH_FILENAME_LENGTH]) + if fheader[_FH_EXTRA_FIELD_LENGTH]: + self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH]) + + if fname != zinfo.orig_filename: + raise BadZipfile, \ + 'File name in directory "%s" and header "%s" differ.' % ( + zinfo.orig_filename, fname) + bytes = self.fp.read(zinfo.compress_size) self.fp.seek(filepos, 0) if zinfo.compress_type == ZIP_STORED: @@ -388,6 +528,12 @@ class ZipFile: if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): raise RuntimeError, \ "That compression method is not supported" + if zinfo.file_size > ZIP64_LIMIT: + if not self._allowZip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + if zinfo.header_offset > ZIP64_LIMIT: + if not self._allowZip64: + raise LargeZipFile("Zipfile size would require ZIP64 extensions") def write(self, filename, arcname=None, compress_type=None): """Put the bytes from filename into the archive under the name @@ -407,16 +553,19 @@ class ZipFile: zinfo.compress_type = self.compression else: zinfo.compress_type = compress_type - self._writecheck(zinfo) - fp = open(filename, "rb") + + zinfo.file_size = st.st_size zinfo.flag_bits = 0x00 zinfo.header_offset = self.fp.tell() # Start of header bytes + + self._writecheck(zinfo) + self._didModify = True + fp = open(filename, "rb") # Must overwrite CRC and sizes with correct data later zinfo.CRC = CRC = 0 zinfo.compress_size = compress_size = 0 zinfo.file_size = file_size = 0 self.fp.write(zinfo.FileHeader()) - zinfo.file_offset = self.fp.tell() # Start of file bytes if zinfo.compress_type == ZIP_DEFLATED: cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) @@ -461,8 +610,10 @@ class ZipFile: zinfo.compress_type = self.compression else: zinfo = zinfo_or_arcname - self._writecheck(zinfo) zinfo.file_size = len(bytes) # Uncompressed size + zinfo.header_offset = self.fp.tell() # Start of header bytes + self._writecheck(zinfo) + self._didModify = True zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum if zinfo.compress_type == ZIP_DEFLATED: co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, @@ -473,8 +624,8 @@ class ZipFile: zinfo.compress_size = zinfo.file_size zinfo.header_offset = self.fp.tell() # Start of header bytes self.fp.write(zinfo.FileHeader()) - zinfo.file_offset = self.fp.tell() # Start of file bytes self.fp.write(bytes) + self.fp.flush() if zinfo.flag_bits & 0x08: # Write CRC and file sizes after the file data self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size, @@ -491,7 +642,8 @@ class ZipFile: records.""" if self.fp is None: return - if self.mode in ("w", "a"): # write ending records + + if self.mode in ("w", "a") and self._didModify: # write ending records count = 0 pos1 = self.fp.tell() for zinfo in self.filelist: # write central directory @@ -499,23 +651,72 @@ class ZipFile: dt = zinfo.date_time dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + extra = [] + if zinfo.file_size > ZIP64_LIMIT \ + or zinfo.compress_size > ZIP64_LIMIT: + extra.append(zinfo.file_size) + extra.append(zinfo.compress_size) + file_size = 0xffffffff #-1 + compress_size = 0xffffffff #-1 + else: + file_size = zinfo.file_size + compress_size = zinfo.compress_size + + if zinfo.header_offset > ZIP64_LIMIT: + extra.append(zinfo.header_offset) + header_offset = 0xffffffff #-1 + else: + header_offset = zinfo.header_offset + + extra_data = zinfo.extra + if extra: + # Append a ZIP64 field to the extra's + extra_data = struct.pack( + '<hh' + 'q'*len(extra), + 1, 8*len(extra), *extra) + extra_data + + extract_version = max(45, zinfo.extract_version) + create_version = max(45, zinfo.create_version) + else: + extract_version = zinfo.extract_version + create_version = zinfo.create_version + centdir = struct.pack(structCentralDir, - stringCentralDir, zinfo.create_version, - zinfo.create_system, zinfo.extract_version, zinfo.reserved, + stringCentralDir, create_version, + zinfo.create_system, extract_version, zinfo.reserved, zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, - zinfo.CRC, zinfo.compress_size, zinfo.file_size, - len(zinfo.filename), len(zinfo.extra), len(zinfo.comment), + zinfo.CRC, compress_size, file_size, + len(zinfo.filename), len(extra_data), len(zinfo.comment), 0, zinfo.internal_attr, zinfo.external_attr, - zinfo.header_offset) + header_offset) self.fp.write(centdir) self.fp.write(zinfo.filename) - self.fp.write(zinfo.extra) + self.fp.write(extra_data) self.fp.write(zinfo.comment) + pos2 = self.fp.tell() # Write end-of-zip-archive record - endrec = struct.pack(structEndArchive, stringEndArchive, - 0, 0, count, count, pos2 - pos1, pos1, 0) - self.fp.write(endrec) + if pos1 > ZIP64_LIMIT: + # Need to write the ZIP64 end-of-archive records + zip64endrec = struct.pack( + structEndArchive64, stringEndArchive64, + 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1) + self.fp.write(zip64endrec) + + zip64locrec = struct.pack( + structEndArchive64Locator, + stringEndArchive64Locator, 0, pos2, 1) + self.fp.write(zip64locrec) + + pos3 = self.fp.tell() + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0) + self.fp.write(endrec) + + else: + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, count, count, pos2 - pos1, pos1, 0) + self.fp.write(endrec) self.fp.flush() if not self._filePassed: self.fp.close() @@ -619,3 +820,80 @@ class PyZipFile(ZipFile): if basename: archivename = "%s/%s" % (basename, archivename) return (fname, archivename) + + +def main(args = None): + import textwrap + USAGE=textwrap.dedent("""\ + Usage: + zipfile.py -l zipfile.zip # Show listing of a zipfile + zipfile.py -t zipfile.zip # Test if a zipfile is valid + zipfile.py -e zipfile.zip target # Extract zipfile into target dir + zipfile.py -c zipfile.zip src ... # Create zipfile from sources + """) + if args is None: + args = sys.argv[1:] + + if not args or args[0] not in ('-l', '-c', '-e', '-t'): + print USAGE + sys.exit(1) + + if args[0] == '-l': + if len(args) != 2: + print USAGE + sys.exit(1) + zf = ZipFile(args[1], 'r') + zf.printdir() + zf.close() + + elif args[0] == '-t': + if len(args) != 2: + print USAGE + sys.exit(1) + zf = ZipFile(args[1], 'r') + zf.testzip() + print "Done testing" + + elif args[0] == '-e': + if len(args) != 3: + print USAGE + sys.exit(1) + + zf = ZipFile(args[1], 'r') + out = args[2] + for path in zf.namelist(): + if path.startswith('./'): + tgt = os.path.join(out, path[2:]) + else: + tgt = os.path.join(out, path) + + tgtdir = os.path.dirname(tgt) + if not os.path.exists(tgtdir): + os.makedirs(tgtdir) + fp = open(tgt, 'wb') + fp.write(zf.read(path)) + fp.close() + zf.close() + + elif args[0] == '-c': + if len(args) < 3: + print USAGE + sys.exit(1) + + def addToZip(zf, path, zippath): + if os.path.isfile(path): + zf.write(path, zippath, ZIP_DEFLATED) + elif os.path.isdir(path): + for nm in os.listdir(path): + addToZip(zf, + os.path.join(path, nm), os.path.join(zippath, nm)) + # else: ignore + + zf = ZipFile(args[1], 'w', allowZip64=True) + for src in args[2:]: + addToZip(zf, src, os.path.basename(src)) + + zf.close() + +if __name__ == "__main__": + main() |