diff options
author | William Chargin <wchargin@gmail.com> | 2020-01-21 03:25:24 -0800 |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2020-01-21 13:25:24 +0200 |
commit | eab3b3f1c60afecfb4db3c3619109684cb04bd60 (patch) | |
tree | 311e513af209b2fa14f0d46720801aa4e1f02462 | |
parent | 85ead4fc62829cb7ef2eb0af1a2933282f58c629 (diff) | |
download | cpython-git-eab3b3f1c60afecfb4db3c3619109684cb04bd60.tar.gz |
bpo-39389: gzip: fix compression level metadata (GH-18077)
As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a
gzip member header should indicate whether the DEFLATE algorithm was
tuned for speed or compression ratio. Prior to this patch, archives
emitted by the `gzip` module always indicated maximum compression.
-rw-r--r-- | Lib/gzip.py | 12 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 20 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst | 2 |
3 files changed, 31 insertions, 3 deletions
```diff
diff --git a/Lib/gzip.py b/Lib/gzip.py
index e60d8ad599..e422773b3e 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -209,7 +209,7 @@ class GzipFile(_compression.BaseStream):
         self.fileobj = fileobj
 
         if self.mode == WRITE:
-            self._write_gzip_header()
+            self._write_gzip_header(compresslevel)
 
     @property
     def filename(self):
@@ -236,7 +236,7 @@ class GzipFile(_compression.BaseStream):
         self.bufsize = 0
         self.offset = 0  # Current file offset for seek(), tell(), etc
 
-    def _write_gzip_header(self):
+    def _write_gzip_header(self, compresslevel):
         self.fileobj.write(b'\037\213')             # magic header
         self.fileobj.write(b'\010')                 # compression method
         try:
@@ -257,7 +257,13 @@ class GzipFile(_compression.BaseStream):
         if mtime is None:
             mtime = time.time()
         write32u(self.fileobj, int(mtime))
-        self.fileobj.write(b'\002')
+        if compresslevel == _COMPRESS_LEVEL_BEST:
+            xfl = b'\002'
+        elif compresslevel == _COMPRESS_LEVEL_FAST:
+            xfl = b'\004'
+        else:
+            xfl = b'\000'
+        self.fileobj.write(xfl)
         self.fileobj.write(b'\377')
         if fname:
             self.fileobj.write(fname + b'\000')
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index 57d851cf9c..78334213f2 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -358,6 +358,26 @@ class TestGzip(BaseTest):
             isizeBytes = fRead.read(4)
             self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
 
+    def test_compresslevel_metadata(self):
+        # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
+        # specifically, discussion of XFL in section 2.3.1
+        cases = [
+            ('fast', 1, b'\x04'),
+            ('best', 9, b'\x02'),
+            ('tradeoff', 6, b'\x00'),
+        ]
+        xflOffset = 8
+
+        for (name, level, expectedXflByte) in cases:
+            with self.subTest(name):
+                fWrite = gzip.GzipFile(self.filename, 'w', compresslevel=level)
+                with fWrite:
+                    fWrite.write(data1)
+                with open(self.filename, 'rb') as fRead:
+                    fRead.seek(xflOffset)
+                    xflByte = fRead.read(1)
+                self.assertEqual(xflByte, expectedXflByte)
+
     def test_with_open(self):
         # GzipFile supports the context management protocol
         with gzip.GzipFile(self.filename, "wb") as f:
diff --git a/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst b/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst
new file mode 100644
index 0000000000..d4c80506f7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-01-20-00-56-01.bpo-39389.fEirIS.rst
@@ -0,0 +1,2 @@
+Write accurate compression level metadata in :mod:`gzip` archives, rather
+than always signaling maximum compression.
```