diff options
Diffstat (limited to 'Lib/lzma.py')
| -rw-r--r-- | Lib/lzma.py | 226 | 
1 files changed, 30 insertions, 196 deletions
| diff --git a/Lib/lzma.py b/Lib/lzma.py index f1d3958179..7dff1c319a 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -25,17 +25,16 @@ import builtins  import io  from _lzma import *  from _lzma import _encode_filter_properties, _decode_filter_properties +import _compression  _MODE_CLOSED   = 0  _MODE_READ     = 1 -_MODE_READ_EOF = 2 +# Value 2 no longer used  _MODE_WRITE    = 3 -_BUFFER_SIZE = 8192 - -class LZMAFile(io.BufferedIOBase): +class LZMAFile(_compression.BaseStream):      """A file object providing transparent LZMA (de)compression. @@ -92,8 +91,6 @@ class LZMAFile(io.BufferedIOBase):          self._fp = None          self._closefp = False          self._mode = _MODE_CLOSED -        self._pos = 0 -        self._size = -1          if mode in ("r", "rb"):              if check != -1: @@ -105,19 +102,13 @@ class LZMAFile(io.BufferedIOBase):              if format is None:                  format = FORMAT_AUTO              mode_code = _MODE_READ -            # Save the args to pass to the LZMADecompressor initializer. -            # If the file contains multiple compressed streams, each -            # stream will need a separate decompressor object. -            self._init_args = {"format":format, "filters":filters} -            self._decompressor = LZMADecompressor(**self._init_args) -            self._buffer = b"" -            self._buffer_offset = 0          elif mode in ("w", "wb", "a", "ab", "x", "xb"):              if format is None:                  format = FORMAT_XZ              mode_code = _MODE_WRITE              self._compressor = LZMACompressor(format=format, check=check,                                                preset=preset, filters=filters) +            self._pos = 0          else:              raise ValueError("Invalid mode: {!r}".format(mode)) @@ -133,6 +124,11 @@ class LZMAFile(io.BufferedIOBase):          else:              raise TypeError("filename must be a str or bytes object, or a file") +        if self._mode == _MODE_READ: +            raw = _compression.DecompressReader(self._fp, LZMADecompressor, +                trailing_error=LZMAError, format=format, filters=filters) +            self._buffer = io.BufferedReader(raw) +      def close(self):          """Flush and close the file. @@ -142,9 +138,9 @@ class LZMAFile(io.BufferedIOBase):          if self._mode == _MODE_CLOSED:              return          try: -            if self._mode in (_MODE_READ, _MODE_READ_EOF): -                self._decompressor = None -                self._buffer = b"" +            if self._mode == _MODE_READ: +                self._buffer.close() +                self._buffer = None              elif self._mode == _MODE_WRITE:                  self._fp.write(self._compressor.flush())                  self._compressor = None @@ -169,123 +165,18 @@ class LZMAFile(io.BufferedIOBase):      def seekable(self):          """Return whether the file supports seeking.""" -        return self.readable() and self._fp.seekable() +        return self.readable() and self._buffer.seekable()      def readable(self):          """Return whether the file was opened for reading."""          self._check_not_closed() -        return self._mode in (_MODE_READ, _MODE_READ_EOF) +        return self._mode == _MODE_READ      def writable(self):          """Return whether the file was opened for writing."""          self._check_not_closed()          return self._mode == _MODE_WRITE -    # Mode-checking helper functions. - -    def _check_not_closed(self): -        if self.closed: -            raise ValueError("I/O operation on closed file") - -    def _check_can_read(self): -        if self._mode not in (_MODE_READ, _MODE_READ_EOF): -            self._check_not_closed() -            raise io.UnsupportedOperation("File not open for reading") - -    def _check_can_write(self): -        if self._mode != _MODE_WRITE: -            self._check_not_closed() -            raise io.UnsupportedOperation("File not open for writing") - -    def _check_can_seek(self): -        if self._mode not in (_MODE_READ, _MODE_READ_EOF): -            self._check_not_closed() -            raise io.UnsupportedOperation("Seeking is only supported " -                                          "on files open for reading") -        if not self._fp.seekable(): -            raise io.UnsupportedOperation("The underlying file object " -                                          "does not support seeking") - -    # Fill the readahead buffer if it is empty. Returns False on EOF. -    def _fill_buffer(self): -        if self._mode == _MODE_READ_EOF: -            return False -        # Depending on the input data, our call to the decompressor may not -        # return any data. In this case, try again after reading another block. -        while self._buffer_offset == len(self._buffer): -            rawblock = (self._decompressor.unused_data or -                        self._fp.read(_BUFFER_SIZE)) - -            if not rawblock: -                if self._decompressor.eof: -                    self._mode = _MODE_READ_EOF -                    self._size = self._pos -                    return False -                else: -                    raise EOFError("Compressed file ended before the " -                                   "end-of-stream marker was reached") - -            if self._decompressor.eof: -                # Continue to next stream. -                self._decompressor = LZMADecompressor(**self._init_args) -                try: -                    self._buffer = self._decompressor.decompress(rawblock) -                except LZMAError: -                    # Trailing data isn't a valid compressed stream; ignore it. -                    self._mode = _MODE_READ_EOF -                    self._size = self._pos -                    return False -            else: -                self._buffer = self._decompressor.decompress(rawblock) -            self._buffer_offset = 0 -        return True - -    # Read data until EOF. -    # If return_data is false, consume the data without returning it. -    def _read_all(self, return_data=True): -        # The loop assumes that _buffer_offset is 0. Ensure that this is true. -        self._buffer = self._buffer[self._buffer_offset:] -        self._buffer_offset = 0 - -        blocks = [] -        while self._fill_buffer(): -            if return_data: -                blocks.append(self._buffer) -            self._pos += len(self._buffer) -            self._buffer = b"" -        if return_data: -            return b"".join(blocks) - -    # Read a block of up to n bytes. -    # If return_data is false, consume the data without returning it. -    def _read_block(self, n, return_data=True): -        # If we have enough data buffered, return immediately. -        end = self._buffer_offset + n -        if end <= len(self._buffer): -            data = self._buffer[self._buffer_offset : end] -            self._buffer_offset = end -            self._pos += len(data) -            return data if return_data else None - -        # The loop assumes that _buffer_offset is 0. Ensure that this is true. -        self._buffer = self._buffer[self._buffer_offset:] -        self._buffer_offset = 0 - -        blocks = [] -        while n > 0 and self._fill_buffer(): -            if n < len(self._buffer): -                data = self._buffer[:n] -                self._buffer_offset = n -            else: -                data = self._buffer -                self._buffer = b"" -            if return_data: -                blocks.append(data) -            self._pos += len(data) -            n -= len(data) -        if return_data: -            return b"".join(blocks) -      def peek(self, size=-1):          """Return buffered data without advancing the file position. @@ -293,9 +184,9 @@ class LZMAFile(io.BufferedIOBase):          The exact number of bytes returned is unspecified.          """          self._check_can_read() -        if not self._fill_buffer(): -            return b"" -        return self._buffer[self._buffer_offset:] +        # Relies on the undocumented fact that BufferedReader.peek() always +        # returns at least one byte (except at EOF) +        return self._buffer.peek(size)      def read(self, size=-1):          """Read up to size uncompressed bytes from the file. @@ -304,38 +195,19 @@ class LZMAFile(io.BufferedIOBase):          Returns b"" if the file is already at EOF.          """          self._check_can_read() -        if size == 0: -            return b"" -        elif size < 0: -            return self._read_all() -        else: -            return self._read_block(size) +        return self._buffer.read(size)      def read1(self, size=-1):          """Read up to size uncompressed bytes, while trying to avoid -        making multiple reads from the underlying stream. +        making multiple reads from the underlying stream. Reads up to a +        buffer's worth of data if size is negative.          Returns b"" if the file is at EOF.          """ -        # Usually, read1() calls _fp.read() at most once. However, sometimes -        # this does not give enough data for the decompressor to make progress. -        # In this case we make multiple reads, to avoid returning b"".          self._check_can_read() -        if (size == 0 or -            # Only call _fill_buffer() if the buffer is actually empty. -            # This gives a significant speedup if *size* is small. -            (self._buffer_offset == len(self._buffer) and not self._fill_buffer())): -            return b"" -        if size > 0: -            data = self._buffer[self._buffer_offset : -                                self._buffer_offset + size] -            self._buffer_offset += len(data) -        else: -            data = self._buffer[self._buffer_offset:] -            self._buffer = b"" -            self._buffer_offset = 0 -        self._pos += len(data) -        return data +        if size < 0: +            size = io.DEFAULT_BUFFER_SIZE +        return self._buffer.read1(size)      def readline(self, size=-1):          """Read a line of uncompressed bytes from the file. @@ -345,15 +217,7 @@ class LZMAFile(io.BufferedIOBase):          case the line may be incomplete). Returns b'' if already at EOF.          """          self._check_can_read() -        # Shortcut for the common case - the whole line is in the buffer. -        if size < 0: -            end = self._buffer.find(b"\n", self._buffer_offset) + 1 -            if end > 0: -                line = self._buffer[self._buffer_offset : end] -                self._buffer_offset = end -                self._pos += len(line) -                return line -        return io.BufferedIOBase.readline(self, size) +        return self._buffer.readline(size)      def write(self, data):          """Write a bytes object to the file. @@ -368,16 +232,7 @@ class LZMAFile(io.BufferedIOBase):          self._pos += len(data)          return len(data) -    # Rewind the file to the beginning of the data stream. -    def _rewind(self): -        self._fp.seek(0, 0) -        self._mode = _MODE_READ -        self._pos = 0 -        self._decompressor = LZMADecompressor(**self._init_args) -        self._buffer = b"" -        self._buffer_offset = 0 - -    def seek(self, offset, whence=0): +    def seek(self, offset, whence=io.SEEK_SET):          """Change the file position.          The new position is specified by offset, relative to the @@ -389,38 +244,17 @@ class LZMAFile(io.BufferedIOBase):          Returns the new file position. -        Note that seeking is emulated, sp depending on the parameters, +        Note that seeking is emulated, so depending on the parameters,          this operation may be extremely slow.          """          self._check_can_seek() - -        # Recalculate offset as an absolute file position. -        if whence == 0: -            pass -        elif whence == 1: -            offset = self._pos + offset -        elif whence == 2: -            # Seeking relative to EOF - we need to know the file's size. -            if self._size < 0: -                self._read_all(return_data=False) -            offset = self._size + offset -        else: -            raise ValueError("Invalid value for whence: {}".format(whence)) - -        # Make it so that offset is the number of bytes to skip forward. -        if offset < self._pos: -            self._rewind() -        else: -            offset -= self._pos - -        # Read and discard data until we reach the desired position. -        self._read_block(offset, return_data=False) - -        return self._pos +        return self._buffer.seek(offset, whence)      def tell(self):          """Return the current file position."""          self._check_not_closed() +        if self._mode == _MODE_READ: +            return self._buffer.tell()          return self._pos @@ -445,7 +279,7 @@ def open(filename, mode="rb", *,      constructor: LZMAFile(filename, mode, ...). In this case, the      encoding, errors and newline arguments must not be provided. -    For text mode, a LZMAFile object is created, and wrapped in an +    For text mode, an LZMAFile object is created, and wrapped in an      io.TextIOWrapper instance with the specified encoding, error      handling behavior, and line ending(s). | 
