diff options
Diffstat (limited to 'Lib/lzma.py')
| -rw-r--r-- | Lib/lzma.py | 130 | 
1 files changed, 86 insertions, 44 deletions
diff --git a/Lib/lzma.py b/Lib/lzma.py index 1a1b065f8e..b2e2f7ed0e 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -55,7 +55,7 @@ class LZMAFile(io.BufferedIOBase):          be an existing file object to read from or write to.          mode can be "r" for reading (default), "w" for (over)writing, or -        "a" for appending. These can equivalently be given as "rb", "wb", +        "a" for appending. These can equivalently be given as "rb", "wb"          and "ab" respectively.          format specifies the container format to use for the file. @@ -110,7 +110,8 @@ class LZMAFile(io.BufferedIOBase):              # stream will need a separate decompressor object.              self._init_args = {"format":format, "filters":filters}              self._decompressor = LZMADecompressor(**self._init_args) -            self._buffer = None +            self._buffer = b"" +            self._buffer_offset = 0          elif mode in ("w", "wb", "a", "ab"):              if format is None:                  format = FORMAT_XZ @@ -143,7 +144,7 @@ class LZMAFile(io.BufferedIOBase):          try:              if self._mode in (_MODE_READ, _MODE_READ_EOF):                  self._decompressor = None -                self._buffer = None +                self._buffer = b""              elif self._mode == _MODE_WRITE:                  self._fp.write(self._compressor.flush())                  self._compressor = None @@ -187,15 +188,18 @@ class LZMAFile(io.BufferedIOBase):              raise ValueError("I/O operation on closed file")      def _check_can_read(self): -        if not self.readable(): +        if self._mode not in (_MODE_READ, _MODE_READ_EOF): +            self._check_not_closed()              raise io.UnsupportedOperation("File not open for reading")      def _check_can_write(self): -        if not self.writable(): +        if self._mode != _MODE_WRITE: +            self._check_not_closed()              raise io.UnsupportedOperation("File not open for writing")      def _check_can_seek(self): -        if not self.readable(): +        if self._mode not in (_MODE_READ, _MODE_READ_EOF): +            self._check_not_closed()              raise io.UnsupportedOperation("Seeking is only supported "                                            "on files open for reading")          if not self._fp.seekable(): @@ -204,16 +208,13 @@ class LZMAFile(io.BufferedIOBase):      # Fill the readahead buffer if it is empty. Returns False on EOF.      def _fill_buffer(self): +        if self._mode == _MODE_READ_EOF: +            return False          # Depending on the input data, our call to the decompressor may not          # return any data. In this case, try again after reading another block. -        while True: -            if self._buffer: -                return True - -            if self._decompressor.unused_data: -                rawblock = self._decompressor.unused_data -            else: -                rawblock = self._fp.read(_BUFFER_SIZE) +        while self._buffer_offset == len(self._buffer): +            rawblock = (self._decompressor.unused_data or +                        self._fp.read(_BUFFER_SIZE))              if not rawblock:                  if self._decompressor.eof: @@ -229,30 +230,48 @@ class LZMAFile(io.BufferedIOBase):                  self._decompressor = LZMADecompressor(**self._init_args)              self._buffer = self._decompressor.decompress(rawblock) +            self._buffer_offset = 0 +        return True      # Read data until EOF.      # If return_data is false, consume the data without returning it.      def _read_all(self, return_data=True): +        # The loop assumes that _buffer_offset is 0. Ensure that this is true. +        self._buffer = self._buffer[self._buffer_offset:] +        self._buffer_offset = 0 +          blocks = []          while self._fill_buffer():              if return_data:                  blocks.append(self._buffer)              self._pos += len(self._buffer) -            self._buffer = None +            self._buffer = b""          if return_data:              return b"".join(blocks)      # Read a block of up to n bytes.      # If return_data is false, consume the data without returning it.      def _read_block(self, n, return_data=True): +        # If we have enough data buffered, return immediately. +        end = self._buffer_offset + n +        if end <= len(self._buffer): +            data = self._buffer[self._buffer_offset : end] +            self._buffer_offset = end +            self._pos += len(data) +            return data if return_data else None + +        # The loop assumes that _buffer_offset is 0. Ensure that this is true. +        self._buffer = self._buffer[self._buffer_offset:] +        self._buffer_offset = 0 +          blocks = []          while n > 0 and self._fill_buffer():              if n < len(self._buffer):                  data = self._buffer[:n] -                self._buffer = self._buffer[n:] +                self._buffer_offset = n              else:                  data = self._buffer -                self._buffer = None +                self._buffer = b""              if return_data:                  blocks.append(data)              self._pos += len(data) @@ -267,9 +286,9 @@ class LZMAFile(io.BufferedIOBase):          The exact number of bytes returned is unspecified.          """          self._check_can_read() -        if self._mode == _MODE_READ_EOF or not self._fill_buffer(): +        if not self._fill_buffer():              return b"" -        return self._buffer +        return self._buffer[self._buffer_offset:]      def read(self, size=-1):          """Read up to size uncompressed bytes from the file. @@ -278,7 +297,7 @@ class LZMAFile(io.BufferedIOBase):          Returns b"" if the file is already at EOF.          """          self._check_can_read() -        if self._mode == _MODE_READ_EOF or size == 0: +        if size == 0:              return b""          elif size < 0:              return self._read_all() @@ -295,18 +314,40 @@ class LZMAFile(io.BufferedIOBase):          # this does not give enough data for the decompressor to make progress.          # In this case we make multiple reads, to avoid returning b"".          self._check_can_read() -        if (size == 0 or self._mode == _MODE_READ_EOF or -            not self._fill_buffer()): +        if (size == 0 or +            # Only call _fill_buffer() if the buffer is actually empty. +            # This gives a significant speedup if *size* is small. +            (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):              return b"" -        if 0 < size < len(self._buffer): -            data = self._buffer[:size] -            self._buffer = self._buffer[size:] +        if size > 0: +            data = self._buffer[self._buffer_offset : +                                self._buffer_offset + size] +            self._buffer_offset += len(data)          else: -            data = self._buffer -            self._buffer = None +            data = self._buffer[self._buffer_offset:] +            self._buffer = b"" +            self._buffer_offset = 0          self._pos += len(data)          return data +    def readline(self, size=-1): +        """Read a line of uncompressed bytes from the file. + +        The terminating newline (if present) is retained. If size is +        non-negative, no more than size bytes will be read (in which +        case the line may be incomplete). Returns b'' if already at EOF. +        """ +        self._check_can_read() +        # Shortcut for the common case - the whole line is in the buffer. +        if size < 0: +            end = self._buffer.find(b"\n", self._buffer_offset) + 1 +            if end > 0: +                line = self._buffer[self._buffer_offset : end] +                self._buffer_offset = end +                self._pos += len(line) +                return line +        return io.BufferedIOBase.readline(self, size) +      def write(self, data):          """Write a bytes object to the file. @@ -326,7 +367,8 @@ class LZMAFile(io.BufferedIOBase):          self._mode = _MODE_READ          self._pos = 0          self._decompressor = LZMADecompressor(**self._init_args) -        self._buffer = None +        self._buffer = b"" +        self._buffer_offset = 0      def seek(self, offset, whence=0):          """Change the file position. @@ -365,8 +407,7 @@ class LZMAFile(io.BufferedIOBase):              offset -= self._pos          # Read and discard data until we reach the desired position. -        if self._mode != _MODE_READ_EOF: -            self._read_block(offset, return_data=False) +        self._read_block(offset, return_data=False)          return self._pos @@ -381,23 +422,24 @@ def open(filename, mode="rb", *,           encoding=None, errors=None, newline=None):      """Open an LZMA-compressed file in binary or text mode. -    filename can be either an actual file name (given as a str or bytes object), -    in which case the named file is opened, or it can be an existing file object -    to read from or write to. +    filename can be either an actual file name (given as a str or bytes +    object), in which case the named file is opened, or it can be an +    existing file object to read from or write to. -    The mode argument can be "r", "rb" (default), "w", "wb", "a", or "ab" for -    binary mode, or "rt", "wt" or "at" for text mode. +    The mode argument can be "r", "rb" (default), "w", "wb", "a" or "ab" +    for binary mode, or "rt", "wt" or "at" for text mode. -    The format, check, preset and filters arguments specify the compression -    settings, as for LZMACompressor, LZMADecompressor and LZMAFile. +    The format, check, preset and filters arguments specify the +    compression settings, as for LZMACompressor, LZMADecompressor and +    LZMAFile. -    For binary mode, this function is equivalent to the LZMAFile constructor: -    LZMAFile(filename, mode, ...). In this case, the encoding, errors and -    newline arguments must not be provided. +    For binary mode, this function is equivalent to the LZMAFile +    constructor: LZMAFile(filename, mode, ...). In this case, the +    encoding, errors and newline arguments must not be provided.      For text mode, a LZMAFile object is created, and wrapped in an -    io.TextIOWrapper instance with the specified encoding, error handling -    behavior, and line ending(s). +    io.TextIOWrapper instance with the specified encoding, error +    handling behavior, and line ending(s).      """      if "t" in mode: @@ -427,7 +469,7 @@ def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):      Refer to LZMACompressor's docstring for a description of the      optional arguments *format*, *check*, *preset* and *filters*. -    For incremental compression, use an LZMACompressor object instead. +    For incremental compression, use an LZMACompressor instead.      """      comp = LZMACompressor(format, check, preset, filters)      return comp.compress(data) + comp.flush() @@ -439,7 +481,7 @@ def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):      Refer to LZMADecompressor's docstring for a description of the      optional arguments *format*, *check* and *filters*. -    For incremental decompression, use a LZMADecompressor object instead. +    For incremental decompression, use an LZMADecompressor instead.      """      results = []      while True:  | 
