From 8fb3e0c2848d4225a7d5e13f107564646d2f5cb6 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Thu, 26 Dec 2019 17:52:50 +0000 Subject: trivial: Fix indentation Signed-off-by: Stephen Finucane --- python/subunit/v2.py | 192 ++++++++++++++++++++++++++++----------------------- 1 file changed, 104 insertions(+), 88 deletions(-) (limited to 'python/subunit') diff --git a/python/subunit/v2.py b/python/subunit/v2.py index 7f4842e..c2c63f6 100644 --- a/python/subunit/v2.py +++ b/python/subunit/v2.py @@ -405,95 +405,111 @@ class ByteStreamToStreamResult(object): return result, 4 def _parse(self, packet, result): - # 2 bytes flags, at most 3 bytes length. - packet.append(self.source.read(5)) - if len(packet[-1]) != 5: - raise ParseError( - 'Short read - got %d bytes, wanted 5' % len(packet[-1])) - flag_bytes = packet[-1][:2] - flags = struct.unpack(FMT_16, flag_bytes)[0] - length, consumed = self._parse_varint( - packet[-1], 2, max_3_bytes=True) - remainder = self.source.read(length - 6) - if len(remainder) != length - 6: - raise ParseError( - 'Short read - got %d bytes, wanted %d bytes' % ( + # 2 bytes flags, at most 3 bytes length. + packet.append(self.source.read(5)) + if len(packet[-1]) != 5: + raise ParseError( + 'Short read - got %d bytes, wanted 5' % len(packet[-1])) + + flag_bytes = packet[-1][:2] + flags = struct.unpack(FMT_16, flag_bytes)[0] + length, consumed = self._parse_varint( + packet[-1], 2, max_3_bytes=True) + remainder = self.source.read(length - 6) + if len(remainder) != length - 6: + raise ParseError( + 'Short read - got %d bytes, wanted %d bytes' % ( len(remainder), length - 6)) - if consumed != 3: - # Avoid having to parse torn values - packet[-1] += remainder - pos = 2 + consumed - else: - # Avoid copying potentially lots of data. - packet.append(remainder) - pos = 0 - crc = zlib.crc32(packet[0]) - for fragment in packet[1:-1]: - crc = zlib.crc32(fragment, crc) - crc = zlib.crc32(packet[-1][:-4], crc) & 0xffffffff - packet_crc = struct.unpack(FMT_32, packet[-1][-4:])[0] - if crc != packet_crc: - # Bad CRC, report it and stop parsing the packet. - raise ParseError( - 'Bad checksum - calculated (0x%x), stored (0x%x)' - % (crc, packet_crc)) - if safe_hasattr(builtins, 'memoryview'): - body = memoryview(packet[-1]) - else: - body = packet[-1] - # Discard CRC-32 - body = body[:-4] - # One packet could have both file and status data; the Python API - # presents these separately (perhaps it shouldn't?) - if flags & FLAG_TIMESTAMP: - seconds = struct.unpack(FMT_32, self._to_bytes(body, pos, 4))[0] - nanoseconds, consumed = self._parse_varint(body, pos+4) - pos = pos + 4 + consumed - timestamp = EPOCH + datetime.timedelta( - seconds=seconds, microseconds=nanoseconds/1000) - else: - timestamp = None - if flags & FLAG_TEST_ID: - test_id, pos = self._read_utf8(body, pos) - else: - test_id = None - if flags & FLAG_TAGS: - tag_count, consumed = self._parse_varint(body, pos) - pos += consumed - test_tags = set() - for _ in range(tag_count): - tag, pos = self._read_utf8(body, pos) - test_tags.add(tag) - else: - test_tags = None - if flags & FLAG_MIME_TYPE: - mime_type, pos = self._read_utf8(body, pos) - else: - mime_type = None - if flags & FLAG_FILE_CONTENT: - file_name, pos = self._read_utf8(body, pos) - content_length, consumed = self._parse_varint(body, pos) - pos += consumed - file_bytes = self._to_bytes(body, pos, content_length) - if len(file_bytes) != content_length: - raise ParseError('File content extends past end of packet: ' - 'claimed %d bytes, %d available' % ( - content_length, len(file_bytes))) - pos += content_length - else: - file_name = None - file_bytes = None - if flags & FLAG_ROUTE_CODE: - route_code, pos = self._read_utf8(body, pos) - else: - route_code = None - runnable = bool(flags & FLAG_RUNNABLE) - eof = bool(flags & FLAG_EOF) - test_status = self.status_lookup[flags & 0x0007] - result.status(test_id=test_id, test_status=test_status, - test_tags=test_tags, runnable=runnable, mime_type=mime_type, - eof=eof, file_name=file_name, file_bytes=file_bytes, - route_code=route_code, timestamp=timestamp) + + if consumed != 3: + # Avoid having to parse torn values + packet[-1] += remainder + pos = 2 + consumed + else: + # Avoid copying potentially lots of data. + packet.append(remainder) + pos = 0 + + crc = zlib.crc32(packet[0]) + for fragment in packet[1:-1]: + crc = zlib.crc32(fragment, crc) + + crc = zlib.crc32(packet[-1][:-4], crc) & 0xffffffff + packet_crc = struct.unpack(FMT_32, packet[-1][-4:])[0] + + if crc != packet_crc: + # Bad CRC, report it and stop parsing the packet. + raise ParseError( + 'Bad checksum - calculated (0x%x), stored (0x%x)' % ( + crc, packet_crc)) + + if safe_hasattr(builtins, 'memoryview'): + body = memoryview(packet[-1]) + else: + body = packet[-1] + + # Discard CRC-32 + body = body[:-4] + + # One packet could have both file and status data; the Python API + # presents these separately (perhaps it shouldn't?) + if flags & FLAG_TIMESTAMP: + seconds = struct.unpack(FMT_32, self._to_bytes(body, pos, 4))[0] + nanoseconds, consumed = self._parse_varint(body, pos+4) + pos = pos + 4 + consumed + timestamp = EPOCH + datetime.timedelta( + seconds=seconds, microseconds=nanoseconds/1000) + else: + timestamp = None + + if flags & FLAG_TEST_ID: + test_id, pos = self._read_utf8(body, pos) + else: + test_id = None + + if flags & FLAG_TAGS: + tag_count, consumed = self._parse_varint(body, pos) + pos += consumed + test_tags = set() + for _ in range(tag_count): + tag, pos = self._read_utf8(body, pos) + test_tags.add(tag) + else: + test_tags = None + + if flags & FLAG_MIME_TYPE: + mime_type, pos = self._read_utf8(body, pos) + else: + mime_type = None + + if flags & FLAG_FILE_CONTENT: + file_name, pos = self._read_utf8(body, pos) + content_length, consumed = self._parse_varint(body, pos) + pos += consumed + file_bytes = self._to_bytes(body, pos, content_length) + if len(file_bytes) != content_length: + raise ParseError('File content extends past end of packet: ' + 'claimed %d bytes, %d available' % ( + content_length, len(file_bytes))) + pos += content_length + else: + file_name = None + file_bytes = None + + if flags & FLAG_ROUTE_CODE: + route_code, pos = self._read_utf8(body, pos) + else: + route_code = None + + runnable = bool(flags & FLAG_RUNNABLE) + eof = bool(flags & FLAG_EOF) + test_status = self.status_lookup[flags & 0x0007] + result.status( + test_id=test_id, test_status=test_status, + test_tags=test_tags, runnable=runnable, mime_type=mime_type, + eof=eof, file_name=file_name, file_bytes=file_bytes, + route_code=route_code, timestamp=timestamp) + __call__ = run def _read_utf8(self, buf, pos): -- cgit v1.2.1 From 26d31fa7c34019fad9038addf8114bbb4b656c92 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Sat, 14 Mar 2020 00:18:03 +0000 Subject: Correctly handle py3 RawIOBase read() Python3's RawIOBase guarantees only one syscall per read() requiring a loop to accumulate the desired number of bytes or actually reach EOF. TextIOBase.read does issue multiple syscalls (it must to correctly decode partial unicode characters), but subunit unwraps that to get a binary stream, and at least some of the time the layering is io.TextIOBase(_io.FileIO), where _io.FileIO is a RawIOBase subclass rather than BufferedIOBase. Signed-off-by: Stephen Finucane Partial-bug: #1813147 --- python/subunit/v2.py | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'python/subunit') diff --git a/python/subunit/v2.py b/python/subunit/v2.py index c2c63f6..e8a31d6 100644 --- a/python/subunit/v2.py +++ b/python/subunit/v2.py @@ -72,13 +72,32 @@ def has_nul(buffer_or_bytes): return NUL_ELEMENT in buffer_or_bytes +def read_exactly(stream, size): + """Read exactly size bytes from stream. + + :param stream: A file like object to read bytes from. Must support + read() and return bytes. + :param size: The number of bytes to retrieve. + """ + data = b'' + remaining = size + while remaining: + read = stream.read(remaining) + if len(read) == 0: + raise ParseError('Short read - got %d bytes, wanted %d bytes' % ( + len(data), size)) + data += read + remaining -= len(read) + return data + + class ParseError(Exception): """Used to pass error messages within the parser.""" class StreamResultToBytes(object): """Convert StreamResult API calls to bytes. - + The StreamResult API is defined by testtools.StreamResult. """ @@ -276,7 +295,7 @@ class ByteStreamToStreamResult(object): def run(self, result): """Parse source and emit events to result. - + This is a blocking call: it will run until EOF is detected on source. """ self.codec.reset() @@ -406,21 +425,12 @@ class ByteStreamToStreamResult(object): def _parse(self, packet, result): # 2 bytes flags, at most 3 bytes length. - packet.append(self.source.read(5)) - if len(packet[-1]) != 5: - raise ParseError( - 'Short read - got %d bytes, wanted 5' % len(packet[-1])) - - flag_bytes = packet[-1][:2] - flags = struct.unpack(FMT_16, flag_bytes)[0] - length, consumed = self._parse_varint( - packet[-1], 2, max_3_bytes=True) - remainder = self.source.read(length - 6) - if len(remainder) != length - 6: - raise ParseError( - 'Short read - got %d bytes, wanted %d bytes' % ( - len(remainder), length - 6)) + header = read_exactly(self.source, 5) + packet.append(header) + flags = struct.unpack(FMT_16, header[:2])[0] + length, consumed = self._parse_varint(header, 2, max_3_bytes=True) + remainder = read_exactly(self.source, length - 6) if consumed != 3: # Avoid having to parse torn values packet[-1] += remainder @@ -533,4 +543,3 @@ class ByteStreamToStreamResult(object): return utf8, length+pos except UnicodeDecodeError: raise ParseError('UTF8 string at offset %d is not UTF8' % (pos-2,)) - -- cgit v1.2.1