diff options
Diffstat (limited to 'Lib/pickle.py')
-rw-r--r-- | Lib/pickle.py | 167 |
1 files changed, 83 insertions, 84 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py index 5275991662..a690ccd8bc 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -265,7 +265,7 @@ class _Pickler: if i < 256: return BINPUT + bytes([i]) else: - return LONG_BINPUT + pack("<i", i) + return LONG_BINPUT + pack("<I", i) return PUT + repr(i).encode("ascii") + b'\n' @@ -275,7 +275,7 @@ class _Pickler: if i < 256: return BINGET + bytes([i]) else: - return LONG_BINGET + pack("<i", i) + return LONG_BINGET + pack("<I", i) return GET + repr(i).encode("ascii") + b'\n' @@ -299,20 +299,20 @@ class _Pickler: f(self, obj) # Call unbound method with explicit self return - # Check for a class with a custom metaclass; treat as regular class - try: - issc = issubclass(t, type) - except TypeError: # t is not a class (old Boost; see SF #502085) - issc = 0 - if issc: - self.save_global(obj) - return - # Check copyreg.dispatch_table reduce = dispatch_table.get(t) if reduce: rv = reduce(obj) else: + # Check for a class with a custom metaclass; treat as regular class + try: + issc = issubclass(t, type) + except TypeError: # t is not a class (old Boost; see SF #502085) + issc = False + if issc: + self.save_global(obj) + return + # Check for a __reduce_ex__ method, fall back to __reduce__ reduce = getattr(obj, "__reduce_ex__", None) if reduce: @@ -364,7 +364,7 @@ class _Pickler: raise PicklingError("args from save_reduce() should be a tuple") # Assert that func is callable - if not hasattr(func, '__call__'): + if not callable(func): raise PicklingError("func from save_reduce() should be callable") save = self.save @@ -487,13 +487,17 @@ class _Pickler: def save_bytes(self, obj, pack=struct.pack): if self.proto < 3: - self.save_reduce(bytes, (list(obj),), obj=obj) + if len(obj) == 0: + self.save_reduce(bytes, (), obj=obj) + else: + self.save_reduce(codecs.encode, + (str(obj, 'latin1'), 'latin1'), obj=obj) return n = len(obj) if n < 256: self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj)) else: - self.write(BINBYTES + pack("<i", n) + bytes(obj)) + self.write(BINBYTES + pack("<I", n) + bytes(obj)) self.memoize(obj) dispatch[bytes] = save_bytes @@ -501,7 +505,7 @@ class _Pickler: if self.bin: encoded = obj.encode('utf-8', 'surrogatepass') n = len(encoded) - self.write(BINUNICODE + pack("<i", n) + encoded) + self.write(BINUNICODE + pack("<I", n) + encoded) else: obj = obj.replace("\\", "\\u005c") obj = obj.replace("\n", "\\u000a") @@ -921,6 +925,9 @@ class _Unpickler: def load_long4(self): n = mloads(b'i' + self.read(4)) + if n < 0: + # Corrupt or hostile pickle -- we never write one like this + raise UnpicklingError("LONG pickle has negative byte count"); data = self.read(n) self.append(decode_long(data)) dispatch[LONG4[0]] = load_long4 @@ -949,14 +956,19 @@ class _Unpickler: dispatch[STRING[0]] = load_string def load_binstring(self): + # Deprecated BINSTRING uses signed 32-bit length len = mloads(b'i' + self.read(4)) + if len < 0: + raise UnpicklingError("BINSTRING pickle has negative byte count"); data = self.read(len) value = str(data, self.encoding, self.errors) self.append(value) dispatch[BINSTRING[0]] = load_binstring - def load_binbytes(self): - len = mloads(b'i' + self.read(4)) + def load_binbytes(self, unpack=struct.unpack, maxsize=sys.maxsize): + len, = unpack('<I', self.read(4)) + if len > maxsize: + raise UnpicklingError("BINBYTES exceeds system's maximum size of %d bytes" % maxsize); self.append(self.read(len)) dispatch[BINBYTES[0]] = load_binbytes @@ -964,8 +976,10 @@ class _Unpickler: self.append(str(self.readline()[:-1], 'raw-unicode-escape')) dispatch[UNICODE[0]] = load_unicode - def load_binunicode(self): - len = mloads(b'i' + self.read(4)) + def load_binunicode(self, unpack=struct.unpack, maxsize=sys.maxsize): + len, = unpack('<I', self.read(4)) + if len > maxsize: + raise UnpicklingError("BINUNICODE exceeds system's maximum size of %d bytes" % maxsize); self.append(str(self.read(len), 'utf-8', 'surrogatepass')) dispatch[BINUNICODE[0]] = load_binunicode @@ -1096,6 +1110,9 @@ class _Unpickler: return key = _inverted_registry.get(code) if not key: + if code <= 0: # note that 0 is forbidden + # Corrupt or hostile pickle. + raise UnpicklingError("EXT specifies code <= 0"); raise ValueError("unregistered extension code %d" % code) obj = self.find_class(*key) _extension_cache[code] = obj @@ -1149,23 +1166,29 @@ class _Unpickler: self.append(self.memo[i]) dispatch[BINGET[0]] = load_binget - def load_long_binget(self): - i = mloads(b'i' + self.read(4)) + def load_long_binget(self, unpack=struct.unpack): + i, = unpack('<I', self.read(4)) self.append(self.memo[i]) dispatch[LONG_BINGET[0]] = load_long_binget def load_put(self): i = int(self.readline()[:-1]) + if i < 0: + raise ValueError("negative PUT argument") self.memo[i] = self.stack[-1] dispatch[PUT[0]] = load_put def load_binput(self): i = self.read(1)[0] + if i < 0: + raise ValueError("negative BINPUT argument") self.memo[i] = self.stack[-1] dispatch[BINPUT[0]] = load_binput - def load_long_binput(self): - i = mloads(b'i' + self.read(4)) + def load_long_binput(self, unpack=struct.unpack, maxsize=sys.maxsize): + i, = unpack('<I', self.read(4)) + if i > maxsize: + raise ValueError("negative LONG_BINPUT argument") self.memo[i] = self.stack[-1] dispatch[LONG_BINPUT[0]] = load_long_binput @@ -1235,9 +1258,7 @@ class _Unpickler: raise _Stop(value) dispatch[STOP[0]] = load_stop -# Encode/decode longs in linear time. - -import binascii as _binascii +# Encode/decode longs. def encode_long(x): r"""Encode a long to a two's complement little-endian binary string. @@ -1260,50 +1281,14 @@ def encode_long(x): b'\x7f' >>> """ - if x == 0: return b'' - if x > 0: - ashex = hex(x) - assert ashex.startswith("0x") - njunkchars = 2 + ashex.endswith('L') - nibbles = len(ashex) - njunkchars - if nibbles & 1: - # need an even # of nibbles for unhexlify - ashex = "0x0" + ashex[2:] - elif int(ashex[2], 16) >= 8: - # "looks negative", so need a byte of sign bits - ashex = "0x00" + ashex[2:] - else: - # Build the 256's-complement: (1L << nbytes) + x. The trick is - # to find the number of bytes in linear time (although that should - # really be a constant-time task). - ashex = hex(-x) - assert ashex.startswith("0x") - njunkchars = 2 + ashex.endswith('L') - nibbles = len(ashex) - njunkchars - if nibbles & 1: - # Extend to a full byte. - nibbles += 1 - nbits = nibbles * 4 - x += 1 << nbits - assert x > 0 - ashex = hex(x) - njunkchars = 2 + ashex.endswith('L') - newnibbles = len(ashex) - njunkchars - if newnibbles < nibbles: - ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:] - if int(ashex[2], 16) < 8: - # "looks positive", so need a byte of sign bits - ashex = "0xff" + ashex[2:] - - if ashex.endswith('L'): - ashex = ashex[2:-1] - else: - ashex = ashex[2:] - assert len(ashex) & 1 == 0, (x, ashex) - binary = _binascii.unhexlify(ashex) - return bytes(binary[::-1]) + nbytes = (x.bit_length() >> 3) + 1 + result = x.to_bytes(nbytes, byteorder='little', signed=True) + if x < 0 and nbytes > 1: + if result[-1] == 0xff and (result[-2] & 0x80) != 0: + result = result[:-1] + return result def decode_long(data): r"""Decode a long from a two's complement little-endian binary string. @@ -1323,21 +1308,7 @@ def decode_long(data): >>> decode_long(b"\x7f") 127 """ - - nbytes = len(data) - if nbytes == 0: - return 0 - ashex = _binascii.hexlify(data[::-1]) - n = int(ashex, 16) # quadratic time before Python 2.3; linear now - if data[-1] >= 0x80: - n -= 1 << (nbytes * 8) - return n - -# Use the faster _pickle if possible -try: - from _pickle import * -except ImportError: - Pickler, Unpickler = _Pickler, _Unpickler + return int.from_bytes(data, byteorder='little', signed=True) # Shorthands @@ -1362,10 +1333,38 @@ def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): return Unpickler(file, fix_imports=fix_imports, encoding=encoding, errors=errors).load() +# Use the faster _pickle if possible +try: + from _pickle import * +except ImportError: + Pickler, Unpickler = _Pickler, _Unpickler + # Doctest def _test(): import doctest return doctest.testmod() if __name__ == "__main__": - _test() + import sys, argparse + parser = argparse.ArgumentParser( + description='display contents of the pickle files') + parser.add_argument( + 'pickle_file', type=argparse.FileType('br'), + nargs='*', help='the pickle file') + parser.add_argument( + '-t', '--test', action='store_true', + help='run self-test suite') + parser.add_argument( + '-v', action='store_true', + help='run verbosely; only affects self-test run') + args = parser.parse_args() + if args.test: + _test() + else: + if not args.pickle_file: + parser.print_help() + else: + import pprint + for f in args.pickle_file: + obj = load(f) + pprint.pprint(obj) |