summary | refs | log | tree | commit | diff
path: root/Lib/pickle.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/pickle.py')
-rw-r--r-- Lib/pickle.py 167
1 files changed, 83 insertions, 84 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 5275991662..a690ccd8bc 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -265,7 +265,7 @@ class _Pickler:
if i < 256:
return BINPUT + bytes([i])
else:
- return LONG_BINPUT + pack("<i", i)
+ return LONG_BINPUT + pack("<I", i)
return PUT + repr(i).encode("ascii") + b'\n'
@@ -275,7 +275,7 @@ class _Pickler:
if i < 256:
return BINGET + bytes([i])
else:
- return LONG_BINGET + pack("<i", i)
+ return LONG_BINGET + pack("<I", i)
return GET + repr(i).encode("ascii") + b'\n'
@@ -299,20 +299,20 @@ class _Pickler:
f(self, obj) # Call unbound method with explicit self
return
- # Check for a class with a custom metaclass; treat as regular class
- try:
- issc = issubclass(t, type)
- except TypeError: # t is not a class (old Boost; see SF #502085)
- issc = 0
- if issc:
- self.save_global(obj)
- return
-
# Check copyreg.dispatch_table
reduce = dispatch_table.get(t)
if reduce:
rv = reduce(obj)
else:
+ # Check for a class with a custom metaclass; treat as regular class
+ try:
+ issc = issubclass(t, type)
+ except TypeError: # t is not a class (old Boost; see SF #502085)
+ issc = False
+ if issc:
+ self.save_global(obj)
+ return
+
# Check for a __reduce_ex__ method, fall back to __reduce__
reduce = getattr(obj, "__reduce_ex__", None)
if reduce:
@@ -364,7 +364,7 @@ class _Pickler:
raise PicklingError("args from save_reduce() should be a tuple")
# Assert that func is callable
- if not hasattr(func, '__call__'):
+ if not callable(func):
raise PicklingError("func from save_reduce() should be callable")
save = self.save
@@ -487,13 +487,17 @@ class _Pickler:
def save_bytes(self, obj, pack=struct.pack):
if self.proto < 3:
- self.save_reduce(bytes, (list(obj),), obj=obj)
+ if len(obj) == 0:
+ self.save_reduce(bytes, (), obj=obj)
+ else:
+ self.save_reduce(codecs.encode,
+ (str(obj, 'latin1'), 'latin1'), obj=obj)
return
n = len(obj)
if n < 256:
self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj))
else:
- self.write(BINBYTES + pack("<i", n) + bytes(obj))
+ self.write(BINBYTES + pack("<I", n) + bytes(obj))
self.memoize(obj)
dispatch[bytes] = save_bytes
@@ -501,7 +505,7 @@ class _Pickler:
if self.bin:
encoded = obj.encode('utf-8', 'surrogatepass')
n = len(encoded)
- self.write(BINUNICODE + pack("<i", n) + encoded)
+ self.write(BINUNICODE + pack("<I", n) + encoded)
else:
obj = obj.replace("\\", "\\u005c")
obj = obj.replace("\n", "\\u000a")
@@ -921,6 +925,9 @@ class _Unpickler:
def load_long4(self):
n = mloads(b'i' + self.read(4))
+ if n < 0:
+ # Corrupt or hostile pickle -- we never write one like this
+ raise UnpicklingError("LONG pickle has negative byte count")
data = self.read(n)
self.append(decode_long(data))
dispatch[LONG4[0]] = load_long4
@@ -949,14 +956,19 @@ class _Unpickler:
dispatch[STRING[0]] = load_string
def load_binstring(self):
+ # Deprecated BINSTRING uses signed 32-bit length
len = mloads(b'i' + self.read(4))
+ if len < 0:
+ raise UnpicklingError("BINSTRING pickle has negative byte count")
data = self.read(len)
value = str(data, self.encoding, self.errors)
self.append(value)
dispatch[BINSTRING[0]] = load_binstring
- def load_binbytes(self):
- len = mloads(b'i' + self.read(4))
+ def load_binbytes(self, unpack=struct.unpack, maxsize=sys.maxsize):
+ len, = unpack('<I', self.read(4))  # length prefix: unsigned 32-bit little-endian
+ if len > maxsize:
+ raise UnpicklingError("BINBYTES exceeds system's maximum size of %d bytes" % maxsize)
self.append(self.read(len))
dispatch[BINBYTES[0]] = load_binbytes
@@ -964,8 +976,10 @@ class _Unpickler:
self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
dispatch[UNICODE[0]] = load_unicode
- def load_binunicode(self):
- len = mloads(b'i' + self.read(4))
+ def load_binunicode(self, unpack=struct.unpack, maxsize=sys.maxsize):
+ len, = unpack('<I', self.read(4))  # length prefix: unsigned 32-bit little-endian
+ if len > maxsize:
+ raise UnpicklingError("BINUNICODE exceeds system's maximum size of %d bytes" % maxsize)
self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
dispatch[BINUNICODE[0]] = load_binunicode
@@ -1096,6 +1110,9 @@ class _Unpickler:
return
key = _inverted_registry.get(code)
if not key:
+ if code <= 0: # note that 0 is forbidden
+ # Corrupt or hostile pickle.
+ raise UnpicklingError("EXT specifies code <= 0");
raise ValueError("unregistered extension code %d" % code)
obj = self.find_class(*key)
_extension_cache[code] = obj
@@ -1149,23 +1166,29 @@ class _Unpickler:
self.append(self.memo[i])
dispatch[BINGET[0]] = load_binget
- def load_long_binget(self):
- i = mloads(b'i' + self.read(4))
+ def load_long_binget(self, unpack=struct.unpack):
+ i, = unpack('<I', self.read(4))  # memo key: unsigned 32-bit little-endian
self.append(self.memo[i])
dispatch[LONG_BINGET[0]] = load_long_binget
def load_put(self):
i = int(self.readline()[:-1])
+ if i < 0:  # reject a negative memo key parsed from the text PUT argument
+ raise ValueError("negative PUT argument")
self.memo[i] = self.stack[-1]
dispatch[PUT[0]] = load_put
def load_binput(self):
i = self.read(1)[0]
+ if i < 0:  # NOTE(review): if self.read() returns bytes, i is 0..255 and this can never fire -- confirm
+ raise ValueError("negative BINPUT argument")
self.memo[i] = self.stack[-1]
dispatch[BINPUT[0]] = load_binput
- def load_long_binput(self):
- i = mloads(b'i' + self.read(4))
+ def load_long_binput(self, unpack=struct.unpack, maxsize=sys.maxsize):
+ i, = unpack('<I', self.read(4))  # unsigned 32-bit little-endian, so never negative
+ if i > maxsize:
+ raise ValueError("LONG_BINPUT argument exceeds system's maximum size of %d" % maxsize)
self.memo[i] = self.stack[-1]
dispatch[LONG_BINPUT[0]] = load_long_binput
@@ -1235,9 +1258,7 @@ class _Unpickler:
raise _Stop(value)
dispatch[STOP[0]] = load_stop
-# Encode/decode longs in linear time.
-
-import binascii as _binascii
+# Encode/decode longs.
def encode_long(x):
r"""Encode a long to a two's complement little-endian binary string.
@@ -1260,50 +1281,14 @@ def encode_long(x):
b'\x7f'
>>>
"""
-
if x == 0:
return b''
- if x > 0:
- ashex = hex(x)
- assert ashex.startswith("0x")
- njunkchars = 2 + ashex.endswith('L')
- nibbles = len(ashex) - njunkchars
- if nibbles & 1:
- # need an even # of nibbles for unhexlify
- ashex = "0x0" + ashex[2:]
- elif int(ashex[2], 16) >= 8:
- # "looks negative", so need a byte of sign bits
- ashex = "0x00" + ashex[2:]
- else:
- # Build the 256's-complement: (1L << nbytes) + x. The trick is
- # to find the number of bytes in linear time (although that should
- # really be a constant-time task).
- ashex = hex(-x)
- assert ashex.startswith("0x")
- njunkchars = 2 + ashex.endswith('L')
- nibbles = len(ashex) - njunkchars
- if nibbles & 1:
- # Extend to a full byte.
- nibbles += 1
- nbits = nibbles * 4
- x += 1 << nbits
- assert x > 0
- ashex = hex(x)
- njunkchars = 2 + ashex.endswith('L')
- newnibbles = len(ashex) - njunkchars
- if newnibbles < nibbles:
- ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
- if int(ashex[2], 16) < 8:
- # "looks positive", so need a byte of sign bits
- ashex = "0xff" + ashex[2:]
-
- if ashex.endswith('L'):
- ashex = ashex[2:-1]
- else:
- ashex = ashex[2:]
- assert len(ashex) & 1 == 0, (x, ashex)
- binary = _binascii.unhexlify(ashex)
- return bytes(binary[::-1])
+ nbytes = (x.bit_length() >> 3) + 1
+ result = x.to_bytes(nbytes, byteorder='little', signed=True)
+ if x < 0 and nbytes > 1:
+ if result[-1] == 0xff and (result[-2] & 0x80) != 0:
+ result = result[:-1]
+ return result
def decode_long(data):
r"""Decode a long from a two's complement little-endian binary string.
@@ -1323,21 +1308,7 @@ def decode_long(data):
>>> decode_long(b"\x7f")
127
"""
-
- nbytes = len(data)
- if nbytes == 0:
- return 0
- ashex = _binascii.hexlify(data[::-1])
- n = int(ashex, 16) # quadratic time before Python 2.3; linear now
- if data[-1] >= 0x80:
- n -= 1 << (nbytes * 8)
- return n
-
-# Use the faster _pickle if possible
-try:
- from _pickle import *
-except ImportError:
- Pickler, Unpickler = _Pickler, _Unpickler
+ return int.from_bytes(data, byteorder='little', signed=True)
# Shorthands
@@ -1362,10 +1333,38 @@ def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
return Unpickler(file, fix_imports=fix_imports,
encoding=encoding, errors=errors).load()
+# Use the faster _pickle if possible
+try:
+ from _pickle import *
+except ImportError:
+ Pickler, Unpickler = _Pickler, _Unpickler
+
# Doctest
def _test():
import doctest
return doctest.testmod()
if __name__ == "__main__":
- _test()
+ import sys, argparse
+ parser = argparse.ArgumentParser(
+ description='display contents of the pickle files')
+ parser.add_argument(
+ 'pickle_file', type=argparse.FileType('br'),  # argparse opens each path in binary read mode
+ nargs='*', help='the pickle file')
+ parser.add_argument(
+ '-t', '--test', action='store_true',
+ help='run self-test suite')
+ parser.add_argument(
+ '-v', action='store_true',  # NOTE(review): args.v is never read below; presumably doctest.testmod() sees -v via sys.argv -- confirm
+ help='run verbosely; only affects self-test run')
+ args = parser.parse_args()
+ if args.test:
+ _test()
+ else:
+ if not args.pickle_file:
+ parser.print_help()  # no files given and no --test: show usage instead of doing nothing
+ else:
+ import pprint
+ for f in args.pickle_file:
+ obj = load(f)  # NOTE(review): f was opened by argparse.FileType and is never closed explicitly here
+ pprint.pprint(obj)