summaryrefslogtreecommitdiff
path: root/Lib/pickle.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/pickle.py')
-rw-r--r--Lib/pickle.py64
1 files changed, 45 insertions, 19 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index e3c112f3ba..ea8d8b545f 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -42,19 +42,22 @@ __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
bytes_types = (bytes, bytearray, memoryview)
# These are purely informational; no code uses these.
-format_version = "2.0" # File format version we write
+format_version = "3.0" # File format version we write
compatible_formats = ["1.0", # Original protocol 0
"1.1", # Protocol 0 with INST added
"1.2", # Original protocol 1
"1.3", # Protocol 1 with BINFLOAT added
"2.0", # Protocol 2
+ "3.0", # Protocol 3
] # Old format versions we can read
# This is the highest protocol number we know how to read.
-HIGHEST_PROTOCOL = 2
+HIGHEST_PROTOCOL = 3
# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
-DEFAULT_PROTOCOL = 2
+# We intentionally write a protocol that Python 2.x cannot read;
+# there are too many issues with that.
+DEFAULT_PROTOCOL = 3
# Why use struct.pack() for pickling but marshal.loads() for
# unpickling? struct.pack() is 40% faster than marshal.dumps(), but
@@ -161,6 +164,10 @@ LONG4 = b'\x8b' # push really big long
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
+# Protocol 3 (Python 3.x)
+
+BINBYTES = b'B' # push bytes; counted binary string argument
+SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
@@ -494,20 +501,19 @@ class Pickler:
self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
dispatch[float] = save_float
- def save_string(self, obj, pack=struct.pack):
- if self.bin:
- n = len(obj)
- if n < 256:
- self.write(SHORT_BINSTRING + bytes([n]) + bytes(obj))
- else:
- self.write(BINSTRING + pack("<i", n) + bytes(obj))
+ def save_bytes(self, obj, pack=struct.pack):
+ if self.proto < 3:
+ self.save_reduce(bytes, (list(obj),))
+ return
+ n = len(obj)
+ if n < 256:
+ self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj))
else:
- # Strip leading 'b' due to repr() of bytes() returning b'...'
- self.write(STRING + repr(obj).lstrip("b").encode("ascii") + b'\n')
+ self.write(BINBYTES + pack("<i", n) + bytes(obj))
self.memoize(obj)
- dispatch[bytes] = save_string
+ dispatch[bytes] = save_bytes
- def save_unicode(self, obj, pack=struct.pack):
+ def save_str(self, obj, pack=struct.pack):
if self.bin:
encoded = obj.encode('utf-8')
n = len(encoded)
@@ -518,7 +524,7 @@ class Pickler:
self.write(UNICODE + bytes(obj.encode('raw-unicode-escape')) +
b'\n')
self.memoize(obj)
- dispatch[str] = save_unicode
+ dispatch[str] = save_str
def save_tuple(self, obj):
write = self.write
@@ -775,7 +781,7 @@ def whichmodule(func, funcname):
class Unpickler:
- def __init__(self, file):
+ def __init__(self, file, *, encoding="ASCII", errors="strict"):
"""This takes a binary file for reading a pickle data stream.
The protocol version of the pickle is detected automatically, so no
@@ -787,10 +793,16 @@ class Unpickler:
Thus file-like object can be a binary file object opened for
reading, a BytesIO object, or any other custom object that
meets this interface.
+
+ Optional keyword arguments are encoding and errors, which are
+ used to decode 8-bit string instances pickled by Python 2.x.
+ These default to 'ASCII' and 'strict', respectively.
"""
self.readline = file.readline
self.read = file.read
self.memo = {}
+ self.encoding = encoding
+ self.errors = errors
def load(self):
"""Read a pickled object representation from the open file.
@@ -831,7 +843,7 @@ class Unpickler:
def load_proto(self):
proto = ord(self.read(1))
- if not 0 <= proto <= 2:
+ if not 0 <= proto <= HIGHEST_PROTOCOL:
raise ValueError("unsupported pickle protocol: %d" % proto)
dispatch[PROTO[0]] = load_proto
@@ -924,9 +936,16 @@ class Unpickler:
def load_binstring(self):
len = mloads(b'i' + self.read(4))
- self.append(self.read(len))
+ data = self.read(len)
+ value = str(data, self.encoding, self.errors)
+ self.append(value)
dispatch[BINSTRING[0]] = load_binstring
+ def load_binbytes(self):
+ len = mloads(b'i' + self.read(4))
+ self.append(self.read(len))
+ dispatch[BINBYTES[0]] = load_binbytes
+
def load_unicode(self):
self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
dispatch[UNICODE[0]] = load_unicode
@@ -938,9 +957,16 @@ class Unpickler:
def load_short_binstring(self):
len = ord(self.read(1))
- self.append(bytes(self.read(len)))
+ data = bytes(self.read(len))
+ value = str(data, self.encoding, self.errors)
+ self.append(value)
dispatch[SHORT_BINSTRING[0]] = load_short_binstring
+ def load_short_binbytes(self):
+ len = ord(self.read(1))
+ self.append(bytes(self.read(len)))
+ dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
+
def load_tuple(self):
k = self.marker()
self.stack[k:] = [tuple(self.stack[k+1:])]