From 6a28b28c6314158ee099c95d452406278cdb93b0 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Mon, 28 Jan 2013 12:27:24 +0100 Subject: Add pure Python fallback module Signed-off-by: Bas Westerbaan --- msgpack/__init__.py | 8 +- msgpack/fallback.py | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 482 insertions(+), 2 deletions(-) create mode 100644 msgpack/fallback.py diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 0bfe6fe..49a32d9 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,8 +1,12 @@ # coding: utf-8 from msgpack._version import version from msgpack.exceptions import * -from msgpack._packer import pack, packb, Packer -from msgpack._unpacker import unpack, unpackb, Unpacker + +try: + from msgpack._packer import pack, packb, Packer + from msgpack._unpacker import unpack, unpackb, Unpacker +except ImportError: + from msgpack.fallback import pack, packb, Packer, unpack, unpackb, Unpacker # alias for compatibility to simplejson/marshal/pickle. load = unpack diff --git a/msgpack/fallback.py b/msgpack/fallback.py new file mode 100644 index 0000000..2ca3a8f --- /dev/null +++ b/msgpack/fallback.py @@ -0,0 +1,476 @@ +# Fallback pure Python implementation of msgpack + +import sys +import array +import struct + +try: + import cStringIO as StringIO +except ImportError: + import StringIO + +from msgpack.exceptions import ( + BufferFull, + OutOfData, + UnpackValueError, + PackValueError, + ExtraData) + +EX_SKIP = 0 +EX_CONSTRUCT = 1 +EX_READ_ARRAY_HEADER = 2 +EX_READ_MAP_HEADER = 3 + +TYPE_IMMEDIATE = 0 +TYPE_ARRAY = 1 +TYPE_MAP = 2 +TYPE_RAW = 3 + +DEFAULT_RECURSE_LIMIT=511 + +def pack(o, stream, default=None, encoding='utf-8', unicode_errors='strict'): + """ Pack object `o` and write it to `stream` """ + packer = Packer(default=default, encoding=encoding, + unicode_errors=unicode_errors) + stream.write(packer.pack(o)) + +def packb(o, default=None, encoding='utf-8', unicode_errors='struct', + use_single_float=False): + """ Pack object `o` and return packed bytes """ + packer = Packer(default=default, + encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float) + return packer.pack(o) + +def unpack(stream, object_hook=None, list_hook=None, use_list=True, + encoding=None, unicode_errors='strict', + object_pairs_hook=None): + """ Unpack an object from `stream`. + + Raises `ExtraData` when `stream` has extra bytes. """ + unpacker = Unpacker(stream, object_hook=object_hook, list_hook=list_hook, + use_list=use_list, encoding=encoding, unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook) + ret = unpacker._fb_unpack() + unpacker._fb_check_for_extradata() + return ret + +def unpackb(packed, object_hook=None, list_hook=None, use_list=True, + encoding=None, unicode_errors='strict', + object_pairs_hook=None): + """ Unpack an object from `packed`. + + Raises `ExtraData` when `packed` contains extra bytes. """ + unpacker = Unpacker(None, object_hook=object_hook, list_hook=list_hook, + use_list=use_list, encoding=encoding, unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook) + unpacker.feed(packed) + ret = unpacker._fb_unpack() + unpacker._fb_check_for_extradata() + return ret + +class Unpacker(object): + """ + Streaming unpacker. + + `file_like` is a file-like object having a `.read(n)` method. + When `Unpacker` is initialized with a `file_like`, `.feed()` is not + usable. + + `read_size` is used for `file_like.read(read_size)`. 
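The module-level helpers above (pack, packb, unpack, unpackb) deliberately mirror the C extension's public API, which is what lets __init__.py fall back transparently. A minimal round-trip sketch using only names defined in this patch (assuming the package is importable from this tree; encoding='utf-8' is passed so raws come back as text):

    from msgpack.fallback import packb, unpackb

    payload = {'compact': True, 'count': 3}
    data = packb(payload)                        # msgpack-encoded bytes
    assert unpackb(data, encoding='utf-8') == payload
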
+ + If `use_list` is True (default), msgpack lists are deserialized to Python + lists. Otherwise they are deserialized to tuples. + + `object_hook` is the same as in simplejson. If it is not None, it should + be callable and Unpacker calls it with a dict argument after deserializing + a map. + + `object_pairs_hook` is the same as in simplejson. If it is not None, it + should be callable and Unpacker calls it with a list of key-value pairs + after deserializing a map. + + `encoding` is the encoding used for decoding msgpack bytes. If it is + None (default), msgpack bytes are deserialized to Python bytes. + + `unicode_errors` is used for decoding bytes. + + `max_buffer_size` limits the buffer size. 0 means INT_MAX (default). + + Raises `BufferFull` exception when it is unsufficient. + + You should set this parameter when unpacking data from an untrustred source. + + example of streaming deserialization from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + do_something(o) + + example of streaming deserialization from socket:: + + unpacker = Unapcker() + while 1: + buf = sock.recv(1024*2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + do_something(o) + """ + + def __init__(self, file_like=None, read_size=0, use_list=True, + object_hook=None, object_pairs_hook=None, list_hook=None, + encoding=None, unicode_errors='strict', max_buffer_size=0): + if file_like is None: + self._fb_feeding = True + else: + if not callable(file_like.read): + raise ValueError("`file_like.read` must be callable") + self.file_like = file_like + self._fb_feeding = False + self._fb_buffers = [] + self._fb_buf_o = 0 + self._fb_buf_i = 0 + self._fb_buf_n = 0 + self.max_buffer_size = (sys.maxint if max_buffer_size == 0 + else max_buffer_size) + self.read_size = (read_size if read_size != 0 + else min(self.max_buffer_size, 2048)) + if read_size > self.max_buffer_size: + raise ValueError("read_size must be smaller than max_buffer_size") + self.encoding = encoding + self.unicode_errors = unicode_errors + self.use_list = use_list + self.list_hook = list_hook + self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook + + if list_hook is not None and not callable(list_hook): + raise ValueError('`list_hook` is not callable') + if object_hook is not None and not callable(object_hook): + raise ValueError('`object_hook` is not callable') + if object_pairs_hook is not None and not callable(object_pairs_hook): + raise ValueError('`object_pairs_hook` is not callable') + if object_hook is not None and object_pairs_hook is not None: + raise ValueError("object_pairs_hook and object_hook are mutually "+ + "exclusive") + + def feed(self, next_bytes): + if isinstance(next_bytes, array.array): + next_bytes = next_bytes.tostring() + assert self._fb_feeding + if self._fb_buf_n + len(next_bytes) > self.max_buffer_size: + raise BufferFull + self._fb_buf_n += len(next_bytes) + self._fb_buffers.append(next_bytes) + + def _fb_consume(self): + self._fb_buffers = self._fb_buffers[self._fb_buf_i:] + if self._fb_buffers: + self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:] + self._fb_buf_o = 0 + self._fb_buf_i = 0 + self._fb_buf_n = sum(map(len, self._fb_buffers)) + + def _fb_check_for_extradata(self): + if self._fb_buf_i != len(self._fb_buffers): + raise ExtraData + if self._fb_feeding: + return + if not self.file_like: + return + if not self.file_like.read(1): + raise ExtraData + + def __iter__(self): + return self + + def next(self): + try: + ret = self._fb_unpack(None, None) + 
self._fb_consume() + return ret + except OutOfData: + raise StopIteration + + def read_bytes(self, n): + return self._fb_read(n) + + def _fb_read(self, n, write_bytes=None): + ret = '' + while len(ret) != n: + if self._fb_buf_i == len(self._fb_buffers): + if self._fb_feeding: + break + tmp = self.file_like.read(self.read_size) + if not tmp: + break + self._fb_buffers.append(tmp) + continue + sliced = n - len(ret) + ret += self._fb_buffers[self._fb_buf_i][ + self._fb_buf_o:self._fb_buf_o + sliced] + self._fb_buf_o += sliced + if self._fb_buf_o >= len(self._fb_buffers[self._fb_buf_i]): + self._fb_buf_o = 0 + self._fb_buf_i += 1 + if len(ret) != n: + raise OutOfData + if write_bytes is not None: + write_bytes(ret) + return ret + + def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): + typ = TYPE_IMMEDIATE + b = ord(self._fb_read(1, write_bytes)) + if b & 0b10000000 == 0: + obj = b + elif b & 0b11100000 == 0b11100000: + obj = struct.unpack("b", chr(b))[0] + elif b & 0b11100000 == 0b10100000: + n = b & 0b00011111 + obj = struct.unpack("%ds" % n, self._fb_read(n, write_bytes))[0] + typ = TYPE_RAW + elif b & 0b11110000 == 0b10010000: + n = b & 0b00001111 + typ = TYPE_ARRAY + elif b & 0b11110000 == 0b10000000: + n = b & 0b00001111 + typ = TYPE_MAP + elif b == 0xc0: + obj = None + elif b == 0xc2: + obj = False + elif b == 0xc3: + obj = True + elif b == 0xca: + obj = struct.unpack(">f", self._fb_read(4, write_bytes))[0] + elif b == 0xcb: + obj = struct.unpack(">d", self._fb_read(8, write_bytes))[0] + elif b == 0xcc: + obj = struct.unpack("B", self._fb_read(1, write_bytes))[0] + elif b == 0xcd: + obj = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + elif b == 0xce: + obj = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + elif b == 0xcf: + obj = struct.unpack(">Q", self._fb_read(8, write_bytes))[0] + elif b == 0xd0: + obj = struct.unpack("b", self._fb_read(1, write_bytes))[0] + elif b == 0xd1: + obj = struct.unpack(">h", self._fb_read(2, write_bytes))[0] + elif b == 0xd2: + obj = struct.unpack(">i", self._fb_read(4, write_bytes))[0] + elif b == 0xd3: + obj = struct.unpack(">q", self._fb_read(8, write_bytes))[0] + elif b == 0xda: + n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + obj = struct.unpack("%ds" % n, self._fb_read(n, write_bytes))[0] + typ = TYPE_RAW + elif b == 0xdb: + n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + obj = struct.unpack("%ds" % n, self._fb_read(n, write_bytes))[0] + typ = TYPE_RAW + elif b == 0xdc: + n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + typ = TYPE_ARRAY + elif b == 0xdd: + n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + typ = TYPE_ARRAY + elif b == 0xde: + n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + typ = TYPE_MAP + elif b == 0xdf: + n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + typ = TYPE_MAP + else: + raise UnpackValueError("Unknown header: 0x%x" % b) + if execute == EX_READ_ARRAY_HEADER: + if typ != TYPE_ARRAY: + raise UnpackValueError("Expected array") + return n + if execute == EX_READ_MAP_HEADER: + if typ != TYPE_MAP: + raise UnpackValueError("Expected map") + return n + # TODO should we eliminate the recursion? 
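        # A few concrete cases of the dispatch above, for illustration:
        #   0x07 -> b & 0b10000000 == 0, so it is simply the positive fixint 7
        #   0xa5 -> fixraw header; the low five bits give the length, so
        #           b"\xa5hallo" yields the five-byte raw "hallo"
        #   0xcd -> a big-endian uint16 follows, so b"\xcd\x01\x00" decodes to 256
        #   0x93 -> fixarray of three elements; typ becomes TYPE_ARRAY and the
        #           code below unpacks each element recursively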
+ if typ == TYPE_ARRAY: + if execute == EX_SKIP: + for i in xrange(n): + # TODO check whether we need to call `list_hook` + self._fb_unpack(EX_SKIP, write_bytes) + return + ret = [] + for i in xrange(n): + ret.append(self._fb_unpack(EX_CONSTRUCT, write_bytes)) + if self.list_hook is not None: + ret = self.list_hook(ret) + # TODO is the interaction between `list_hook` and `use_list` ok? + return ret if self.use_list else tuple(ret) + if typ == TYPE_MAP: + if execute == EX_SKIP: + for i in xrange(n): + # TODO check whether we need to call hooks + self._fb_unpack(EX_SKIP, write_bytes) + self._fb_unpack(EX_SKIP, write_bytes) + return + ret = [] + for i in xrange(n): + ret.append((self._fb_unpack(EX_CONSTRUCT, write_bytes), + self._fb_unpack(EX_CONSTRUCT, write_bytes))) + if self.object_pairs_hook is not None: + ret = self.object_pairs_hook(ret) + else: + ret = dict(ret) + if self.object_hook is not None: + ret = self.object_hook(ret) + return ret + if execute == EX_SKIP: + return + if typ == TYPE_RAW: + if self.encoding is not None: + obj = obj.decode(self.encoding, self.unicode_errors) + return obj + assert typ == TYPE_IMMEDIATE + return obj + + def skip(self, write_bytes=None): + self._fb_unpack(EX_SKIP, write_bytes) + self._fb_consume() + + def unpack(self, write_bytes=None): + ret = self._fb_unpack(EX_CONSTRUCT, write_bytes) + self._fb_consume() + return ret + + def read_array_header(self, write_bytes=None): + ret = self._fb_unpack(EX_READ_ARRAY_HEADER, write_bytes) + self._fb_consume() + return ret + + def read_map_header(self, write_bytes=None): + ret = self._fb_unpack(EX_READ_MAP_HEADER, write_bytes) + self._fb_consume() + return ret + +class Packer(object): + def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + use_single_float=False, autoreset=True): + self.use_float = use_single_float + self.autoreset = autoreset + self.encoding = encoding + self.unicode_errors = unicode_errors + self.buffer = StringIO.StringIO() + if default is not None: + if not callable(default): + raise TypeError("default must be callable") + self._default = default + def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT): + if nest_limit < 0: + raise PackValueError("recursion limit exceeded") + if obj is None: + return self.buffer.write(chr(0xc0)) + if isinstance(obj, bool): + if obj: + return self.buffer.write(chr(0xc3)) + return self.buffer.write(chr(0xc2)) + if isinstance(obj, int) or isinstance(obj, long): + if 0 <= obj < 0x80: + return self.buffer.write(struct.pack("B", obj)) + if -0x20 <= obj < 0: + return self.buffer.write(struct.pack("b", obj)) + if 0x80 <= obj <= 0xff: + return self.buffer.write(struct.pack("BB", 0xcc, obj)) + if -0x80 <= obj < 0: + return self.buffer.write(struct.pack(">Bb", 0xd0, obj)) + if 0xff < obj <= 0xffff: + return self.buffer.write(struct.pack(">BH", 0xcd, obj)) + if -0x8000 <= obj < -0x80: + return self.buffer.write(struct.pack(">Bh", 0xd1, obj)) + if 0xffff < obj <= 0xffffffff: + return self.buffer.write(struct.pack(">BI", 0xce, obj)) + if -0x80000000 <= obj < -0x8000: + return self.buffer.write(struct.pack(">Bi", 0xd2, obj)) + if 0xffffffff < obj <= 0xffffffffffffffff: + return self.buffer.write(struct.pack(">BQ", 0xcf, obj)) + if -0x8000000000000000 <= obj < -0x80000000: + return self.buffer.write(struct.pack(">Bq", 0xd3, obj)) + raise PackValueError("Integer value out of range") + if isinstance(obj, str) or isinstance(obj, unicode): + if isinstance(obj, unicode): + obj = obj.encode(self.encoding, self.unicode_errors) + n = len(obj) + if n <= 0x1f: 
+ return self.buffer.write(chr(0xa0 + n) + obj) + if n <= 0xffff: + return self.buffer.write(struct.pack(">BH%ds" % n,0xda, n, obj)) + if n <= 0xffffffff: + return self.buffer.write(struct.pack(">BI%ds" % n,0xdb, n, obj)) + raise PackValueError("String is too large") + if isinstance(obj, float): + if self.use_float: + return self.buffer.write(struct.pack(">Bf", 0xca, obj)) + return self.buffer.write(struct.pack(">Bd", 0xcb, obj)) + if isinstance(obj, list) or isinstance(obj, tuple): + n = len(obj) + self._fb_pack_array_header(n) + for i in xrange(n): + self._pack(obj[i], nest_limit - 1) + return + if isinstance(obj, dict): + return self._fb_pack_map_pairs(len(obj), obj.iteritems(), + nest_limit - 1) + if self._default is not None: + return self._pack(self._default(obj), nest_limit - 1) + raise TypeError("Cannot serialize %r" % obj) + def pack(self, obj): + self._pack(obj) + ret = self.buffer.getvalue() + if self.autoreset: + self.buffer = StringIO.StringIO() + return ret + def pack_map_pairs(self, pairs): + self._fb_pack_map_pairs(len(pairs), pairs) + ret = self.buffer.getvalue() + if self.autoreset: + self.buffer = StringIO.StringIO() + return ret + def pack_array_header(self, n): + self._fb_pack_array_header(n) + ret = self.buffer.getvalue() + if self.autoreset: + self.buffer = StringIO.StringIO() + return ret + def pack_map_header(self, n): + self._fb_pack_map_header(n) + ret = self.buffer.getvalue() + if self.autoreset: + self.buffer = StringIO.StringIO() + return ret + def _fb_pack_array_header(self, n): + if n <= 0x0f: + return self.buffer.write(chr(0x90 + n)) + if n <= 0xffff: + return self.buffer.write(struct.pack(">BH", 0xdc, n)) + if n <= 0xffffffff: + return self.buffer.write(struct.pack(">BI", 0xdd, n)) + raise PackValueError("Array is too large") + def _fb_pack_map_header(self, n): + if n <= 0x0f: + return self.buffer.write(chr(0x80 + n)) + if n <= 0xffff: + return self.buffer.write(struct.pack(">BH", 0xde, n)) + if n <= 0xffffffff: + return self.buffer.write(struct.pack(">BI", 0xdf, n)) + raise PackValueError("Dict is too large") + def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): + self._fb_pack_map_header(n) + for (k, v) in pairs: + self._pack(k, nest_limit - 1) + self._pack(v, nest_limit - 1) + def bytes(self): + return self.buffer.getvalue() + def reset(self): + self.buffer = StringIO.StringIO() -- cgit v1.2.1 From 2627b6ae9f0606b388a3a3ec0110d1fdb33d082e Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Mon, 28 Jan 2013 12:27:46 +0100 Subject: setup: automatically fallback to pure Python module Signed-off-by: Bas Westerbaan --- setup.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index ac3eeb5..3d7fc04 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,9 @@ from setuptools import setup, Extension from distutils.command.build_ext import build_ext +class NoCython(Exception): + pass + try: import Cython.Compiler.Main as cython_compiler have_cython = True @@ -24,10 +27,7 @@ def ensure_source(src): if not os.path.exists(src): if not have_cython: - raise Exception("""\ -Cython is required for building extension from checkout. -Install Cython >= 0.16 or install msgpack from PyPI. -""") + raise NoCython cythonize(pyx) elif (os.path.exists(pyx) and os.stat(src).st_mtime < os.stat(pyx).st_mtime and @@ -38,7 +38,14 @@ Install Cython >= 0.16 or install msgpack from PyPI. 
class BuildExt(build_ext): def build_extension(self, ext): - ext.sources = list(map(ensure_source, ext.sources)) + try: + ext.sources = list(map(ensure_source, ext.sources)) + except NoCython: + print "WARNING" + print "Cython is required for building extension from checkout." + print "Install Cython >= 0.16 or install msgpack from PyPI." + print "Falling back to pure Python implementation." + return return build_ext.build_extension(self, ext) -- cgit v1.2.1 From 69ba3c9bf9737df1ade7832986b0fa6f1659b04d Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Mon, 28 Jan 2013 13:30:32 +0100 Subject: fallback: use __pypy__.builders.StringBuilder when available This increases performance *a lot* on PyPy. Signed-off-by: Bas Westerbaan --- msgpack/fallback.py | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 2ca3a8f..220e0fe 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -4,10 +4,28 @@ import sys import array import struct -try: - import cStringIO as StringIO -except ImportError: - import StringIO +if hasattr(sys, 'pypy_version_info'): + # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own + # StringBuilder is fastest. + from __pypy__.builders import StringBuilder + USING_STRINGBUILDER = True + class StringIO(object): + def __init__(self, s=''): + if s: + self.builder = StringBuilder(len(s)) + self.builder.append(s) + else: + self.builder = StringBuilder() + def write(self, s): + self.builder.append(s) + def getvalue(self): + return self.builder.build() +else: + USING_STRINGBUILDER = False + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO from msgpack.exceptions import ( BufferFull, @@ -362,7 +380,7 @@ class Packer(object): self.autoreset = autoreset self.encoding = encoding self.unicode_errors = unicode_errors - self.buffer = StringIO.StringIO() + self.buffer = StringIO() if default is not None: if not callable(default): raise TypeError("default must be callable") @@ -429,25 +447,33 @@ class Packer(object): self._pack(obj) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO.StringIO() + self.buffer = StringIO() + elif USING_STRINGBUILDER: + self.buffer = StringIO(ret) return ret def pack_map_pairs(self, pairs): self._fb_pack_map_pairs(len(pairs), pairs) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO.StringIO() + self.buffer = StringIO() + elif USING_STRINGBUILDER: + self.buffer = StringIO(ret) return ret def pack_array_header(self, n): self._fb_pack_array_header(n) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO.StringIO() + self.buffer = StringIO() + elif USING_STRINGBUILDER: + self.buffer = StringIO(ret) return ret def pack_map_header(self, n): self._fb_pack_map_header(n) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO.StringIO() + self.buffer = StringIO() + elif USING_STRINGBUILDER: + self.buffer = StringIO(ret) return ret def _fb_pack_array_header(self, n): if n <= 0x0f: @@ -473,4 +499,4 @@ class Packer(object): def bytes(self): return self.buffer.getvalue() def reset(self): - self.buffer = StringIO.StringIO() + self.buffer = StringIO() -- cgit v1.2.1 From 6fa0f46a122c4f6be35415a3e65dcdc542fd3acd Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Mon, 28 Jan 2013 14:32:01 +0100 Subject: setup: remove Python 2 only syntax --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/setup.py b/setup.py index 3d7fc04..d4808d6 100644 --- a/setup.py +++ b/setup.py @@ -41,10 +41,10 @@ class BuildExt(build_ext): try: ext.sources = list(map(ensure_source, ext.sources)) except NoCython: - print "WARNING" - print "Cython is required for building extension from checkout." - print "Install Cython >= 0.16 or install msgpack from PyPI." - print "Falling back to pure Python implementation." + print("WARNING") + print("Cython is required for building extension from checkout.") + print("Install Cython >= 0.16 or install msgpack from PyPI.") + print("Falling back to pure Python implementation.") return return build_ext.build_extension(self, ext) -- cgit v1.2.1 From b940802032adc16b074b1e29bed72825580c5c9f Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Mon, 28 Jan 2013 22:29:23 +0100 Subject: fallback: two fixes for raising ExtraData --- msgpack/fallback.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 220e0fe..392f7eb 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -71,7 +71,8 @@ def unpack(stream, object_hook=None, list_hook=None, use_list=True, use_list=use_list, encoding=encoding, unicode_errors=unicode_errors, object_pairs_hook=object_pairs_hook) ret = unpacker._fb_unpack() - unpacker._fb_check_for_extradata() + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) return ret def unpackb(packed, object_hook=None, list_hook=None, use_list=True, @@ -85,7 +86,8 @@ def unpackb(packed, object_hook=None, list_hook=None, use_list=True, object_pairs_hook=object_pairs_hook) unpacker.feed(packed) ret = unpacker._fb_unpack() - unpacker._fb_check_for_extradata() + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) return ret class Unpacker(object): @@ -192,15 +194,16 @@ class Unpacker(object): self._fb_buf_i = 0 self._fb_buf_n = sum(map(len, self._fb_buffers)) - def _fb_check_for_extradata(self): + def _fb_got_extradata(self): if self._fb_buf_i != len(self._fb_buffers): - raise ExtraData + return True if self._fb_feeding: - return + return False if not self.file_like: - return - if not self.file_like.read(1): - raise ExtraData + return False + if self.file_like.read(1): + return True + return False def __iter__(self): return self @@ -216,6 +219,12 @@ class Unpacker(object): def read_bytes(self, n): return self._fb_read(n) + def _fb_get_extradata(self): + bufs = self._fb_buffers[self._fb_buf_i:] + if bufs: + bufs[0] = bufs[0][self._fb_buf_o:] + return ''.join(bufs) + def _fb_read(self, n, write_bytes=None): ret = '' while len(ret) != n: -- cgit v1.2.1 From af9c9ca2c9cae01cf603da90530b9ba396007e5b Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 02:01:34 +0100 Subject: fallback: performance: write(a+b) -> write(a); write(b) --- msgpack/fallback.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 392f7eb..d6fd533 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -430,7 +430,8 @@ class Packer(object): obj = obj.encode(self.encoding, self.unicode_errors) n = len(obj) if n <= 0x1f: - return self.buffer.write(chr(0xa0 + n) + obj) + self.buffer.write(chr(0xa0 + n)) + return self.buffer.write(obj) if n <= 0xffff: return self.buffer.write(struct.pack(">BH%ds" % n,0xda, n, obj)) if n <= 0xffffffff: -- cgit v1.2.1 From 94925acb124998aaf9438e7c8152fbfbb7d5d8a8 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 
02:15:29 +0100 Subject: fallback: do not use dynamic format strings for struct.(un)pack Increases performance on PyPy. --- msgpack/fallback.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index d6fd533..fe2b1ac 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -258,7 +258,7 @@ class Unpacker(object): obj = struct.unpack("b", chr(b))[0] elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 - obj = struct.unpack("%ds" % n, self._fb_read(n, write_bytes))[0] + obj = self._fb_read(n, write_bytes) typ = TYPE_RAW elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 @@ -294,11 +294,11 @@ class Unpacker(object): obj = struct.unpack(">q", self._fb_read(8, write_bytes))[0] elif b == 0xda: n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] - obj = struct.unpack("%ds" % n, self._fb_read(n, write_bytes))[0] + obj = self._fb_read(n, write_bytes) typ = TYPE_RAW elif b == 0xdb: n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] - obj = struct.unpack("%ds" % n, self._fb_read(n, write_bytes))[0] + obj = self._fb_read(n, write_bytes) typ = TYPE_RAW elif b == 0xdc: n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] @@ -433,9 +433,11 @@ class Packer(object): self.buffer.write(chr(0xa0 + n)) return self.buffer.write(obj) if n <= 0xffff: - return self.buffer.write(struct.pack(">BH%ds" % n,0xda, n, obj)) + self.buffer.write(struct.pack(">BH", 0xda, n)) + return self.buffer.write(obj) if n <= 0xffffffff: - return self.buffer.write(struct.pack(">BI%ds" % n,0xdb, n, obj)) + self.buffer.write(struct.pack(">BI", 0xdb, n)) + return self.buffer.write(obj) raise PackValueError("String is too large") if isinstance(obj, float): if self.use_float: -- cgit v1.2.1 From fb81f80d14613bd2ac3e63029a47bb0512c25dd5 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 02:47:41 +0100 Subject: fallback: bugfix in next() --- msgpack/fallback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index fe2b1ac..a866ff1 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -210,7 +210,7 @@ class Unpacker(object): def next(self): try: - ret = self._fb_unpack(None, None) + ret = self._fb_unpack(EX_CONSTRUCT, None) self._fb_consume() return ret except OutOfData: -- cgit v1.2.1 From d2f549a47094b2a29cc94bc50029ebdc85508861 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 02:58:26 +0100 Subject: fallback: add actual rollback and add a testcase for it Signed-off-by: Bas Westerbaan --- msgpack/fallback.py | 21 +++++++++++++-------- test/test_sequnpack.py | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index a866ff1..2f83a20 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -208,17 +208,13 @@ class Unpacker(object): def __iter__(self): return self - def next(self): - try: - ret = self._fb_unpack(EX_CONSTRUCT, None) - self._fb_consume() - return ret - except OutOfData: - raise StopIteration - def read_bytes(self, n): return self._fb_read(n) + def _fb_rollback(self): + self._fb_buf_i = 0 + self._fb_buf_o = 0 + def _fb_get_extradata(self): bufs = self._fb_buffers[self._fb_buf_i:] if bufs: @@ -244,6 +240,7 @@ class Unpacker(object): self._fb_buf_o = 0 self._fb_buf_i += 1 if len(ret) != n: + self._fb_rollback() raise OutOfData if write_bytes is not None: write_bytes(ret) @@ -363,6 +360,14 @@ class Unpacker(object): assert typ == TYPE_IMMEDIATE return obj + 
def next(self): + try: + ret = self._fb_unpack(EX_CONSTRUCT, None) + self._fb_consume() + return ret + except OutOfData: + raise StopIteration + def skip(self, write_bytes=None): self._fb_unpack(EX_SKIP, write_bytes) self._fb_consume() diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index fc1f712..1da383c 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -7,6 +7,21 @@ from msgpack.exceptions import OutOfData from pytest import raises +def test_partialdata(): + unpacker = Unpacker() + unpacker.feed(b'\xa5') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'h') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'a') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'l') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'l') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'o') + assert next(iter(unpacker)) == 'hallo' + def test_foobar(): unpacker = Unpacker(read_size=3, use_list=1) unpacker.feed(b'foobar') -- cgit v1.2.1 From b9e9199eea3a1654dca3e6faf6530d438788ad77 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 03:03:13 +0100 Subject: fallback: python3 bugfix for new testcase of d2f549a4 Signed-off-by: Bas Westerbaan --- test/test_sequnpack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 1da383c..9db14ca 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -20,7 +20,7 @@ def test_partialdata(): unpacker.feed(b'l') with raises(StopIteration): next(iter(unpacker)) unpacker.feed(b'o') - assert next(iter(unpacker)) == 'hallo' + assert next(iter(unpacker)) == b'hallo' def test_foobar(): unpacker = Unpacker(read_size=3, use_list=1) -- cgit v1.2.1 From 770fed6b7f1ce1f6e9c20881897a417680c8b79a Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 03:39:46 +0100 Subject: fallback: Use mmap objects instead of strings to unpack Signed-off-by: Bas Westerbaan --- msgpack/fallback.py | 80 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 2f83a20..c0fcc18 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,15 +1,27 @@ # Fallback pure Python implementation of msgpack +# +# Easy imports +# import sys import array import struct +# +# Tricky imports +# +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +# We will use wStringIO for buffering the writes for packing. +# Normally, we will use cStringIO.StringIO. +# On PyPy we will use PyPy's own StringBuilder. if hasattr(sys, 'pypy_version_info'): - # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own - # StringBuilder is fastest. from __pypy__.builders import StringBuilder USING_STRINGBUILDER = True - class StringIO(object): + class wStringIO(object): def __init__(self, s=''): if s: self.builder = StringBuilder(len(s)) @@ -22,10 +34,18 @@ if hasattr(sys, 'pypy_version_info'): return self.builder.build() else: USING_STRINGBUILDER = False - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO + wStringIO = StringIO + +# We will use rStringIO for unpacking. +# Normally, this is a mmap. A normal StringIO is not a drop-in replacement --- +# it misses the __len__ operation. 
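# (Illustrative note: an anonymous mmap.mmap(-1, n) behaves like a seekable
#  file object but also supports len() and slicing, which is why it can stand
#  in for the plain byte strings previously kept in _fb_buffers.)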
+# TODO add fallback for when mmap is unavailable +import mmap +def rStringIO(s): + m = mmap.mmap(-1, len(s)) + m.write(s) + m.seek(0) + return m from msgpack.exceptions import ( BufferFull, @@ -184,13 +204,13 @@ class Unpacker(object): if self._fb_buf_n + len(next_bytes) > self.max_buffer_size: raise BufferFull self._fb_buf_n += len(next_bytes) - self._fb_buffers.append(next_bytes) + self._fb_buffers.append(rStringIO(next_bytes)) def _fb_consume(self): self._fb_buffers = self._fb_buffers[self._fb_buf_i:] if self._fb_buffers: - self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:] - self._fb_buf_o = 0 + self._fb_buffers[0] = rStringIO(self._fb_buffers[0][ + self._fb_buffers[0].tell():]) self._fb_buf_i = 0 self._fb_buf_n = sum(map(len, self._fb_buffers)) @@ -212,16 +232,20 @@ class Unpacker(object): return self._fb_read(n) def _fb_rollback(self): + for buf in self._fb_buffers: + buf.seek(0) self._fb_buf_i = 0 - self._fb_buf_o = 0 def _fb_get_extradata(self): bufs = self._fb_buffers[self._fb_buf_i:] if bufs: - bufs[0] = bufs[0][self._fb_buf_o:] - return ''.join(bufs) + bufs[0] = rStringIO(bufs[0][bufs[0].tell():]) + return ''.join([buf[:] for buf in bufs]) def _fb_read(self, n, write_bytes=None): + if (write_bytes is None and self._fb_buf_i < len(self._fb_buffers) + and self._fb_buffers[0].tell() + n < len(self._fb_buffers[0])): + return self._fb_buffers[0].read(n) ret = '' while len(ret) != n: if self._fb_buf_i == len(self._fb_buffers): @@ -230,14 +254,12 @@ class Unpacker(object): tmp = self.file_like.read(self.read_size) if not tmp: break - self._fb_buffers.append(tmp) + self._fb_buffers.append(rStringIO(tmp)) continue sliced = n - len(ret) - ret += self._fb_buffers[self._fb_buf_i][ - self._fb_buf_o:self._fb_buf_o + sliced] - self._fb_buf_o += sliced - if self._fb_buf_o >= len(self._fb_buffers[self._fb_buf_i]): - self._fb_buf_o = 0 + ret += self._fb_buffers[self._fb_buf_i].read(sliced) + if (self._fb_buffers[self._fb_buf_i].tell() + == len(self._fb_buffers[self._fb_buf_i])): self._fb_buf_i += 1 if len(ret) != n: self._fb_rollback() @@ -394,7 +416,7 @@ class Packer(object): self.autoreset = autoreset self.encoding = encoding self.unicode_errors = unicode_errors - self.buffer = StringIO() + self.buffer = wStringIO() if default is not None: if not callable(default): raise TypeError("default must be callable") @@ -464,33 +486,33 @@ class Packer(object): self._pack(obj) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO() + self.buffer = wStringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self.buffer = wStringIO(ret) return ret def pack_map_pairs(self, pairs): self._fb_pack_map_pairs(len(pairs), pairs) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO() + self.buffer = wStringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self.buffer = wStringIO(ret) return ret def pack_array_header(self, n): self._fb_pack_array_header(n) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO() + self.buffer = wStringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self.buffer = wStringIO(ret) return ret def pack_map_header(self, n): self._fb_pack_map_header(n) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = StringIO() + self.buffer = wStringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self.buffer = wStringIO(ret) return ret def _fb_pack_array_header(self, n): if n <= 0x0f: @@ -516,4 +538,4 @@ class Packer(object): def bytes(self): return self.buffer.getvalue() 
def reset(self): - self.buffer = StringIO() + self.buffer = wStringIO() -- cgit v1.2.1 From d91a0d3d68bef38be667ba8e931e3f53cf90be6f Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 03:45:17 +0100 Subject: Revert "fallback: Use mmap objects instead of strings to unpack" See next commit. This reverts commit 770fed6b7f1ce1f6e9c20881897a417680c8b79a. --- msgpack/fallback.py | 80 +++++++++++++++++++---------------------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index c0fcc18..2f83a20 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,27 +1,15 @@ # Fallback pure Python implementation of msgpack -# -# Easy imports -# import sys import array import struct -# -# Tricky imports -# -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -# We will use wStringIO for buffering the writes for packing. -# Normally, we will use cStringIO.StringIO. -# On PyPy we will use PyPy's own StringBuilder. if hasattr(sys, 'pypy_version_info'): + # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own + # StringBuilder is fastest. from __pypy__.builders import StringBuilder USING_STRINGBUILDER = True - class wStringIO(object): + class StringIO(object): def __init__(self, s=''): if s: self.builder = StringBuilder(len(s)) @@ -34,18 +22,10 @@ if hasattr(sys, 'pypy_version_info'): return self.builder.build() else: USING_STRINGBUILDER = False - wStringIO = StringIO - -# We will use rStringIO for unpacking. -# Normally, this is a mmap. A normal StringIO is not a drop-in replacement --- -# it misses the __len__ operation. -# TODO add fallback for when mmap is unavailable -import mmap -def rStringIO(s): - m = mmap.mmap(-1, len(s)) - m.write(s) - m.seek(0) - return m + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO from msgpack.exceptions import ( BufferFull, @@ -204,13 +184,13 @@ class Unpacker(object): if self._fb_buf_n + len(next_bytes) > self.max_buffer_size: raise BufferFull self._fb_buf_n += len(next_bytes) - self._fb_buffers.append(rStringIO(next_bytes)) + self._fb_buffers.append(next_bytes) def _fb_consume(self): self._fb_buffers = self._fb_buffers[self._fb_buf_i:] if self._fb_buffers: - self._fb_buffers[0] = rStringIO(self._fb_buffers[0][ - self._fb_buffers[0].tell():]) + self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:] + self._fb_buf_o = 0 self._fb_buf_i = 0 self._fb_buf_n = sum(map(len, self._fb_buffers)) @@ -232,20 +212,16 @@ class Unpacker(object): return self._fb_read(n) def _fb_rollback(self): - for buf in self._fb_buffers: - buf.seek(0) self._fb_buf_i = 0 + self._fb_buf_o = 0 def _fb_get_extradata(self): bufs = self._fb_buffers[self._fb_buf_i:] if bufs: - bufs[0] = rStringIO(bufs[0][bufs[0].tell():]) - return ''.join([buf[:] for buf in bufs]) + bufs[0] = bufs[0][self._fb_buf_o:] + return ''.join(bufs) def _fb_read(self, n, write_bytes=None): - if (write_bytes is None and self._fb_buf_i < len(self._fb_buffers) - and self._fb_buffers[0].tell() + n < len(self._fb_buffers[0])): - return self._fb_buffers[0].read(n) ret = '' while len(ret) != n: if self._fb_buf_i == len(self._fb_buffers): @@ -254,12 +230,14 @@ class Unpacker(object): tmp = self.file_like.read(self.read_size) if not tmp: break - self._fb_buffers.append(rStringIO(tmp)) + self._fb_buffers.append(tmp) continue sliced = n - len(ret) - ret += self._fb_buffers[self._fb_buf_i].read(sliced) - if (self._fb_buffers[self._fb_buf_i].tell() - == 
len(self._fb_buffers[self._fb_buf_i])): + ret += self._fb_buffers[self._fb_buf_i][ + self._fb_buf_o:self._fb_buf_o + sliced] + self._fb_buf_o += sliced + if self._fb_buf_o >= len(self._fb_buffers[self._fb_buf_i]): + self._fb_buf_o = 0 self._fb_buf_i += 1 if len(ret) != n: self._fb_rollback() @@ -416,7 +394,7 @@ class Packer(object): self.autoreset = autoreset self.encoding = encoding self.unicode_errors = unicode_errors - self.buffer = wStringIO() + self.buffer = StringIO() if default is not None: if not callable(default): raise TypeError("default must be callable") @@ -486,33 +464,33 @@ class Packer(object): self._pack(obj) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = wStringIO() + self.buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = wStringIO(ret) + self.buffer = StringIO(ret) return ret def pack_map_pairs(self, pairs): self._fb_pack_map_pairs(len(pairs), pairs) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = wStringIO() + self.buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = wStringIO(ret) + self.buffer = StringIO(ret) return ret def pack_array_header(self, n): self._fb_pack_array_header(n) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = wStringIO() + self.buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = wStringIO(ret) + self.buffer = StringIO(ret) return ret def pack_map_header(self, n): self._fb_pack_map_header(n) ret = self.buffer.getvalue() if self.autoreset: - self.buffer = wStringIO() + self.buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = wStringIO(ret) + self.buffer = StringIO(ret) return ret def _fb_pack_array_header(self, n): if n <= 0x0f: @@ -538,4 +516,4 @@ class Packer(object): def bytes(self): return self.buffer.getvalue() def reset(self): - self.buffer = wStringIO() + self.buffer = StringIO() -- cgit v1.2.1 From 4cde7f080c16fd396d758930faf9ebf15b138047 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Tue, 29 Jan 2013 03:46:07 +0100 Subject: fallback: _fb_read: add fast-path --- msgpack/fallback.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 2f83a20..bd50b47 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -222,6 +222,11 @@ class Unpacker(object): return ''.join(bufs) def _fb_read(self, n, write_bytes=None): + if (write_bytes is None and self._fb_buf_i < len(self._fb_buffers) + and self._fb_buf_o + n < len(self._fb_buffers[self._fb_buf_i])): + self._fb_buf_o += n + return self._fb_buffers[self._fb_buf_i][ + self._fb_buf_o-n:self._fb_buf_o] ret = '' while len(ret) != n: if self._fb_buf_i == len(self._fb_buffers): -- cgit v1.2.1 From 328369e52e8ef41a8b4ec09e48870f8f3edb190c Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 29 Jan 2013 14:33:37 +0900 Subject: pep8 friendly. 
--- msgpack/fallback.py | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index bd50b47..3d733f4 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,4 +1,4 @@ -# Fallback pure Python implementation of msgpack +"""Fallback pure Python implementation of msgpack""" import sys import array @@ -49,11 +49,11 @@ DEFAULT_RECURSE_LIMIT=511 def pack(o, stream, default=None, encoding='utf-8', unicode_errors='strict'): """ Pack object `o` and write it to `stream` """ packer = Packer(default=default, encoding=encoding, - unicode_errors=unicode_errors) + unicode_errors=unicode_errors) stream.write(packer.pack(o)) def packb(o, default=None, encoding='utf-8', unicode_errors='struct', - use_single_float=False): + use_single_float=False): """ Pack object `o` and return packed bytes """ packer = Packer(default=default, encoding=encoding, @@ -62,28 +62,30 @@ def packb(o, default=None, encoding='utf-8', unicode_errors='struct', return packer.pack(o) def unpack(stream, object_hook=None, list_hook=None, use_list=True, - encoding=None, unicode_errors='strict', - object_pairs_hook=None): + encoding=None, unicode_errors='strict', + object_pairs_hook=None): """ Unpack an object from `stream`. Raises `ExtraData` when `stream` has extra bytes. """ unpacker = Unpacker(stream, object_hook=object_hook, list_hook=list_hook, - use_list=use_list, encoding=encoding, unicode_errors=unicode_errors, - object_pairs_hook=object_pairs_hook) + use_list=use_list, + encoding=encoding, unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook) ret = unpacker._fb_unpack() if unpacker._fb_got_extradata(): raise ExtraData(ret, unpacker._fb_get_extradata()) return ret def unpackb(packed, object_hook=None, list_hook=None, use_list=True, - encoding=None, unicode_errors='strict', - object_pairs_hook=None): + encoding=None, unicode_errors='strict', + object_pairs_hook=None): """ Unpack an object from `packed`. Raises `ExtraData` when `packed` contains extra bytes. 
""" unpacker = Unpacker(None, object_hook=object_hook, list_hook=list_hook, - use_list=use_list, encoding=encoding, unicode_errors=unicode_errors, - object_pairs_hook=object_pairs_hook) + use_list=use_list, + encoding=encoding, unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook) unpacker.feed(packed) ret = unpacker._fb_unpack() if unpacker._fb_got_extradata(): @@ -141,8 +143,8 @@ class Unpacker(object): """ def __init__(self, file_like=None, read_size=0, use_list=True, - object_hook=None, object_pairs_hook=None, list_hook=None, - encoding=None, unicode_errors='strict', max_buffer_size=0): + object_hook=None, object_pairs_hook=None, list_hook=None, + encoding=None, unicode_errors='strict', max_buffer_size=0): if file_like is None: self._fb_feeding = True else: @@ -174,8 +176,8 @@ class Unpacker(object): if object_pairs_hook is not None and not callable(object_pairs_hook): raise ValueError('`object_pairs_hook` is not callable') if object_hook is not None and object_pairs_hook is not None: - raise ValueError("object_pairs_hook and object_hook are mutually "+ - "exclusive") + raise ValueError("object_pairs_hook and object_hook are mutually " + "exclusive") def feed(self, next_bytes): if isinstance(next_bytes, array.array): @@ -394,7 +396,7 @@ class Unpacker(object): class Packer(object): def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, autoreset=True): + use_single_float=False, autoreset=True): self.use_float = use_single_float self.autoreset = autoreset self.encoding = encoding @@ -404,6 +406,7 @@ class Packer(object): if not callable(default): raise TypeError("default must be callable") self._default = default + def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT): if nest_limit < 0: raise PackValueError("recursion limit exceeded") @@ -461,10 +464,11 @@ class Packer(object): return if isinstance(obj, dict): return self._fb_pack_map_pairs(len(obj), obj.iteritems(), - nest_limit - 1) + nest_limit - 1) if self._default is not None: return self._pack(self._default(obj), nest_limit - 1) raise TypeError("Cannot serialize %r" % obj) + def pack(self, obj): self._pack(obj) ret = self.buffer.getvalue() @@ -473,6 +477,7 @@ class Packer(object): elif USING_STRINGBUILDER: self.buffer = StringIO(ret) return ret + def pack_map_pairs(self, pairs): self._fb_pack_map_pairs(len(pairs), pairs) ret = self.buffer.getvalue() @@ -481,6 +486,7 @@ class Packer(object): elif USING_STRINGBUILDER: self.buffer = StringIO(ret) return ret + def pack_array_header(self, n): self._fb_pack_array_header(n) ret = self.buffer.getvalue() @@ -489,6 +495,7 @@ class Packer(object): elif USING_STRINGBUILDER: self.buffer = StringIO(ret) return ret + def pack_map_header(self, n): self._fb_pack_map_header(n) ret = self.buffer.getvalue() @@ -497,6 +504,7 @@ class Packer(object): elif USING_STRINGBUILDER: self.buffer = StringIO(ret) return ret + def _fb_pack_array_header(self, n): if n <= 0x0f: return self.buffer.write(chr(0x90 + n)) @@ -505,6 +513,7 @@ class Packer(object): if n <= 0xffffffff: return self.buffer.write(struct.pack(">BI", 0xdd, n)) raise PackValueError("Array is too large") + def _fb_pack_map_header(self, n): if n <= 0x0f: return self.buffer.write(chr(0x80 + n)) @@ -513,12 +522,15 @@ class Packer(object): if n <= 0xffffffff: return self.buffer.write(struct.pack(">BI", 0xdf, n)) raise PackValueError("Dict is too large") + def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._fb_pack_map_header(n) for (k, v) in pairs: 
self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) + def bytes(self): return self.buffer.getvalue() + def reset(self): self.buffer = StringIO() -- cgit v1.2.1 From cbabeebc95e9e42c0356e089b742588a4de75d56 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 29 Jan 2013 14:47:16 +0900 Subject: Use MSGPACK_PUREPYTHON envvar to test fallback module --- .travis.yml | 2 +- msgpack/__init__.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2e6fc56..e536fdc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,4 +12,4 @@ install: - cython --cplus msgpack/_packer.pyx - cython --cplus msgpack/_unpacker.pyx -script: "tox" +script: "tox && MSGPACK_PUREPYTHON=x tox" diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 49a32d9..77f6b81 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -2,11 +2,15 @@ from msgpack._version import version from msgpack.exceptions import * -try: - from msgpack._packer import pack, packb, Packer - from msgpack._unpacker import unpack, unpackb, Unpacker -except ImportError: +import os +if os.environ.get('MSGPACK_PUREPYTHON'): from msgpack.fallback import pack, packb, Packer, unpack, unpackb, Unpacker +else: + try: + from msgpack._packer import pack, packb, Packer + from msgpack._unpacker import unpack, unpackb, Unpacker + except ImportError: + from msgpack.fallback import pack, packb, Packer, unpack, unpackb, Unpacker # alias for compatibility to simplejson/marshal/pickle. load = unpack -- cgit v1.2.1 From 8d6a387dff10dd2150aa86cd96e2bece26546268 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 29 Jan 2013 15:10:22 +0900 Subject: fallback: Support Python 3. --- msgpack/fallback.py | 54 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 3d733f4..ac6dbf9 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -4,13 +4,28 @@ import sys import array import struct +if sys.version_info[0] == 3: + PY3 = True + int_types = int + Unicode = str + xrange = range + def dict_iteritems(d): + return d.items() +else: + PY3 = False + int_types = (int, long) + Unicode = unicode + def dict_iteritems(d): + return d.iteritems() + + if hasattr(sys, 'pypy_version_info'): # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own # StringBuilder is fastest. 
from __pypy__.builders import StringBuilder USING_STRINGBUILDER = True class StringIO(object): - def __init__(self, s=''): + def __init__(self, s=b''): if s: self.builder = StringBuilder(len(s)) self.builder.append(s) @@ -22,10 +37,7 @@ if hasattr(sys, 'pypy_version_info'): return self.builder.build() else: USING_STRINGBUILDER = False - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO + from io import BytesIO as StringIO from msgpack.exceptions import ( BufferFull, @@ -156,7 +168,7 @@ class Unpacker(object): self._fb_buf_o = 0 self._fb_buf_i = 0 self._fb_buf_n = 0 - self.max_buffer_size = (sys.maxint if max_buffer_size == 0 + self.max_buffer_size = (2**31-1 if max_buffer_size == 0 else max_buffer_size) self.read_size = (read_size if read_size != 0 else min(self.max_buffer_size, 2048)) @@ -221,7 +233,7 @@ class Unpacker(object): bufs = self._fb_buffers[self._fb_buf_i:] if bufs: bufs[0] = bufs[0][self._fb_buf_o:] - return ''.join(bufs) + return b''.join(bufs) def _fb_read(self, n, write_bytes=None): if (write_bytes is None and self._fb_buf_i < len(self._fb_buffers) @@ -229,7 +241,7 @@ class Unpacker(object): self._fb_buf_o += n return self._fb_buffers[self._fb_buf_i][ self._fb_buf_o-n:self._fb_buf_o] - ret = '' + ret = b'' while len(ret) != n: if self._fb_buf_i == len(self._fb_buffers): if self._fb_feeding: @@ -255,11 +267,12 @@ class Unpacker(object): def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): typ = TYPE_IMMEDIATE - b = ord(self._fb_read(1, write_bytes)) + c = self._fb_read(1, write_bytes) + b = ord(c) if b & 0b10000000 == 0: obj = b elif b & 0b11100000 == 0b11100000: - obj = struct.unpack("b", chr(b))[0] + obj = struct.unpack("b", c)[0] elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 obj = self._fb_read(n, write_bytes) @@ -374,6 +387,7 @@ class Unpacker(object): return ret except OutOfData: raise StopIteration + __next__ = next def skip(self, write_bytes=None): self._fb_unpack(EX_SKIP, write_bytes) @@ -411,12 +425,12 @@ class Packer(object): if nest_limit < 0: raise PackValueError("recursion limit exceeded") if obj is None: - return self.buffer.write(chr(0xc0)) + return self.buffer.write(b"\xc0") if isinstance(obj, bool): if obj: - return self.buffer.write(chr(0xc3)) - return self.buffer.write(chr(0xc2)) - if isinstance(obj, int) or isinstance(obj, long): + return self.buffer.write(b"\xc3") + return self.buffer.write(b"\xc2") + if isinstance(obj, int_types): if 0 <= obj < 0x80: return self.buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: @@ -438,12 +452,12 @@ class Packer(object): if -0x8000000000000000 <= obj < -0x80000000: return self.buffer.write(struct.pack(">Bq", 0xd3, obj)) raise PackValueError("Integer value out of range") - if isinstance(obj, str) or isinstance(obj, unicode): - if isinstance(obj, unicode): + if isinstance(obj, (Unicode, bytes)): + if isinstance(obj, Unicode): obj = obj.encode(self.encoding, self.unicode_errors) n = len(obj) if n <= 0x1f: - self.buffer.write(chr(0xa0 + n)) + self.buffer.write(struct.pack('B', 0xa0 + n)) return self.buffer.write(obj) if n <= 0xffff: self.buffer.write(struct.pack(">BH", 0xda, n)) @@ -463,7 +477,7 @@ class Packer(object): self._pack(obj[i], nest_limit - 1) return if isinstance(obj, dict): - return self._fb_pack_map_pairs(len(obj), obj.iteritems(), + return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) if self._default is not None: return self._pack(self._default(obj), nest_limit - 1) @@ -507,7 +521,7 @@ class Packer(object): def 
_fb_pack_array_header(self, n): if n <= 0x0f: - return self.buffer.write(chr(0x90 + n)) + return self.buffer.write(struct.pack('B', 0x90 + n)) if n <= 0xffff: return self.buffer.write(struct.pack(">BH", 0xdc, n)) if n <= 0xffffffff: @@ -516,7 +530,7 @@ class Packer(object): def _fb_pack_map_header(self, n): if n <= 0x0f: - return self.buffer.write(chr(0x80 + n)) + return self.buffer.write(struct.pack('B', 0x80 + n)) if n <= 0xffff: return self.buffer.write(struct.pack(">BH", 0xde, n)) if n <= 0xffffffff: -- cgit v1.2.1
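Taken together with the MSGPACK_PUREPYTHON switch introduced in cbabeebc, the Python 3 fixes above make the fallback directly exercisable. A rough smoke test, assuming this checkout is installed as the msgpack package (the payload and assertions below are illustrative, not taken from the test suite):

    import os
    os.environ['MSGPACK_PUREPYTHON'] = 'x'   # same switch .travis.yml uses

    import msgpack                           # now resolves to msgpack.fallback
    assert msgpack.Packer.__module__ == 'msgpack.fallback'

    payload = {'name': 'msgpack', 'version': [0, 3], 'pure': True}
    packed = msgpack.packb(payload, encoding='utf-8')
    assert msgpack.unpackb(packed, encoding='utf-8') == payload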