diff options
| author | INADA Naoki <inada-n@klab.com> | 2012-12-11 22:17:36 +0900 |
|---|---|---|
| committer | INADA Naoki <inada-n@klab.com> | 2012-12-11 22:17:36 +0900 |
| commit | 685026d2e1e2c014118954781d7e30bcf2ba3038 (patch) | |
| tree | 6dd5d92c809ec2c94c4effcff46617513be87d32 | |
| parent | 171145e56290a2254d6b648d438dde33cd631fc4 (diff) | |
| parent | 1c5b865db36a9cd2db5eb8de7625573a78085c56 (diff) | |
| download | msgpack-python-685026d2e1e2c014118954781d7e30bcf2ba3038.tar.gz | |
Split _msgpack.pyx (fix #34)
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | msgpack/__init__.py | 4 | ||||
| -rw-r--r-- | msgpack/_packer.pyx | 259 | ||||
| -rw-r--r-- | msgpack/_unpacker.pyx (renamed from msgpack/_msgpack.pyx) | 249 | ||||
| -rw-r--r-- | msgpack/exceptions.py | 6 | ||||
| -rw-r--r-- | setup.py | 25 |
6 files changed, 285 insertions, 261 deletions
@@ -7,6 +7,5 @@ dist/* *.so *~ msgpack/__version__.py -msgpack/_msgpack.c -msgpack/_msgpack.cpp +msgpack/*.cpp *.egg-info diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 98b1ab7..0bfe6fe 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,6 +1,8 @@ # coding: utf-8 from msgpack._version import version -from msgpack._msgpack import * +from msgpack.exceptions import * +from msgpack._packer import pack, packb, Packer +from msgpack._unpacker import unpack, unpackb, Unpacker # alias for compatibility to simplejson/marshal/pickle. load = unpack diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx new file mode 100644 index 0000000..a5bc570 --- /dev/null +++ b/msgpack/_packer.pyx @@ -0,0 +1,259 @@ +# coding: utf-8 +#cython: embedsignature=True + +from cpython cimport * +cdef extern from "Python.h": + ctypedef char* const_char_ptr "const char*" + ctypedef char* const_void_ptr "const void*" + ctypedef struct PyObject + cdef int PyObject_AsReadBuffer(object o, const_void_ptr* buff, Py_ssize_t* buf_len) except -1 + +from libc.stdlib cimport * +from libc.string cimport * +from libc.limits cimport * + +from msgpack.exceptions import PackValueError + +cdef extern from "pack.h": + struct msgpack_packer: + char* buf + size_t length + size_t buf_size + + int msgpack_pack_int(msgpack_packer* pk, int d) + int msgpack_pack_nil(msgpack_packer* pk) + int msgpack_pack_true(msgpack_packer* pk) + int msgpack_pack_false(msgpack_packer* pk) + int msgpack_pack_long(msgpack_packer* pk, long d) + int msgpack_pack_long_long(msgpack_packer* pk, long long d) + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) + int msgpack_pack_float(msgpack_packer* pk, float d) + int msgpack_pack_double(msgpack_packer* pk, double d) + int msgpack_pack_array(msgpack_packer* pk, size_t l) + int msgpack_pack_map(msgpack_packer* pk, size_t l) + int msgpack_pack_raw(msgpack_packer* pk, size_t l) + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + +cdef int DEFAULT_RECURSE_LIMIT=511 + + + +cdef class Packer(object): + """MessagePack Packer + + usage: + + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) + + Packer's constructor has some keyword arguments: + + * *defaut* - Convert user type to builtin type that Packer supports. + See also simplejson's document. + * *encoding* - Convert unicode to bytes with this encoding. (default: 'utf-8') + * *unicode_erros* - Error handler for encoding unicode. (default: 'strict') + * *use_single_float* - Use single precision float type for float. (default: False) + * *autoreset* - Reset buffer after each pack and return it's content as `bytes`. (default: True). + If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. + """ + cdef msgpack_packer pk + cdef object _default + cdef object _bencoding + cdef object _berrors + cdef char *encoding + cdef char *unicode_errors + cdef bool use_float + cdef bint autoreset + + def __cinit__(self): + cdef int buf_size = 1024*1024 + self.pk.buf = <char*> malloc(buf_size); + if self.pk.buf == NULL: + raise MemoryError("Unable to allocate internal buffer.") + self.pk.buf_size = buf_size + self.pk.length = 0 + + def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, bint autoreset=1): + self.use_float = use_single_float + self.autoreset = autoreset + if default is not None: + if not PyCallable_Check(default): + raise TypeError("default must be a callable.") + self._default = default + if encoding is None: + self.encoding = NULL + self.unicode_errors = NULL + else: + if isinstance(encoding, unicode): + self._bencoding = encoding.encode('ascii') + else: + self._bencoding = encoding + self.encoding = PyBytes_AsString(self._bencoding) + if isinstance(unicode_errors, unicode): + self._berrors = unicode_errors.encode('ascii') + else: + self._berrors = unicode_errors + self.unicode_errors = PyBytes_AsString(self._berrors) + + def __dealloc__(self): + free(self.pk.buf); + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef long long llval + cdef unsigned long long ullval + cdef long longval + cdef float fval + cdef double dval + cdef char* rawval + cdef int ret + cdef dict d + + if nest_limit < 0: + raise PackValueError("recursion limit exceeded.") + + if o is None: + ret = msgpack_pack_nil(&self.pk) + elif isinstance(o, bool): + if o: + ret = msgpack_pack_true(&self.pk) + else: + ret = msgpack_pack_false(&self.pk) + elif PyLong_Check(o): + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + elif PyInt_Check(o): + longval = o + ret = msgpack_pack_long(&self.pk, longval) + elif PyFloat_Check(o): + if self.use_float: + fval = o + ret = msgpack_pack_float(&self.pk, fval) + else: + dval = o + ret = msgpack_pack_double(&self.pk, dval) + elif PyBytes_Check(o): + rawval = o + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif PyUnicode_Check(o): + if not self.encoding: + raise TypeError("Can't encode unicode string: no encoding is specified") + o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) + rawval = o + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif PyDict_CheckExact(o): + d = <dict>o + ret = msgpack_pack_map(&self.pk, len(d)) + if ret == 0: + for k, v in d.iteritems(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyDict_Check(o): + ret = msgpack_pack_map(&self.pk, len(o)) + if ret == 0: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyTuple_Check(o) or PyList_Check(o): + ret = msgpack_pack_array(&self.pk, len(o)) + if ret == 0: + for v in o: + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif self._default: + o = self._default(o) + ret = self._pack(o, nest_limit-1) + else: + raise TypeError("can't serialize %r" % (o,)) + return ret + + cpdef pack(self, object obj): + cdef int ret + ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) + if ret == -1: + raise MemoryError + elif ret: # should not happen. + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def pack_array_header(self, size_t size): + cdef int ret = msgpack_pack_array(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def pack_map_header(self, size_t size): + cdef int ret = msgpack_pack_map(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def pack_map_pairs(self, object pairs): + """ + Pack *pairs* as msgpack map type. + + *pairs* should sequence of pair. + (`len(pairs)` and `for k, v in *pairs*:` should be supported.) + """ + cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) + if ret == 0: + for k, v in pairs: + ret = self._pack(k) + if ret != 0: break + ret = self._pack(v) + if ret != 0: break + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + + def reset(self): + """Clear internal buffer.""" + self.pk.length = 0 + + def bytes(self): + """Return buffer content.""" + return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + + +def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'): + """ + pack an object `o` and write it to stream).""" + packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors) + stream.write(packer.pack(o)) + +def packb(object o, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False): + """ + pack o and return packed bytes.""" + packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors, + use_single_float=use_single_float) + return packer.pack(o) diff --git a/msgpack/_msgpack.pyx b/msgpack/_unpacker.pyx index d58255e..4a27d9e 100644 --- a/msgpack/_msgpack.pyx +++ b/msgpack/_unpacker.pyx @@ -7,37 +7,11 @@ cdef extern from "Python.h": ctypedef char* const_void_ptr "const void*" ctypedef struct PyObject cdef int PyObject_AsReadBuffer(object o, const_void_ptr* buff, Py_ssize_t* buf_len) except -1 - char* __FILE__ - int __LINE__ from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * - -cdef extern from "pack.h": - struct msgpack_packer: - char* buf - size_t length - size_t buf_size - - int msgpack_pack_int(msgpack_packer* pk, int d) - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) - int msgpack_pack_float(msgpack_packer* pk, float d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - -cdef int DEFAULT_RECURSE_LIMIT=511 - - from msgpack.exceptions import ( BufferFull, OutOfData, @@ -46,226 +20,6 @@ from msgpack.exceptions import ( ) -cdef class Packer(object): - """MessagePack Packer - - usage: - - packer = Packer() - astream.write(packer.pack(a)) - astream.write(packer.pack(b)) - - Packer's constructor has some keyword arguments: - - * *defaut* - Convert user type to builtin type that Packer supports. - See also simplejson's document. - * *encoding* - Convert unicode to bytes with this encoding. (default: 'utf-8') - * *unicode_erros* - Error handler for encoding unicode. (default: 'strict') - * *use_single_float* - Use single precision float type for float. (default: False) - * *autoreset* - Reset buffer after each pack and return it's content as `bytes`. (default: True). - If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. - """ - cdef msgpack_packer pk - cdef object _default - cdef object _bencoding - cdef object _berrors - cdef char *encoding - cdef char *unicode_errors - cdef bool use_float - cdef bint autoreset - - def __cinit__(self): - cdef int buf_size = 1024*1024 - self.pk.buf = <char*> malloc(buf_size); - if self.pk.buf == NULL: - raise MemoryError("Unable to allocate internal buffer.") - self.pk.buf_size = buf_size - self.pk.length = 0 - - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, bint autoreset=1): - self.use_float = use_single_float - self.autoreset = autoreset - if default is not None: - if not PyCallable_Check(default): - raise TypeError("default must be a callable.") - self._default = default - if encoding is None: - self.encoding = NULL - self.unicode_errors = NULL - else: - if isinstance(encoding, unicode): - self._bencoding = encoding.encode('ascii') - else: - self._bencoding = encoding - self.encoding = PyBytes_AsString(self._bencoding) - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') - else: - self._berrors = unicode_errors - self.unicode_errors = PyBytes_AsString(self._berrors) - - def __dealloc__(self): - free(self.pk.buf); - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: - cdef long long llval - cdef unsigned long long ullval - cdef long longval - cdef float fval - cdef double dval - cdef char* rawval - cdef int ret - cdef dict d - - if nest_limit < 0: - raise UnpackValueError("recursion limit exceeded.") - - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): - if o: - ret = msgpack_pack_true(&self.pk) - else: - ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - elif PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) - else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_Check(o): - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyUnicode_Check(o): - if not self.encoding: - raise TypeError("Can't encode unicode string: no encoding is specified") - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyDict_CheckExact(o): - d = <dict>o - ret = msgpack_pack_map(&self.pk, len(d)) - if ret == 0: - for k, v in d.iteritems(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyDict_Check(o): - ret = msgpack_pack_map(&self.pk, len(o)) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyTuple_Check(o) or PyList_Check(o): - ret = msgpack_pack_array(&self.pk, len(o)) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif self._default: - o = self._default(o) - ret = self._pack(o, nest_limit-1) - else: - raise TypeError("can't serialize %r" % (o,)) - return ret - - cpdef pack(self, object obj): - cdef int ret - ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) - if ret == -1: - raise MemoryError - elif ret: # should not happen. - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_array_header(self, size_t size): - cdef int ret = msgpack_pack_array(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_map_header(self, size_t size): - cdef int ret = msgpack_pack_map(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_map_pairs(self, object pairs): - """ - Pack *pairs* as msgpack map type. - - *pairs* should sequence of pair. - (`len(pairs)` and `for k, v in *pairs*:` should be supported.) - """ - cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) - if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def reset(self): - """Clear internal buffer.""" - self.pk.length = 0 - - def bytes(self): - """Return buffer content.""" - return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - - -def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'): - """ - pack an object `o` and write it to stream).""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors) - stream.write(packer.pack(o)) - -def packb(object o, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False): - """ - pack o and return packed bytes.""" - packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors, - use_single_float=use_single_float) - return packer.pack(o) - cdef extern from "unpack.h": ctypedef struct msgpack_user: @@ -360,7 +114,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj else: - return None + raise UnpackValueError def unpack(object stream, object object_hook=None, object list_hook=None, @@ -626,4 +380,3 @@ cdef class Unpacker(object): #def _off(self): # return self.buf_head - diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index 0a75430..2565541 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -21,3 +21,9 @@ class ExtraData(ValueError): def __str__(self): return "unpack(b) recieved extra data." + +class PackException(Exception): + pass + +class PackValueError(PackException, ValueError): + pass @@ -2,7 +2,6 @@ # coding: utf-8 import os import sys -import shutil from glob import glob from distutils.command.sdist import sdist from setuptools import setup, Extension @@ -59,7 +58,6 @@ if have_cython: else: Sdist = sdist -sources = ['msgpack/_msgpack.cpp'] libraries = [] if sys.platform == 'win32': libraries.append('ws2_32') @@ -69,13 +67,20 @@ if sys.byteorder == 'big': else: macros = [('__LITTLE_ENDIAN__', '1')] -msgpack_mod = Extension('msgpack._msgpack', - sources=sources, - libraries=libraries, - include_dirs=['.'], - define_macros=macros, - ) -del sources, libraries, macros +ext_modules = [] +ext_modules.append(Extension('msgpack._packer', + sources=['msgpack/_packer.cpp'], + libraries=libraries, + include_dirs=['.'], + define_macros=macros, + )) +ext_modules.append(Extension('msgpack._unpacker', + sources=['msgpack/_unpacker.cpp'], + libraries=libraries, + include_dirs=['.'], + define_macros=macros, + )) +del libraries, macros desc = 'MessagePack (de)serializer.' @@ -89,7 +94,7 @@ setup(name='msgpack-python', author_email='songofacandy@gmail.com', version=version_str, cmdclass={'build_ext': BuildExt, 'sdist': Sdist}, - ext_modules=[msgpack_mod], + ext_modules=ext_modules, packages=['msgpack'], description=desc, long_description=long_desc, |
