From d56e2b2c8aa1005fbac3b584cd003ba0cdece2e2 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Tue, 25 Sep 2012 00:30:15 +1000
Subject: Use C++ function templating for skip()/construct()

---
 msgpack/_msgpack.pyx      | 23 +++++++++++------------
 msgpack/unpack.h          |  1 +
 msgpack/unpack_template.h | 21 +++++++++++++--------
 setup.py                  |  4 ++--
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx
index e0a1043..0fc3739 100644
--- a/msgpack/_msgpack.pyx
+++ b/msgpack/_msgpack.pyx
@@ -208,8 +208,10 @@ cdef extern from "unpack.h":
         unsigned int ct
         PyObject* key
 
-    int template_execute(template_context* ctx, const_char_ptr data,
-            size_t len, size_t* off, bint construct) except -1
+    ctypedef int (*execute_fn)(template_context* ctx, const_char_ptr data,
+                               size_t len, size_t* off) except -1
+    execute_fn template_construct
+    execute_fn template_skip
     void template_init(template_context* ctx)
     object template_data(template_context* ctx)
 
@@ -257,7 +259,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
         if not PyCallable_Check(list_hook):
             raise TypeError("list_hook must be a callable.")
         ctx.user.list_hook = list_hook
-    ret = template_execute(&ctx, buf, buf_len, &off, 1)
+    ret = template_construct(&ctx, buf, buf_len, &off)
     if ret == 1:
         obj = template_data(&ctx)
         if off < buf_len:
@@ -455,16 +457,13 @@ cdef class Unpacker(object):
             else:
                 self.file_like = None
 
-    cdef object _unpack(self, bint construct):
+    cdef object _unpack(self, execute_fn execute):
         cdef int ret
         cdef object obj
         while 1:
-            ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head, construct)
+            ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
             if ret == 1:
-                if construct:
-                    obj = template_data(&self.ctx)
-                else:
-                    obj = None
+                obj = template_data(&self.ctx)
                 template_init(&self.ctx)
                 return obj
             elif ret == 0:
@@ -477,17 +476,17 @@ cdef class Unpacker(object):
 
     def unpack(self):
         """unpack one object"""
-        return self._unpack(1)
+        return self._unpack(template_construct)
 
     def skip(self):
         """read and ignore one object, returning None"""
-        return self._unpack(0)
+        return self._unpack(template_skip)
 
     def __iter__(self):
         return self
 
     def __next__(self):
-        return self._unpack(1)
+        return self._unpack(template_construct)
 
     # for debug.
     #def _buf(self):
diff --git a/msgpack/unpack.h b/msgpack/unpack.h
index a106f9c..3c9d4be 100644
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@@ -41,6 +41,7 @@ typedef struct unpack_user {
 
 #define msgpack_unpack_user unpack_user
 
+typedef int (*execute_fn)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off);
 struct template_context;
 typedef struct template_context template_context;
 
diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h
index 5495a51..e0cf42e 100644
--- a/msgpack/unpack_template.h
+++ b/msgpack/unpack_template.h
@@ -95,7 +95,8 @@ msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context
 }
 
 
-msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off, int construct)
+template <bool construct>
+msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off)
 {
 	assert(len >= *off);
 
@@ -380,6 +381,8 @@ _header_again:
 
 
 _finish:
+	if (!construct)
+		msgpack_unpack_callback(_nil)(user, &obj);
 	stack[0].obj = obj;
 	++p;
 	ret = 1;
@@ -405,13 +408,6 @@ _end:
 
 #undef construct_cb
 }
-
-#undef msgpack_unpack_func
-#undef msgpack_unpack_callback
-#undef msgpack_unpack_struct
-#undef msgpack_unpack_object
-#undef msgpack_unpack_user
-
 #undef push_simple_value
 #undef push_fixed_value
 #undef push_variable_value
@@ -419,6 +415,15 @@ _end:
 #undef again_fixed_trail_if_zero
 #undef start_container
 
+static const execute_fn template_construct = &template_execute<true>;
+static const execute_fn template_skip = &template_execute<false>;
+
+#undef msgpack_unpack_func
+#undef msgpack_unpack_callback
+#undef msgpack_unpack_struct
+#undef msgpack_unpack_object
+#undef msgpack_unpack_user
+
 #undef NEXT_CS
 
 /* vim: set ts=4 sw=4 noexpandtab */
diff --git a/setup.py b/setup.py
index 86b0b34..708fa13 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ except ImportError:
 
 def cythonize(src):
     sys.stderr.write("cythonize: %r\n" % (src,))
-    cython_compiler.compile([src])
+    cython_compiler.compile([src], cplus=True)
 
 def ensure_source(src):
     pyx = os.path.splitext(src)[0] + '.pyx'
@@ -67,7 +67,7 @@ if have_cython:
 else:
     Sdist = sdist
 
-sources = ['msgpack/_msgpack.c']
+sources = ['msgpack/_msgpack.cpp']
 libraries = []
 if sys.platform == 'win32':
     libraries.append('ws2_32')
--
cgit v1.2.1


From 0431a766f4e069d74627441aa3facbc7e64e4511 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Tue, 25 Sep 2012 01:18:33 +1000
Subject: read_array/map_header functionality

---
 msgpack/_msgpack.pyx      | 10 +++++++
 msgpack/unpack_template.h | 63 ++++++++++++++++++++++++++++++++++++++++++++
 test/test_read_size.py    | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+)
 create mode 100644 test/test_read_size.py

diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx
index 0fc3739..7131d1f 100644
--- a/msgpack/_msgpack.pyx
+++ b/msgpack/_msgpack.pyx
@@ -212,6 +212,8 @@ cdef extern from "unpack.h":
                                size_t len, size_t* off) except -1
     execute_fn template_construct
     execute_fn template_skip
+    execute_fn read_array_header
+    execute_fn read_map_header
     void template_init(template_context* ctx)
     object template_data(template_context* ctx)
 
@@ -482,6 +484,14 @@ cdef class Unpacker(object):
         """read and ignore one object, returning None"""
         return self._unpack(template_skip)
 
+    def read_array_header(self):
+        """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents."""
+        return self._unpack(read_array_header)
+
+    def read_map_header(self):
+        """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs."""
+        return self._unpack(read_map_header)
+
     def __iter__(self):
         return self
 
diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h
index e0cf42e..69ef6e2 100644
--- a/msgpack/unpack_template.h
+++ b/msgpack/unpack_template.h
@@ -408,6 +408,10 @@ _end:
 
 #undef construct_cb
 }
+#undef SWITCH_RANGE_BEGIN
+#undef SWITCH_RANGE
+#undef SWITCH_RANGE_DEFAULT
+#undef SWITCH_RANGE_END
 #undef push_simple_value
 #undef push_fixed_value
 #undef push_variable_value
@@ -415,8 +419,67 @@ _end:
 #undef again_fixed_trail_if_zero
 #undef start_container
 
+template <unsigned int fixed_offset, unsigned int var_offset>
+msgpack_unpack_func(int, _container_header)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off)
+{
+	assert(len >= *off);
+	uint32_t size;
+	const unsigned char *const p = (unsigned char*)data + *off;
+
+#define inc_offset(inc) \
+	if (len - *off < inc) \
+		return 0; \
+	*off += inc;
+
+	switch (*p) {
+	case var_offset:
+		inc_offset(3);
+		size = _msgpack_load16(uint16_t, p + 1);
+		break;
+	case var_offset + 1:
+		inc_offset(5);
+		size = _msgpack_load32(uint32_t, p + 1);
+		break;
+#ifdef USE_CASE_RANGE
+	case fixed_offset + 0x0 ... fixed_offset + 0xf:
+#else
+	case fixed_offset + 0x0:
+	case fixed_offset + 0x1:
+	case fixed_offset + 0x2:
+	case fixed_offset + 0x3:
+	case fixed_offset + 0x4:
+	case fixed_offset + 0x5:
+	case fixed_offset + 0x6:
+	case fixed_offset + 0x7:
+	case fixed_offset + 0x8:
+	case fixed_offset + 0x9:
+	case fixed_offset + 0xa:
+	case fixed_offset + 0xb:
+	case fixed_offset + 0xc:
+	case fixed_offset + 0xd:
+	case fixed_offset + 0xe:
+	case fixed_offset + 0xf:
+#endif
+		++*off;
+		size = ((unsigned int)*p) & 0x0f;
+		break;
+	default:
+		PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream");
+		return -1;
+	}
+	msgpack_unpack_callback(_uint32)(&ctx->user, size, &ctx->stack[0].obj);
+	return 1;
+}
+
+#undef SWITCH_RANGE_BEGIN
+#undef SWITCH_RANGE
+#undef SWITCH_RANGE_DEFAULT
+#undef SWITCH_RANGE_END
+
 static const execute_fn template_construct = &template_execute<true>;
 static const execute_fn template_skip = &template_execute<false>;
+static const execute_fn read_array_header = &template_container_header<0x90, 0xdc>;
+static const execute_fn read_map_header = &template_container_header<0x80, 0xde>;
 
 #undef msgpack_unpack_func
 #undef msgpack_unpack_callback
diff --git a/test/test_read_size.py b/test/test_read_size.py
new file mode 100644
index 0000000..714f963
--- /dev/null
+++ b/test/test_read_size.py
@@ -0,0 +1,66 @@
+"""Test Unpacker's read_array_header and read_map_header methods"""
+from msgpack import packb, Unpacker
+UnexpectedTypeException = ValueError
+
+def test_read_array_header():
+    unpacker = Unpacker()
+    unpacker.feed(packb(['a', 'b', 'c']))
+    assert unpacker.read_array_header() == 3
+    assert unpacker.unpack() == 'a'
+    assert unpacker.unpack() == 'b'
+    assert unpacker.unpack() == 'c'
+    try:
+        unpacker.unpack()
+        assert 0, 'should raise exception'
+    except StopIteration:
+        assert 1, 'okay'
+
+
+def test_read_map_header():
+    unpacker = Unpacker()
+    unpacker.feed(packb({'a': 'A'}))
+    assert unpacker.read_map_header() == 1
+    assert unpacker.unpack() == 'a'
+    assert unpacker.unpack() == 'A'
+    try:
+        unpacker.unpack()
+        assert 0, 'should raise exception'
+    except StopIteration:
+        assert 1, 'okay'
+
+def test_incorrect_type_array():
+    unpacker = Unpacker()
+    unpacker.feed(packb(1))
+    try:
+        unpacker.read_array_header()
+        assert 0, 'should raise exception'
+    except UnexpectedTypeException:
+        assert 1, 'okay'
+
+def test_incorrect_type_map():
+    unpacker = Unpacker()
+    unpacker.feed(packb(1))
+    try:
+        unpacker.read_map_header()
+        assert 0, 'should raise exception'
+    except UnexpectedTypeException:
+        assert 1, 'okay'
+
+def test_correct_type_nested_array():
+    unpacker = Unpacker()
+    unpacker.feed(packb({'a': ['b', 'c', 'd']}))
+    try:
+        unpacker.read_array_header()
+        assert 0, 'should raise exception'
+    except UnexpectedTypeException:
+        assert 1, 'okay'
+
+def test_incorrect_type_nested_map():
+    unpacker = Unpacker()
+    unpacker.feed(packb([{'a': 'b'}]))
+    try:
+        unpacker.read_map_header()
+        assert 0, 'should raise exception'
+    except UnexpectedTypeException:
+        assert 1, 'okay'
+
--
cgit v1.2.1


From 9d9c3eecb846c6a927a31aae394dea39fa75aef4 Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Sun, 23 Sep 2012 17:26:16 +1000
Subject: Packer.pack_array/map_header to correspond to read functions

---
 msgpack/_msgpack.pyx | 11 +++++++++++
 test/test_pack.py    | 29 +++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx
index 7131d1f..18a75ca 100644
--- a/msgpack/_msgpack.pyx
+++ b/msgpack/_msgpack.pyx
@@ -178,6 +178,17 @@ cdef class Packer(object):
         self.pk.length = 0
         return buf
 
+    cpdef pack_array_header(self, size_t size):
+        msgpack_pack_array(&self.pk, size)
+        buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
+        self.pk.length = 0
+        return buf
+
+    cpdef pack_map_header(self, size_t size):
+        msgpack_pack_map(&self.pk, size)
+        buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
+        self.pk.length = 0
+        return buf
 
 def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'):
     """
diff --git a/test/test_pack.py b/test/test_pack.py
index b216c46..937141d 100644
--- a/test/test_pack.py
+++ b/test/test_pack.py
@@ -91,6 +91,35 @@ def testPackFloat():
     assert_equal(packb(1.0, use_single_float=True), b'\xca' + struct.pack('>f', 1.0))
     assert_equal(packb(1.0, use_single_float=False), b'\xcb' + struct.pack('>d', 1.0))
 
+def testArraySize(sizes=[0, 5, 50, 1000]):
+    bio = six.BytesIO()
+    packer = Packer()
+    for size in sizes:
+        bio.write(packer.pack_array_header(size))
+        for i in range(size):
+            bio.write(packer.pack(i))
+
+    bio.seek(0)
+    unpacker = Unpacker(bio)
+    for size in sizes:
+        assert unpacker.unpack() == tuple(range(size))
+
+def testMapSize(sizes=[0, 5, 50, 1000]):
+    bio = six.BytesIO()
+    packer = Packer()
+    for size in sizes:
+        bio.write(packer.pack_map_header(size))
+        for i in range(size):
+            bio.write(packer.pack(i)) # key
+            bio.write(packer.pack(i * 2)) # value
+
+    bio.seek(0)
+    unpacker = Unpacker(bio)
+    for size in sizes:
+        assert unpacker.unpack() == {i: i * 2 for i in range(size)}
+
+
+
 
 class odict(dict):
     '''Reimplement OrderedDict to run test on Python 2.6'''
--
cgit v1.2.1