diff options
-rw-r--r-- | doc/Py3K.txt | 513 | ||||
-rw-r--r-- | numpy/core/src/multiarray/descriptor.c | 263 |
2 files changed, 693 insertions, 83 deletions
diff --git a/doc/Py3K.txt b/doc/Py3K.txt new file mode 100644 index 000000000..a099a67dd --- /dev/null +++ b/doc/Py3K.txt @@ -0,0 +1,513 @@ +****************************************** +Notes on making the transition to Python 3 +****************************************** + +General +======= + +Resources +--------- + +Information on porting to 3K: + +- http://wiki.python.org/moin/cporting +- http://wiki.python.org/moin/PortingExtensionModulesToPy3k + +Git trees +--------- + +- http://github.com/pv/numpy-work/commits/py3k +- http://github.com/illume/numpy3k/commits/work + +Prerequisites +------------- + +The Nose test framework has currently (Nov 2009) no released Python 3 +compatible version. Its 3K SVN branch, however, works quite well: + +- http://python-nose.googlecode.com/svn/branches/py3k + + +Semantic changes +================ + +We make the following semantic changes: + +* division: integer division is by default true_divide, also for arrays +* dtype fields: 'a' and b'a' are different fields + + +Python code +=========== + + +What we do now +-------------- + +2to3 in setup.py + + Currently, setup.py calls 2to3 automatically to convert Python sources + to Python 3 ones, and stores the results under:: + + build/py3k + + Only changed files will be re-converted when setup.py is called a second + time, making development much faster. + + Currently, this seems to handle most (all?) of the necessary Python + code conversion. + +numpy.compat.py3k + + There are some utility functions needed for 3K compatibility in + ``numpy.compat.py3k`` -- they can be imported from ``numpy.compat``. + More can be added as needed. + + +Syntax changes +-------------- + +Code that wants to cater for both Python2 and Python3 needs to take +at least the following into account: + +1) "except FooException, bar:" -> "except FooException as bar:" + +2) "from localmodule import foo" + + Syntax for relative imports has changed and is incompatible between + Python 2.4 and Python 3. 
The only way seems to be to use absolute imports + throughout. + +3) "print foo, bar" -> "print(foo, bar)" + + Print is no longer a statement. + + +C Code +====== + +What has been done so far, and some known TODOs +----------------------------------------------- + +private/npy_3kcompat.h + + Convenience macros for Python 3 support. + New ones that need to be added should be added in this file. + +ob_type etc. + + These use Py_SIZE, etc. macros now. The macros are also defined in + npy_3kcompat.h for the Python versions that don't have them natively. + +PyNumberMethod + + The structures have been converted to the new format. + + TODO: check if semantics of the methods have changed + +PyBuffer_* + + These parts have been replaced with stub code, marked by #warning XXX + + TODO: implement the new buffer protocol: for scalars and arrays + + - generate format strings from dtype + - parse format strings? + - Py_Ssize_t for strides and shape? + + TODO: decide what to do with the fact that PyMemoryView object is not + stand-alone. Do we need a separate "dummy" object? + +PyString + + PyString is currently defined as PyBytes in npy_3kcompat.h. + + Decisions: + + * field names are Unicode + + * field titles can be arbitrary objects. + If they are Unicode, insert them into the fields dict. + + * dtype strings are Unicode. + + * datetime tuple contains Unicode. + + * Exceptions should preferably be ASCII-only -> use AsUnicodeEscape + + + TODO: Are exception strings bytes or unicode? What about tp_doc? + + Fix lib/src/_compiled_base accordingly. + + TODO: I have a feeling that we should avoid PyUnicode_AsUTF8EncodedString + wherever possible... + + TODO: Decide on a policy between Unicode and Bytes + + a) what is allowed for the user to pass in: which one or both? + b) what is the internal format: which one or both? + c) if we do conversions, what is the encoding? 
+ (anything apart from utf-8 or ascii does not make sense, imho) + + Some instances: + + - dtype field names (if both, which is the default?) + If unicode, what to do with serialization to npy files etc.? + force utf8? + + - dtype field titles (probably can be arbitrary object) + + - dtype format strings ('i4', '|S7' etc.) + + TODO: Replace all occurrences of String by Bytes or Unicode, to ensure + that we have made a conscious choice for each case in Py3K. + + #define PyBytes -> PyString for Python 2 in npy_3kcompat.h + + Finally remove the PyString -> PyBytes defines from npy_3kcompat.h + This is probably the *easiest* way to make sure all of + the string/unicode transition has been audited. + + The String/Unicode transition is simply too dangerous to handle + by a blanket replacement. + +PyInt + + PyInt is currently replaced by PyLong, via macros in npy_3kcompat.h + + Dtype decision rules were changed accordingly, so Numpy understands + Python int to be dtype-compatible with NPY_LONG. + + TODO: Decide on + + ... what is: array([1]).dtype + ... what is: array([2**40]).dtype + ... what is: array([2**256]).dtype + ... what is: array([1]) + 2**40 + ... what is: array([1]) + 2**256 + + i.e. dtype casting rules. It seems to <pv> that we will want to + fix the dtype of Python 3 int to be the machine integer size, + despite the fact that the actual Python 3 object is not fixed-size. + + TODO: Audit the automatic dtype decision -- did I plug all the cases? + +Divide + + The Divide operation is no more. + + So we change array(1) / 10 == array(0.1) + +tp_compare + + The compare method has vanished. + + TODO: ensure that all types that had only tp_compare also have + tp_richcompare. + + +PyTypeObject +------------ + +The PyTypeObject of py3k is binary compatible with the py2k version and the +old initializers should work. However, there are several considerations to +keep in mind. 
+ +1) Because the first three slots are now part of a struct, some compilers issue + warnings if they are initialized in the old way. + + In practice, it is necessary to use the Py_TYPE, Py_SIZE, Py_REFCNT + macros instead of accessing ob_type, ob_size and ob_refcnt + directly. These are defined for backward compatibility in + private/npy_3kcompat.h + +2) The compare slot has been made reserved in order to preserve binary + compatibility while the tp_compare function went away. The tp_richcompare + function has replaced it and we need to use that slot instead. This will + likely require modifications in the searchsorted functions and generic sorts + that currently use the compare function. + +3) The previous numpy practice of initializing the COUNT_ALLOCS slots was + bogus. They are not supposed to be explicitly initialized and were out of + place in any case because an extra base slot was added in python 2.6. + +Because of these facts it was thought better to use #ifdefs to bring the old +initializers up to py3k snuff rather than just fill the tp_richcompare slot. +They also serve to mark the places where changes have been made. The new form +is shown below. Note that explicit initialization can stop once none of the +remaining entries are non-zero, because zero is the default value that +variables with non-local linkage receive. 
+ + +NPY_NO_EXPORT PyTypeObject Foo_Type = { +#if defined(NPY_PY3K) + PyVarObject_HEAD_INIT(0,0) +#else + PyObject_HEAD_INIT(0) + 0, /* ob_size */ +#endif + "numpy.foo", /* tp_name */ + 0, /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ +#if defined(NPY_PY3K) + (void *)0, /* tp_reserved */ +#else + 0, /* tp_compare */ +#endif + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0 /* tp_version_tag (2.6) */ +}; + +Checklist of types having tp_compare but no tp_richcompare + +1) multiarray/flagsobject.c + +PyNumberMethods +--------------- + +Types with tp_as_number defined + +1) multiarray/arrayobject.c + +The slots nb_divide, nb_long, nb_oct, nb_hex, and nb_inplace_divide +have gone away. The slot nb_int is what nb_long used to be, nb_divide +is now nb_floor_divide, and nb_inplace_divide is now +nb_inplace_floor_divide. We will also have to make sure the +*_true_divide variants are defined. This should also be done for +python < 3.x, but that introduces a requirement for the +Py_TPFLAGS_HAVE_CLASS in the type flag. 
+ +/* + * Number implementations must check *both* arguments for proper type and + * implement the necessary conversions in the slot functions themselves. +*/ +PyNumberMethods foo_number_methods = { + (binaryfunc)0, /* nb_add */ + (binaryfunc)0, /* nb_subtract */ + (binaryfunc)0, /* nb_multiply */ + (binaryfunc)0, /* nb_remainder */ + (binaryfunc)0, /* nb_divmod */ + (ternaryfunc)0, /* nb_power */ + (unaryfunc)0, /* nb_negative */ + (unaryfunc)0, /* nb_positive */ + (unaryfunc)0, /* nb_absolute */ + (inquiry)0, /* nb_bool, nee nb_nonzero */ + (unaryfunc)0, /* nb_invert */ + (binaryfunc)0, /* nb_lshift */ + (binaryfunc)0, /* nb_rshift */ + (binaryfunc)0, /* nb_and */ + (binaryfunc)0, /* nb_xor */ + (binaryfunc)0, /* nb_or */ + (unaryfunc)0, /* nb_int */ + (void *)0, /* nb_reserved, nee nb_long */ + (unaryfunc)0, /* nb_float */ + (binaryfunc)0, /* nb_inplace_add */ + (binaryfunc)0, /* nb_inplace_subtract */ + (binaryfunc)0, /* nb_inplace_multiply */ + (binaryfunc)0, /* nb_inplace_remainder */ + (ternaryfunc)0, /* nb_inplace_power */ + (binaryfunc)0, /* nb_inplace_lshift */ + (binaryfunc)0, /* nb_inplace_rshift */ + (binaryfunc)0, /* nb_inplace_and */ + (binaryfunc)0, /* nb_inplace_xor */ + (binaryfunc)0, /* nb_inplace_or */ + (binaryfunc)0, /* nb_floor_divide */ + (binaryfunc)0, /* nb_true_divide */ + (binaryfunc)0, /* nb_inplace_floor_divide */ + (binaryfunc)0, /* nb_inplace_true_divide */ + (unaryfunc)0 /* nb_index */ +}; + +PySequenceMethods +----------------- + +Types with tp_as_sequence defined + +1) multiarray/descriptor.c +2) multiarray/scalartypes.c.src +3) multiarray/arrayobject.c + +PySequenceMethods in py3k are binary compatible with py2k, but some of the +slots have gone away. I suspect this means some functions need redefining so +the semantics of the slots needs to be checked. 
+ +PySequenceMethods foo_sequence_methods = { + (lenfunc)0, /* sq_length */ + (binaryfunc)0, /* sq_concat */ + (ssizeargfunc)0, /* sq_repeat */ + (ssizeargfunc)0, /* sq_item */ + (void *)0, /* nee sq_slice */ + (ssizeobjargproc)0, /* sq_ass_item */ + (void *)0, /* nee sq_ass_slice */ + (objobjproc)0, /* sq_contains */ + (binaryfunc)0, /* sq_inplace_concat */ + (ssizeargfunc)0 /* sq_inplace_repeat */ +}; + +PyMappingMethods +---------------- + +Types with tp_as_mapping defined + +1) multiarray/descriptor.c +2) multiarray/iterators.c +3) multiarray/scalartypes.c.src +4) multiarray/flagsobject.c +5) multiarray/arrayobject.c + +PyMappingMethods in py3k look to be the same as in py2k. The semantics +of the slots needs to be checked. + +PyMappingMethods foo_mapping_methods = { + (lenfunc)0, /* mp_length */ + (binaryfunc)0, /* mp_subscript */ + (objobjargproc)0 /* mp_ass_subscript */ +}; + + +PyBuffer +-------- + +Parts involving the PyBuffer_* likely require the most work, and they +are widely spread in multiarray: + +1) The void scalar makes use of buffers +2) Multiarray has methods for creating buffers etc. explicitly +3) Arrays can be created from buffers etc. +4) The .data attribute of an array is a buffer + +There are two things to note in 3K: + +1) The buffer protocol has changed. It is also now quite complicated, + and implementing it properly requires several pieces. + +2) There is no PyBuffer object any more. Instead, a MemoryView + object is present, but it always must piggy-back on another existing + object. + +Currently, what has been done is: + +1) Replace protocol implementations with stubs that either raise errors + or offer limited functionality. + +2) Replace PyBuffer usage by PyMemoryView where possible. + +3) ... and where not possible, use stubs that raise errors. + +What likely needs to be done is: + +1) Implement a simple "stub" compatibility buffer object + the memoryview can piggy-back on. 
+ + +PyNumber_Divide +--------------- + +This function has vanished -- needs to be replaced with PyNumber_TrueDivide +or PyNumber_FloorDivide. + +PyFile +------ + +Many of the PyFile items have disappeared: + +1) PyFile_Type +2) PyFile_AsFile +3) PyFile_FromString + +Compatibility wrappers for these are now in private/npy_3kcompat.h + + +PyString +-------- + +PyString was removed, and needs to be replaced either by PyBytes or PyUnicode. +The plan of attack currently is: + +1) The 'string' array dtype will be replaced by Bytes +2) The 'unicode' array dtype will stay Unicode +3) dtype field names can be *either* Bytes or Unicode + +Some compatibility wrappers are defined in private/npy_3kcompat.h, +essentially redefining String as Bytes. + +However, at least the following points still need to be audited: + +1) PyObject_Str -> it now returns a Unicode object +2) tp_doc -> char* string, but is it in unicode or what? + + +RO +-- + +The RO alias for READONLY is no more. + + +Py_TPFLAGS_CHECKTYPES +--------------------- + +This has vanished and is always on in Py3K. + + +PyInt +----- + +There is no limited-range integer type any more in Py3K. + +Currently, the plan is the following: + +1) Numpy's integer types no longer inherit from Python integer. +2) Convert Longs to integers, if their size is small enough and known. +3) Otherwise, use long longs. 
+ + +PyOS +---- + +Deprecations: + +1) PyOS_ascii_strtod -> PyOS_double_from_string; + curiously enough, PyOS_ascii_strtod is not only deprecated but also + causes segfaults diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index b9fbd4175..aed22f619 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -270,7 +270,6 @@ _convert_from_array_descr(PyObject *obj, int align) int dtypeflags = 0; int maxalign = 0; - n = PyList_GET_SIZE(obj); nameslist = PyTuple_New(n); if (!nameslist) { @@ -284,7 +283,7 @@ _convert_from_array_descr(PyObject *obj, int align) goto fail; } name = PyTuple_GET_ITEM(item, 0); - if (PyString_Check(name)) { + if (PyUString_Check(name)) { title = NULL; } else if (PyTuple_Check(name)) { @@ -293,26 +292,42 @@ _convert_from_array_descr(PyObject *obj, int align) } title = PyTuple_GET_ITEM(name, 0); name = PyTuple_GET_ITEM(name, 1); - if (!PyString_Check(name)) { + if (!PyUString_Check(name)) { goto fail; } } else { goto fail; } - if (PyString_GET_SIZE(name)==0) { + + /* Insert name into nameslist */ + Py_INCREF(name); + + if (PyUString_GET_SIZE(name) == 0) { + Py_DECREF(name); if (title == NULL) { - name = PyString_FromFormat("f%d", i); + name = PyUString_FromFormat("f%d", i); + } +#if defined(NPY_PY3K) + /* On Py3, allow only non-empty Unicode strings as field names */ + else if (PyUString_Check(title) && PyUString_GET_SIZE(title) > 0) { + name = title; + Py_INCREF(name); + } + else { + goto fail; } +#else else { name = title; Py_INCREF(name); } - } - else { - Py_INCREF(name); +#endif } PyTuple_SET_ITEM(nameslist, i, name); + + /* Process rest */ + if (PyTuple_GET_SIZE(item) == 2) { ret = PyArray_DescrConverter(PyTuple_GET_ITEM(item, 1), &conv); if (ret == PY_FAIL) { @@ -331,8 +346,11 @@ _convert_from_array_descr(PyObject *obj, int align) goto fail; } if ((PyDict_GetItem(fields, name) != NULL) || - (title && - (PyString_Check(title) || PyUnicode_Check(title)) && 
+#if defined(NPY_PY3K) + (title && PyUString_Check(title) && +#else + (title && (PyUString_Check(title) || PyUnicode_Check(title)) && +#endif (PyDict_GetItem(fields, title) != NULL))) { PyErr_SetString(PyExc_ValueError, "two fields with the same name"); @@ -362,7 +380,11 @@ _convert_from_array_descr(PyObject *obj, int align) if (title != NULL) { Py_INCREF(title); PyTuple_SET_ITEM(tup, 2, title); - if (PyString_Check(title) || PyUnicode_Check(title)) { +#if defined(NPY_PY3K) + if (PyUString_Check(title)) { +#else + if (PyUString_Check(title) || PyUnicode_Check(title)) { +#endif if (PyDict_GetItem(fields, title) != NULL) { PyErr_SetString(PyExc_ValueError, "title already used as a "\ @@ -421,7 +443,7 @@ _convert_from_list(PyObject *obj, int align) * can produce */ key = PyList_GET_ITEM(obj, n-1); - if (PyString_Check(key) && PyString_GET_SIZE(key) == 0) { + if (PyBytes_Check(key) && PyBytes_GET_SIZE(key) == 0) { n = n - 1; } /* End ignore code.*/ @@ -436,7 +458,7 @@ _convert_from_list(PyObject *obj, int align) fields = PyDict_New(); for (i = 0; i < n; i++) { tup = PyTuple_New(2); - key = PyString_FromFormat("f%d", i); + key = PyUString_FromFormat("f%d", i); ret = PyArray_DescrConverter(PyList_GET_ITEM(obj, i), &conv); if (ret == PY_FAIL) { Py_DECREF(tup); @@ -592,7 +614,7 @@ _get_datetime_tuple_from_cobj(PyObject *cobj) dt_tuple = PyTuple_New(4); PyTuple_SET_ITEM(dt_tuple, 0, - PyString_FromString(_datetime_strings[dt_data->base])); + PyBytes_FromString(_datetime_strings[dt_data->base])); PyTuple_SET_ITEM(dt_tuple, 1, PyInt_FromLong(dt_data->num)); PyTuple_SET_ITEM(dt_tuple, 2, @@ -611,7 +633,7 @@ _convert_datetime_tuple_to_cobj(PyObject *tuple) dt_data = _pya_malloc(sizeof(PyArray_DatetimeMetaData)); dt_data->base = _unit_from_str\ - (PyString_AsString(PyTuple_GET_ITEM(tuple, 0))); + (PyBytes_AsString(PyTuple_GET_ITEM(tuple, 0))); /* Assumes other objects are Python integers */ dt_data->num = PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 1)); @@ -684,7 +706,7 @@ 
_convert_from_datetime(PyObject *obj) PyArray_Descr *res; PyObject *_numpy_internal; - if (!PyString_Check(obj)) { + if (!PyBytes_Check(obj)) { return NULL; } _numpy_internal = PyImport_ImportModule("numpy.core._internal"); @@ -723,7 +745,7 @@ _convert_from_commastring(PyObject *obj, int align) PyArray_Descr *res; PyObject *_numpy_internal; - if (!PyString_Check(obj)) { + if (!PyBytes_Check(obj)) { return NULL; } _numpy_internal = PyImport_ImportModule("numpy.core._internal"); @@ -973,7 +995,11 @@ _convert_from_dict(PyObject *obj, int align) } name = PyObject_GetItem(names, index); Py_DECREF(index); - if (!(PyString_Check(name) || PyUnicode_Check(name))) { +#if defined(NPY_PY3K) + if (!PyUString_Check(name)) { +#else + if (!(PyUString_Check(name) || PyUnicode_Check(name))) { +#endif PyErr_SetString(PyExc_ValueError, "field names must be strings"); ret = PY_FAIL; @@ -989,7 +1015,11 @@ _convert_from_dict(PyObject *obj, int align) PyDict_SetItem(fields, name, tup); Py_DECREF(name); if (len == 3) { - if (PyString_Check(item) || PyUnicode_Check(item)) { +#if defined(NPY_PY3K) + if (PyUString_Check(item)) { +#else + if (PyUString_Check(item) || PyUnicode_Check(item)) { +#endif if (PyDict_GetItem(fields, item) != NULL) { PyErr_SetString(PyExc_ValueError, "title already used as a " \ @@ -1143,9 +1173,15 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at) else if (obj == (PyObject *)(&PyBool_Type)) { check_num = PyArray_BOOL; } - else if (obj == (PyObject *)(&PyString_Type)) { +#if defined(NPY_PY3K) + else if (obj == (PyObject *)(&PyBytes_Type)) { + check_num = PyArray_LONG; + } +#else + else if (obj == (PyObject *)(&PyBytes_Type)) { check_num = PyArray_STRING; } +#endif else if (obj == (PyObject *)(&PyUnicode_Type)) { check_num = PyArray_UNICODE; } @@ -1168,10 +1204,24 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at) } /* or a typecode string */ - if (PyString_Check(obj)) { + + if (PyUnicode_Check(obj)) { + /* Allow unicode format strings: convert to 
bytes */ + int retval; + PyObject *obj2; + obj2 = PyUnicode_AsASCIIString(obj); + if (obj2 == NULL) { + return PY_FAIL; + } + retval = PyArray_DescrConverter(obj2, at); + Py_DECREF(obj2); + return retval; + } + + if (PyBytes_Check(obj)) { /* Check for a string typecode. */ - type = PyString_AS_STRING(obj); - len = PyString_GET_SIZE(obj); + type = PyBytes_AS_STRING(obj); + len = PyBytes_GET_SIZE(obj); if (len <= 0) { goto fail; } @@ -1291,6 +1341,16 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at) /* Now check to see if the object is registered in typeDict */ if (typeDict != NULL) { item = PyDict_GetItem(typeDict, obj); +#if defined(NPY_PY3K) + if (!item && PyBytes_Check(obj)) { + PyObject *tmp; + tmp = PyUnicode_FromEncodedObject(obj, "ascii", "strict"); + if (tmp != NULL) { + item = PyDict_GetItem(typeDict, tmp); + Py_DECREF(tmp); + } + } +#endif if (item) { return PyArray_DescrConverter(item, at); } @@ -1449,24 +1509,24 @@ _append_to_datetime_typestr(PyArray_Descr *self, PyObject *ret) basestr = _datetime_strings[dt_data->base]; if (num == 1) { - tmp = PyString_FromString(basestr); + tmp = PyUString_FromString(basestr); } else { - tmp = PyString_FromFormat("%d%s", num, basestr); + tmp = PyUString_FromFormat("%d%s", num, basestr); } if (den != 1) { - res = PyString_FromFormat("/%d", den); - PyString_ConcatAndDel(&tmp, res); + res = PyUString_FromFormat("/%d", den); + PyUString_ConcatAndDel(&tmp, res); } - res = PyString_FromString("["); - PyString_ConcatAndDel(&res, tmp); - PyString_ConcatAndDel(&res, PyString_FromString("]")); + res = PyUString_FromString("["); + PyUString_ConcatAndDel(&res, tmp); + PyUString_ConcatAndDel(&res, PyUString_FromString("]")); if (events != 1) { - tmp = PyString_FromFormat("//%d", events); - PyString_ConcatAndDel(&res, tmp); + tmp = PyUString_FromFormat("//%d", events); + PyUString_ConcatAndDel(&res, tmp); } - PyString_ConcatAndDel(&ret, res); + PyUString_ConcatAndDel(&ret, res); return ret; } @@ -1488,12 +1548,12 @@ 
arraydescr_protocol_typestr_get(PyArray_Descr *self) size >>= 2; } - ret = PyString_FromFormat("%c%c%d", endian, basic_, size); + ret = PyUString_FromFormat("%c%c%d", endian, basic_, size); if (PyDataType_ISDATETIME(self)) { ret = _append_to_datetime_typestr(self, ret); } - return ret; + return ret; } static PyObject * @@ -1501,7 +1561,7 @@ arraydescr_typename_get(PyArray_Descr *self) { int len; PyTypeObject *typeobj = self->typeobj; - PyObject *res; + PyObject *res, *tmp; char *s; /* fixme: not reentrant */ static int prefix_len = 0; @@ -1509,10 +1569,10 @@ arraydescr_typename_get(PyArray_Descr *self) if (PyTypeNum_ISUSERDEF(self->type_num)) { s = strrchr(typeobj->tp_name, '.'); if (s == NULL) { - res = PyString_FromString(typeobj->tp_name); + res = PyUString_FromString(typeobj->tp_name); } else { - res = PyString_FromStringAndSize(s + 1, strlen(s) - 1); + res = PyUString_FromStringAndSize(s + 1, strlen(s) - 1); } return res; } @@ -1525,16 +1585,17 @@ arraydescr_typename_get(PyArray_Descr *self) len -= 1; } len -= prefix_len; - res = PyString_FromStringAndSize(typeobj->tp_name+prefix_len, len); + res = PyUString_FromStringAndSize(typeobj->tp_name+prefix_len, len); } if (PyTypeNum_ISFLEXIBLE(self->type_num) && self->elsize != 0) { PyObject *p; - p = PyString_FromFormat("%d", self->elsize * 8); - PyString_ConcatAndDel(&res, p); + p = PyUString_FromFormat("%d", self->elsize * 8); + PyUString_ConcatAndDel(&res, p); } if (PyDataType_ISDATETIME(self)) { res = _append_to_datetime_typestr(self, res); } + return res; } @@ -1574,7 +1635,7 @@ arraydescr_protocol_descr_get(PyArray_Descr *self) if (dobj == NULL) { return NULL; } - PyTuple_SET_ITEM(dobj, 0, PyString_FromString("")); + PyTuple_SET_ITEM(dobj, 0, PyUString_FromString("")); PyTuple_SET_ITEM(dobj, 1, arraydescr_protocol_typestr_get(self)); res = PyList_New(1); if (res == NULL) { @@ -1727,7 +1788,7 @@ arraydescr_names_set(PyArray_Descr *self, PyObject *val) PyObject *item; int valid = 1; item = 
PySequence_GetItem(val, i); - valid = PyString_Check(item); + valid = PyUString_Check(item); Py_DECREF(item); if (!valid) { PyErr_Format(PyExc_ValueError, @@ -1950,7 +2011,7 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args)) if (self->type_num == PyArray_UNICODE) { elsize >>= 2; } - obj = PyString_FromFormat("%c%d",self->kind, elsize); + obj = PyBytes_FromFormat("%c%d",self->kind, elsize); } PyTuple_SET_ITEM(ret, 1, Py_BuildValue("(Nii)", obj, 0, 1)); @@ -1967,7 +2028,7 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args)) } state = PyTuple_New(9); PyTuple_SET_ITEM(state, 0, PyInt_FromLong(version)); - PyTuple_SET_ITEM(state, 1, PyString_FromFormat("%c", endian)); + PyTuple_SET_ITEM(state, 1, PyBytes_FromFormat("%c", endian)); PyTuple_SET_ITEM(state, 2, arraydescr_subdescr_get(self)); if (self->names) { Py_INCREF(self->names); @@ -2228,13 +2289,19 @@ arraydescr_setstate(PyArray_Descr *self, PyObject *args) NPY_NO_EXPORT int PyArray_DescrAlignConverter(PyObject *obj, PyArray_Descr **at) { - if PyDict_Check(obj) { + if (PyDict_Check(obj)) { *at = _convert_from_dict(obj, 1); } - else if PyString_Check(obj) { + else if (PyBytes_Check(obj)) { *at = _convert_from_commastring(obj, 1); } - else if PyList_Check(obj) { + else if (PyUnicode_Check(obj)) { + PyObject *tmp; + tmp = PyUnicode_AsASCIIString(obj); + *at = _convert_from_commastring(tmp, 1); + Py_DECREF(tmp); + } + else if (PyList_Check(obj)) { *at = _convert_from_array_descr(obj, 1); } else { @@ -2257,13 +2324,19 @@ PyArray_DescrAlignConverter(PyObject *obj, PyArray_Descr **at) NPY_NO_EXPORT int PyArray_DescrAlignConverter2(PyObject *obj, PyArray_Descr **at) { - if PyDict_Check(obj) { + if (PyDict_Check(obj)) { *at = _convert_from_dict(obj, 1); } - else if PyString_Check(obj) { + else if (PyBytes_Check(obj)) { *at = _convert_from_commastring(obj, 1); } - else if PyList_Check(obj) { + else if (PyUnicode_Check(obj)) { + PyObject *tmp; + tmp = PyUnicode_AsASCIIString(obj); + *at = 
_convert_from_commastring(tmp, 1); + Py_DECREF(tmp); + } + else if (PyList_Check(obj)) { *at = _convert_from_array_descr(obj, 1); } else { @@ -2332,12 +2405,11 @@ PyArray_DescrNewByteorder(PyArray_Descr *self, char newendian) newfields = PyDict_New(); /* make new dictionary with replaced PyArray_Descr Objects */ - while(PyDict_Next(self->fields, &pos, &key, &value)) { + while (PyDict_Next(self->fields, &pos, &key, &value)) { if NPY_TITLE_KEY(key, value) { continue; } - if (!PyString_Check(key) || - !PyTuple_Check(value) || + if (!PyUString_Check(key) || !PyTuple_Check(value) || ((len=PyTuple_GET_SIZE(value)) < 2)) { continue; } @@ -2405,40 +2477,41 @@ arraydescr_str(PyArray_Descr *self) PyObject *lst; lst = arraydescr_protocol_descr_get(self); if (!lst) { - sub = PyString_FromString("<err>"); + sub = PyUString_FromString("<err>"); PyErr_Clear(); } else { +#warning XXX -- different representation in Py3K due to use of Bytes... sub = PyObject_Str(lst); } Py_XDECREF(lst); if (self->type_num != PyArray_VOID) { - PyObject *p; - PyObject *t=PyString_FromString("'"); + PyObject *p, *t; + t=PyUString_FromString("'"); p = arraydescr_protocol_typestr_get(self); - PyString_Concat(&p, t); - PyString_ConcatAndDel(&t, p); - p = PyString_FromString("("); - PyString_ConcatAndDel(&p, t); - PyString_ConcatAndDel(&p, PyString_FromString(", ")); - PyString_ConcatAndDel(&p, sub); - PyString_ConcatAndDel(&p, PyString_FromString(")")); + PyUString_Concat(&p, t); + PyUString_ConcatAndDel(&t, p); + p = PyUString_FromString("("); + PyUString_ConcatAndDel(&p, t); + PyUString_ConcatAndDel(&p, PyUString_FromString(", ")); + PyUString_ConcatAndDel(&p, sub); + PyUString_ConcatAndDel(&p, PyUString_FromString(")")); sub = p; } } else if (self->subarray) { PyObject *p; - PyObject *t = PyString_FromString("("); + PyObject *t = PyUString_FromString("("); PyObject *sh; p = arraydescr_str(self->subarray->base); if (!self->subarray->base->names && !self->subarray->base->subarray) { - PyObject 
*t=PyString_FromString("'"); - PyString_Concat(&p, t); - PyString_ConcatAndDel(&t, p); + PyObject *t=PyUString_FromString("'"); + PyUString_Concat(&p, t); + PyUString_ConcatAndDel(&t, p); p = t; } - PyString_ConcatAndDel(&t, p); - PyString_ConcatAndDel(&t, PyString_FromString(",")); + PyUString_ConcatAndDel(&t, p); + PyUString_ConcatAndDel(&t, PyUString_FromString(",")); if (!PyTuple_Check(self->subarray->shape)) { sh = Py_BuildValue("(O)", self->subarray->shape); } @@ -2446,9 +2519,9 @@ arraydescr_str(PyArray_Descr *self) sh = self->subarray->shape; Py_INCREF(sh); } - PyString_ConcatAndDel(&t, PyObject_Str(sh)); + PyUString_ConcatAndDel(&t, PyObject_Str(sh)); Py_DECREF(sh); - PyString_ConcatAndDel(&t, PyString_FromString(")")); + PyUString_ConcatAndDel(&t, PyUString_FromString(")")); sub = t; } else if (PyDataType_ISFLEXIBLE(self) || !PyArray_ISNBO(self->byteorder)) { @@ -2464,17 +2537,20 @@ static PyObject * arraydescr_repr(PyArray_Descr *self) { PyObject *sub, *s; - s = PyString_FromString("dtype("); + s = PyUString_FromString("dtype("); sub = arraydescr_str(self); + if (sub == NULL) { + return sub; + } if (!self->names && !self->subarray) { - PyObject *t=PyString_FromString("'"); - PyString_Concat(&sub, t); - PyString_ConcatAndDel(&t, sub); + PyObject *t=PyUString_FromString("'"); + PyUString_Concat(&sub, t); + PyUString_ConcatAndDel(&t, sub); sub = t; } - PyString_ConcatAndDel(&s, sub); - sub = PyString_FromString(")"); - PyString_ConcatAndDel(&s, sub); + PyUString_ConcatAndDel(&s, sub); + sub = PyUString_FromString(")"); + PyUString_ConcatAndDel(&s, sub); return s; } @@ -2590,22 +2666,43 @@ descr_subscript(PyArray_Descr *self, PyObject *op) PyObject *retval; if (!self->names) { - PyObject *astr; + PyObject *astr, *bstr; astr = arraydescr_str(self); +#if defined(NPY_PY3K) + bstr = PyUnicode_AsUnicodeEscapeString(astr); + Py_DECREF(astr); + astr = bstr; +#endif PyErr_Format(PyExc_KeyError, "There are no fields in dtype %s.", - PyString_AsString(astr)); + 
PyBytes_AsString(astr)); Py_DECREF(astr); return NULL; } - if (PyString_Check(op) || PyUnicode_Check(op)) { +#if defined(NPY_PY3K) + if (PyUString_Check(op)) { +#else + if (PyUString_Check(op) || PyUnicode_Check(op)) { +#endif PyObject *obj = PyDict_GetItem(self->fields, op); PyObject *descr; + PyObject *s; if (obj == NULL) { + if (PyUnicode_Check(op)) { + s = PyUnicode_AsUnicodeEscapeString(op); + } + else { + s = op; + } + PyErr_Format(PyExc_KeyError, "Field named \'%s\' not found.", - PyString_AsString(op)); + PyBytes_AsString(s)); + + if (s != op) { + Py_DECREF(s); + } return NULL; } descr = PyTuple_GET_ITEM(obj, 0); |