diff options
-rw-r--r-- | numpy/core/include/numpy/arrayobject.h | 6 | ||||
-rw-r--r-- | numpy/core/src/arrayobject.c | 36 | ||||
-rw-r--r-- | numpy/core/src/arraytypes.inc.src | 62 | ||||
-rw-r--r-- | numpy/core/src/multiarraymodule.c | 14 | ||||
-rw-r--r-- | numpy/core/src/scalartypes.inc.src | 8 |
5 files changed, 69 insertions, 57 deletions
diff --git a/numpy/core/include/numpy/arrayobject.h b/numpy/core/include/numpy/arrayobject.h index 3ec68bbd6..4f0d1b1ea 100644 --- a/numpy/core/include/numpy/arrayobject.h +++ b/numpy/core/include/numpy/arrayobject.h @@ -124,7 +124,7 @@ typedef unsigned char Bool; #endif #ifndef Py_USING_UNICODE -#define Py_UNICODE char +#error Must use Python with unicode enabled. #endif @@ -197,8 +197,6 @@ enum PyArray_TYPECHAR { PyArray_BOOLLTR = '?', PyArray_STRINGLTR = 'S', PyArray_STRINGLTR2 = 'a', PyArray_UNICODELTR = 'U', - PyArray_UCS4LTR = 'W', - PyArray_UCS2LTR = 'w', PyArray_VOIDLTR = 'V', /* No Descriptor, just a define -- this let's @@ -735,6 +733,8 @@ typedef Py_uintptr_t uintp; #define INTP_FMT "Ld" #endif +#define UInt32 PyArray_UCS4 + #define ERR(str) fprintf(stderr, #str); fflush(stderr); #define ERR2(str) fprintf(stderr, str); fflush(stderr); diff --git a/numpy/core/src/arrayobject.c b/numpy/core/src/arrayobject.c index 2444551c7..22f745ebf 100644 --- a/numpy/core/src/arrayobject.c +++ b/numpy/core/src/arrayobject.c @@ -408,6 +408,12 @@ copy_and_swap(void *dst, void *src, int itemsize, intp numitems, byte_swap_vector(d1, numitems, itemsize); } + +#ifndef Py_UNICODE_WIDE +#include "ucsnarrow.c" +#endif + + static PyArray_Descr **userdescrs=NULL; #define error_converting(x) (((x) == -1) && PyErr_Occurred()) @@ -861,7 +867,7 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) } else if (type_num == PyArray_UNICODE) { PyUnicodeObject *uni = (PyUnicodeObject*)obj; - int length = itemsize / sizeof(Py_UNICODE); + int length = itemsize / 4; /* Need an extra slot and need to use Python memory manager */ uni->str = NULL; @@ -876,6 +882,12 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) uni->length = length; uni->hash = -1; uni->defenc = NULL; +#ifndef Py_UNICODE_WIDE + /* Allocate enough for 2-characters per itemsize + get the actual number of characters converted + and reallocate when done. + */ +#endif } else { PyVoidScalarObject *vobj = (PyVoidScalarObject *)obj; @@ -5007,7 +5019,7 @@ discover_itemsize(PyObject *s, int nd, int *itemsize) if ((nd == 0) || PyString_Check(s) || \ PyUnicode_Check(s) || PyBuffer_Check(s)) { if PyUnicode_Check(s) - *itemsize = MAX(*itemsize, sizeof(Py_UNICODE)*n); + *itemsize = MAX(*itemsize, 4*n); else *itemsize = MAX(*itemsize, n); return 0; @@ -5289,7 +5301,7 @@ Array_FromScalar(PyObject *op, PyArray_Descr *typecode) if (itemsize == 0 && PyTypeNum_ISEXTENDED(type)) { itemsize = PyObject_Length(op); - if (type == PyArray_UNICODE) itemsize *= sizeof(Py_UNICODE); + if (type == PyArray_UNICODE) itemsize *= 4; } ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, typecode, @@ -5357,7 +5369,7 @@ Array_FromSequence(PyObject *s, PyArray_Descr *typecode, int fortran, if (itemsize == 0 && PyTypeNum_ISEXTENDED(type)) { if (discover_itemsize(s, nd, &itemsize) == -1) goto fail; - if (type == PyArray_UNICODE) itemsize*=sizeof(Py_UNICODE); + if (type == PyArray_UNICODE) itemsize*=4; } if (itemsize != typecode->elsize) { @@ -5529,10 +5541,10 @@ PyArray_CastToType(PyArrayObject *mp, PyArray_Descr *at, int fortran) if (at == NULL) return NULL; if (mpd->type_num == PyArray_STRING && \ at->type_num == PyArray_UNICODE) - at->elsize = mpd->elsize*sizeof(Py_UNICODE); + at->elsize = mpd->elsize*4; if (mpd->type_num == PyArray_UNICODE && at->type_num == PyArray_STRING) - at->elsize = mpd->elsize/sizeof(Py_UNICODE); + at->elsize = mpd->elsize/4; if (at->type_num == PyArray_VOID) at->elsize = mpd->elsize; } @@ -5836,15 +5848,7 @@ _array_typedescr_fromstr(char *str) break; case PyArray_UNICODELTR: type_num = PyArray_UNICODE; - size *= sizeof(Py_UNICODE); - break; - case PyArray_UCS2LTR: - if (sizeof(Py_UNICODE) != 2) _MY_FAIL - type_num = PyArray_UNICODE; - break; - case PyArray_UCS4LTR: - if (sizeof(Py_UNICODE) != 4) _MY_FAIL - type_num = PyArray_UNICODE; + size *= 4; break; case 'V': type_num = PyArray_VOID; @@ -6412,7 +6416,7 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) ret = (from->elsize <= to->elsize); } else if (totype == PyArray_UNICODE) { - ret = (from->elsize * sizeof(Py_UNICODE)\ + ret = (from->elsize * 4 \ <= to->elsize); } } diff --git a/numpy/core/src/arraytypes.inc.src b/numpy/core/src/arraytypes.inc.src index 10a43cd89..6a81f7e09 100644 --- a/numpy/core/src/arraytypes.inc.src +++ b/numpy/core/src/arraytypes.inc.src @@ -218,18 +218,21 @@ static PyObject * UNICODE_getitem(char *ip, PyArrayObject *ap) { PyObject *obj; - size_t size = sizeof(Py_UNICODE); int mysize; - Py_UNICODE *dptr; + PyArray_UCS4 *dptr; - mysize = ap->descr->elsize / size; - dptr = (Py_UNICODE *)ip + mysize-1; + mysize = ap->descr->elsize / 4 + dptr = (PyArray_UCS4 *)ip + mysize-1; while(mysize > 0 && *dptr-- == 0) mysize--; - obj = PyUnicode_FromUnicode((const Py_UNICODE *)ip, mysize); if (!PyArray_ISNOTSWAPPED(ap) && (obj != NULL)) { - byte_swap_vector(PyUnicode_AS_UNICODE(obj), - mysize, size); + byte_swap_vector(PyArray_BYTES(ap), mysize, 4); } +#ifdef Py_UNICODE_WIDE + obj = PyUnicode_FromUnicode((const PyArray_UCS4 *)ip, mysize); +#else + obj = MyPyUnicode_FromUCS4((const PyArray_UCS4 *)ip, mysize); +#endif + return obj; } @@ -237,12 +240,11 @@ static int UNICODE_setitem(PyObject *op, char *ov, PyArrayObject *ap) { PyObject *temp; - Py_UNICODE *ptr; + PyArray_UCS4 *ptr; int datalen; - size_t size = sizeof(Py_UNICODE); + size_t size = sizeof(PyArray_UCS4); if ((temp=PyObject_Unicode(op)) == NULL) return -1; - ptr = PyUnicode_AS_UNICODE(temp); if ((ptr == NULL) || (PyErr_Occurred())) { Py_DECREF(temp); @@ -250,14 +252,18 @@ UNICODE_setitem(PyObject *op, char *ov, PyArrayObject *ap) } datalen = PyUnicode_GET_DATA_SIZE(op); +#ifdef Py_UNICODE_WIDE memcpy(ov, ptr, MIN(ap->descr->elsize, datalen)); +#else + MyPyUnicode_AsUCS4(ov, ptr, MIN(ap->descr->elsize, datalen)); +#endif /* Fill in the rest of the space with 0 */ if (ap->descr->elsize > datalen) { memset(ov + datalen, 0, (ap->descr->elsize - datalen)); } if (!PyArray_ISNOTSWAPPED(ap)) - byte_swap_vector(ov, ap->descr->elsize / size, size); + byte_swap_vector(ov, ap->descr->elsize / 4, 4) Py_DECREF(temp); return 0; @@ -1171,7 +1177,7 @@ VOID_copyswapn (char *dst, char *src, intp n, int swap, int itemsize) static void UNICODE_copyswapn (char *dst, char *src, intp n, int swap, int itemsize) { - int size = sizeof(Py_UNICODE); + int size = sizeof(PyArray_UCS4); if (src != NULL) memcpy(dst, src, itemsize * n); @@ -1216,7 +1222,7 @@ VOID_copyswap (char *dst, char *src, int swap, int itemsize) static void UNICODE_copyswap (char *dst, char *src, int swap, int itemsize) { - int size = sizeof(Py_UNICODE); + int size = sizeof(PyArray_UCS4); if (src != NULL) memcpy(dst, src, itemsize); @@ -1310,15 +1316,24 @@ STRING_nonzero (char *ip, PyArrayObject *ap) return nonz; } +#ifdef Py_UNICODE_WIDE +#define PyArray_UCS4_ISSPACE Py_UNICODE_ISSPACE +#else +#define PyArray_UCS4_ISSPACE(x) FALSE +#endif + +/* fixme: This must deal with unaligned and byte-swapped data + and what-to do for UCS2-builds + */ static Bool -UNICODE_nonzero (Py_UNICODE *ip, PyArrayObject *ap) +UNICODE_nonzero (PyArray_UCS4 *ip, PyArrayObject *ap) { int len = ap->descr->elsize >> 1; int i; Bool nonz = FALSE; for (i=0; i<len; i++) { - if (!Py_UNICODE_ISSPACE(*ip)) { + if (!PyArray_UCS4_ISSPACE(*ip)) { nonz = TRUE; break; } @@ -1333,8 +1348,7 @@ OBJECT_nonzero (PyObject **ip, PyArrayObject *ap) return (Bool) PyObject_IsTrue(*ip); } -/* If subclass has _nonzero method call it with buffer - object wrapping current item. Otherwise, just compare with '\0'. +/* fixme: if we have fields, then nonzero if all sub-fields are nonzero. */ static Bool VOID_nonzero (char *ip, PyArrayObject *ap) @@ -1407,11 +1421,11 @@ STRING_compare(char *ip1, char *ip2, PyArrayObject *ap) /* taken from Python */ static int -UNICODE_compare(register Py_UNICODE *ip1, register Py_UNICODE *ip2, +UNICODE_compare(register PyArray_UCS4 *ip1, register PyArray_UCS4 *ip2, PyArrayObject *ap) { register int itemsize=ap->descr->elsize; - register Py_UNICODE c1, c2; + register PyArray_UCS4 c1, c2; if (itemsize < 0) return 0; @@ -1475,7 +1489,7 @@ OBJECT_argmax(PyObject **ip, intp n, intp *max_ind, PyArrayObject *aip) /**begin repeat #fname= STRING, UNICODE# -#type= char, Py_UNICODE# +#type= char, PyArray_UCS4# */ static int @@ -1667,7 +1681,7 @@ static void /**begin repeat #from= VOID, STRING, UNICODE# -#align= char, char, Py_UNICODE# +#align= char, char, PyArray_UCS4# #NAME= Void, String, Unicode# #endian= |, |, =# */ @@ -1893,10 +1907,10 @@ set_typeinfo(PyObject *dict) @name@_Descr.fields = Py_None; /**end repeat**/ - if (sizeof(Py_UNICODE) == 2) { + if (sizeof(PyArray_UCS4) == 2) { UNICODE_Descr.kind = PyArray_UCS2LTR; } - else if (sizeof(Py_UNICODE) == 4) { + else if (sizeof(PyArray_UCS4) == 4) { UNICODE_Descr.kind = PyArray_UCS4LTR; } @@ -1966,7 +1980,7 @@ set_typeinfo(PyObject *dict) PyDict_SetItemString(infodict, "UNICODE", s=Py_BuildValue("ciiiO", PyArray_UNICODELTR, PyArray_UNICODE, 0, - _ALIGN(Py_UNICODE), + _ALIGN(PyArray_UCS4), (PyObject *)\ &PyUnicodeArrType_Type)); Py_DECREF(s); diff --git a/numpy/core/src/multiarraymodule.c b/numpy/core/src/multiarraymodule.c index 3bad80cbf..ddacb68c9 100644 --- a/numpy/core/src/multiarraymodule.c +++ b/numpy/core/src/multiarraymodule.c @@ -3885,19 +3885,7 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at) the number of bytes. */ else if (check_num == PyArray_UNICODELTR) { - elsize *= sizeof(Py_UNICODE); - } - else if (check_num == PyArray_UCS2LTR || \ - check_num == PyArray_UCS4LTR) { - if ((elsize % sizeof(Py_UNICODE) != 0) || - ((check_num == PyArray_UCS4LTR) && \ - (sizeof(Py_UNICODE) != 4)) || - ((check_num == PyArray_UCS2LTR) && \ - (sizeof(Py_UNICODE) != 2))) { - PyErr_SetString(PyExc_TypeError, - "unsupported unicode format"); - return PY_FAIL; - } + elsize *= 4; } /* Support for generic processing c4, i4, f8, etc... diff --git a/numpy/core/src/scalartypes.inc.src b/numpy/core/src/scalartypes.inc.src index 1f32e1d67..58f9bebb2 100644 --- a/numpy/core/src/scalartypes.inc.src +++ b/numpy/core/src/scalartypes.inc.src @@ -182,7 +182,12 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode) break; case PyArray_UNICODE: memptr = (char *)PyUnicode_AS_DATA(scalar); +#ifdef Py_UNICODE_WIDE break; +#else: + PyUCS2Unicode_AsUCS4(PyArray_DATA(r), memptr, PyArray_ITEMSIZE(r)); + goto finish; +#endif default: if (PyTypeNum_ISEXTENDED(typecode->type_num)) { memptr = (((PyVoidScalarObject *)scalar)->obval); @@ -197,7 +202,8 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode) if (PyArray_ISOBJECT(r)) { Py_INCREF(*((PyObject **)memptr)); } - + + finish: if (outcode == NULL) return r; if (outcode->type_num == typecode->type_num) { |