diff options
author | Travis Oliphant <oliphant@enthought.com> | 2006-02-09 04:38:58 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2006-02-09 04:38:58 +0000 |
commit | f1bffaafc8e94cb5e1e94bd0527410108903669c (patch) | |
tree | 3971fa5d91cc728a70a9f7a3d9fa721db2e1368c /numpy/core/src | |
parent | 520b1c94e456dacf814b1a2d1ff13e8133294cb1 (diff) | |
download | numpy-f1bffaafc8e94cb5e1e94bd0527410108903669c.tar.gz |
Created NumPy unicode as 32-bit.
Diffstat (limited to 'numpy/core/src')
-rw-r--r-- | numpy/core/src/arrayobject.c | 51 | ||||
-rw-r--r-- | numpy/core/src/arraytypes.inc.src | 79 | ||||
-rw-r--r-- | numpy/core/src/multiarraymodule.c | 2 | ||||
-rw-r--r-- | numpy/core/src/scalartypes.inc.src | 7 | ||||
-rw-r--r-- | numpy/core/src/ucsnarrow.c | 72 |
5 files changed, 155 insertions, 56 deletions
diff --git a/numpy/core/src/arrayobject.c b/numpy/core/src/arrayobject.c index 22f745ebf..48ce04c52 100644 --- a/numpy/core/src/arrayobject.c +++ b/numpy/core/src/arrayobject.c @@ -867,7 +867,11 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) } else if (type_num == PyArray_UNICODE) { PyUnicodeObject *uni = (PyUnicodeObject*)obj; - int length = itemsize / 4; + int length = itemsize >> 2; + +#ifndef Py_UNICODE_WIDE + length *= 2; +#endif /* Need an extra slot and need to use Python memory manager */ uni->str = NULL; @@ -883,10 +887,21 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) uni->hash = -1; uni->defenc = NULL; #ifndef Py_UNICODE_WIDE - /* Allocate enough for 2-characters per itemsize - get the actual number of characters converted - and reallocate when done. + /* Allocated enough for 2-characters per itemsize. + Now convert from the data-buffer */ + if (!PyArray_ISNBO(descr->byteorder)) { + /* byteswap the data */ + byte_swap_vector(data, itemsize >> 2, 4); + } + length = PyUCS2Buffer_FromUCS4(uni->str, (PyArray_UCS4 *)data, + itemsize >> 2); + /* Resize the unicode result */ + if (MyPyUnicode_Resize(uni, length) < 0) { + Py_DECREF(obj); + return NULL; + } + return obj; #endif } else { @@ -5089,7 +5104,7 @@ _array_small_type(PyArray_Descr *chktype, PyArray_Descr* mintype) because string itemsize is twice as large */ if (outtype->type_num == PyArray_UNICODE && mintype->type_num == PyArray_STRING) { - testsize = MAX(chksize, 2*minsize); + testsize = MAX(chksize, 4*minsize); } else { testsize = MAX(chksize, minsize); @@ -5172,6 +5187,9 @@ _array_find_type(PyObject *op, PyArray_Descr *minitype, int max) if (PyUnicode_Check(op)) { chktype = PyArray_DescrNewFromType(PyArray_UNICODE); chktype->elsize = PyUnicode_GET_DATA_SIZE(op); +#ifndef Py_UNICODE_WIDE + chktype->elsize <<= 1; +#endif goto finish; } @@ -5541,10 +5559,10 @@ PyArray_CastToType(PyArrayObject *mp, PyArray_Descr *at, int fortran) if (at == NULL) return NULL; if (mpd->type_num == PyArray_STRING && \ at->type_num == PyArray_UNICODE) - at->elsize = mpd->elsize*4; + at->elsize = mpd->elsize << 2; if (mpd->type_num == PyArray_UNICODE && at->type_num == PyArray_STRING) - at->elsize = mpd->elsize/4; + at->elsize = mpd->elsize >> 2; if (at->type_num == PyArray_VOID) at->elsize = mpd->elsize; } @@ -5848,7 +5866,7 @@ _array_typedescr_fromstr(char *str) break; case PyArray_UNICODELTR: type_num = PyArray_UNICODE; - size *= 4; + size <<= 2; break; case 'V': type_num = PyArray_VOID; @@ -6416,7 +6434,7 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) ret = (from->elsize <= to->elsize); } else if (totype == PyArray_UNICODE) { - ret = (from->elsize * 4 \ + ret = (from->elsize << 2 \ <= to->elsize); } } @@ -8155,14 +8173,17 @@ arraydescr_protocol_typestr_get(PyArray_Descr *self) { char basic_=self->kind; char endian = self->byteorder; + int size=self->elsize; if (endian == '=') { endian = '<'; if (!PyArray_IsNativeByteOrder(endian)) endian = '>'; } - - return PyString_FromFormat("%c%c%d", endian, basic_, - self->elsize); + + if (self->type_num == PyArray_UNICODE) { + size >>= 2; + } + return PyString_FromFormat("%c%c%d", endian, basic_, size); } static PyObject * @@ -8393,7 +8414,11 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *args) Py_INCREF(obj); } else { - obj = PyString_FromFormat("%c%d",self->kind, self->elsize); + elsize = self->elsize; + if (self->type_num == PyArray_UNICODE) { + elsize >>= 2; + } + obj = PyString_FromFormat("%c%d",self->kind, elsize); } PyTuple_SET_ITEM(ret, 1, Py_BuildValue("(Nii)", obj, 0, 1)); diff --git a/numpy/core/src/arraytypes.inc.src b/numpy/core/src/arraytypes.inc.src index 6a81f7e09..e1b17a3a6 100644 --- a/numpy/core/src/arraytypes.inc.src +++ b/numpy/core/src/arraytypes.inc.src @@ -221,7 +221,7 @@ UNICODE_getitem(char *ip, PyArrayObject *ap) int mysize; PyArray_UCS4 *dptr; - mysize = ap->descr->elsize / 4 + mysize = ap->descr->elsize >> 2; dptr = (PyArray_UCS4 *)ip + mysize-1; while(mysize > 0 && *dptr-- == 0) mysize--; if (!PyArray_ISNOTSWAPPED(ap) && (obj != NULL)) { @@ -230,7 +230,16 @@ UNICODE_getitem(char *ip, PyArrayObject *ap) #ifdef Py_UNICODE_WIDE obj = PyUnicode_FromUnicode((const PyArray_UCS4 *)ip, mysize); #else - obj = MyPyUnicode_FromUCS4((const PyArray_UCS4 *)ip, mysize); + /* create new empty unicode object of length mysize*2 */ + obj = MyPyUnicode_New(mysize*2); + if (obj == NULL) return obj; + mysize = PyUCS2Buffer_FromUCS4(((PyUnicodeObject *)obj)->str, + (PyArray_UCS4 *)ip, mysize); + /* reset length of unicode object to ucs2size */ + if (MyPyUnicode_Resize((PyUnicodeObject *)obj, mysize) < 0) { + Py_DECREF(obj); + return NULL; + } #endif return obj; @@ -240,9 +249,8 @@ static int UNICODE_setitem(PyObject *op, char *ov, PyArrayObject *ap) { PyObject *temp; - PyArray_UCS4 *ptr; + Py_UNICODE *ptr; int datalen; - size_t size = sizeof(PyArray_UCS4); if ((temp=PyObject_Unicode(op)) == NULL) return -1; ptr = PyUnicode_AS_UNICODE(temp); @@ -255,16 +263,17 @@ UNICODE_setitem(PyObject *op, char *ov, PyArrayObject *ap) #ifdef Py_UNICODE_WIDE memcpy(ov, ptr, MIN(ap->descr->elsize, datalen)); #else - MyPyUnicode_AsUCS4(ov, ptr, MIN(ap->descr->elsize, datalen)); + datalen = PyUCS2Buffer_AsUCS4(ptr, (PyArray_UCS4 *)ov, datalen >> 1, + ap->descr->elsize >> 2); + datalen <<= 2; #endif /* Fill in the rest of the space with 0 */ if (ap->descr->elsize > datalen) { memset(ov + datalen, 0, (ap->descr->elsize - datalen)); } - - if (!PyArray_ISNOTSWAPPED(ap)) - byte_swap_vector(ov, ap->descr->elsize / 4, 4) + if (!PyArray_ISNOTSWAPPED(ap)) + byte_swap_vector(ov, ap->descr->elsize >> 2, 4); Py_DECREF(temp); return 0; } @@ -1177,20 +1186,17 @@ VOID_copyswapn (char *dst, char *src, intp n, int swap, int itemsize) static void UNICODE_copyswapn (char *dst, char *src, intp n, int swap, int itemsize) { - int size = sizeof(PyArray_UCS4); if (src != NULL) memcpy(dst, src, itemsize * n); if (swap) { register char *a, *b, c; - int j, i = size / 2; for (a = (char *)dst; n>0; n--) { - b = a + (size-1); - for (j=0; j<i; j++) { - c=*a; *a++ = *b; *b-- = c; - } - a += i / 2; + b = a + 3; + c=*a; *a++ = *b; *b-- = c; + c=*a; *a++ = *b; *b-- = c; + a += 1; } } } @@ -1222,19 +1228,16 @@ VOID_copyswap (char *dst, char *src, int swap, int itemsize) static void UNICODE_copyswap (char *dst, char *src, int swap, int itemsize) { - int size = sizeof(PyArray_UCS4); if (src != NULL) memcpy(dst, src, itemsize); if (swap) { register char *a, *b, c; - int j, i = size / 2; a = (char *)dst; - b = a + (size-1); - for (j=0; j<i; j++) { - c=*a; *a++ = *b; *b-- = c; - } + b = a + 3; + c=*a; *a++ = *b; *b-- = c; + c=*a; *a++ = *b; *b-- = c; } } @@ -1319,19 +1322,30 @@ STRING_nonzero (char *ip, PyArrayObject *ap) #ifdef Py_UNICODE_WIDE #define PyArray_UCS4_ISSPACE Py_UNICODE_ISSPACE #else -#define PyArray_UCS4_ISSPACE(x) FALSE +#define PyArray_UCS4_ISSPACE(ch) Py_STRING_ISSPACE((char)ch) #endif -/* fixme: This must deal with unaligned and byte-swapped data - and what-to do for UCS2-builds - */ static Bool UNICODE_nonzero (PyArray_UCS4 *ip, PyArrayObject *ap) { - int len = ap->descr->elsize >> 1; + int len = ap->descr->elsize >> 2; int i; Bool nonz = FALSE; - + char *buffer=NULL; + + if ((!PyArray_ISNOTSWAPPED(ap)) || \ + (!PyArray_ISALIGNED(ap))) { + buffer = _pya_malloc(ap->descr->elsize); + if (buffer == NULL) { + return nonz; + } + memcpy(buffer, ip, ap->descr->elsize); + if (!PyArray_ISNOTSWAPPED(ap)) { + byte_swap_vector(buffer, len, 4); + } + ip = (PyArray_UCS4 *)buffer; + } + for (i=0; i<len; i++) { if (!PyArray_UCS4_ISSPACE(*ip)) { nonz = TRUE; @@ -1339,6 +1353,7 @@ UNICODE_nonzero (PyArray_UCS4 *ip, PyArrayObject *ap) } ip++; } + _pya_free(buffer); return nonz; } @@ -1897,9 +1912,6 @@ set_typeinfo(PyObject *dict) _letter_to_num[PyArray_@name@LTR] = PyArray_@name@; /**end repeat**/ _letter_to_num[PyArray_STRINGLTR2] = PyArray_STRING; - _letter_to_num[PyArray_UCS4LTR] = PyArray_UNICODE; - _letter_to_num[PyArray_UCS2LTR] = PyArray_UNICODE; - /**begin repeat #name=BOOL,BYTE,UBYTE,SHORT,USHORT,INT,UINT,LONG,ULONG,LONGLONG,ULONGLONG,FLOAT,DOUBLE,LONGDOUBLE,CFLOAT,CDOUBLE,CLONGDOUBLE,OBJECT,STRING,UNICODE,VOID# @@ -1907,13 +1919,6 @@ set_typeinfo(PyObject *dict) @name@_Descr.fields = Py_None; /**end repeat**/ - if (sizeof(PyArray_UCS4) == 2) { - UNICODE_Descr.kind = PyArray_UCS2LTR; - } - else if (sizeof(PyArray_UCS4) == 4) { - UNICODE_Descr.kind = PyArray_UCS4LTR; - } - /* Set a dictionary with type information */ infodict = PyDict_New(); if (infodict == NULL) return -1; diff --git a/numpy/core/src/multiarraymodule.c b/numpy/core/src/multiarraymodule.c index ddacb68c9..f65c74e88 100644 --- a/numpy/core/src/multiarraymodule.c +++ b/numpy/core/src/multiarraymodule.c @@ -3885,7 +3885,7 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at) the number of bytes. */ else if (check_num == PyArray_UNICODELTR) { - elsize *= 4; + elsize <<= 2; } /* Support for generic processing c4, i4, f8, etc... diff --git a/numpy/core/src/scalartypes.inc.src b/numpy/core/src/scalartypes.inc.src index 58f9bebb2..1ef8372f2 100644 --- a/numpy/core/src/scalartypes.inc.src +++ b/numpy/core/src/scalartypes.inc.src @@ -184,8 +184,11 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode) memptr = (char *)PyUnicode_AS_DATA(scalar); #ifdef Py_UNICODE_WIDE break; -#else: - PyUCS2Unicode_AsUCS4(PyArray_DATA(r), memptr, PyArray_ITEMSIZE(r)); +#else + PyUCS2Buffer_AsUCS4((Py_UNICODE *)memptr, + (PyArray_UCS4 *)PyArray_DATA(r), + PyUnicode_GET_SIZE(scalar), + PyArray_ITEMSIZE(r) >> 2); goto finish; #endif default: diff --git a/numpy/core/src/ucsnarrow.c b/numpy/core/src/ucsnarrow.c index 8960026d5..6480303a9 100644 --- a/numpy/core/src/ucsnarrow.c +++ b/numpy/core/src/ucsnarrow.c @@ -1,3 +1,7 @@ +/* Functions only needed on narrow builds of Python + for converting back and forth between the NumPy Unicode data-type (always 4-byte) + and the Python Unicode scalar (2-bytes on a narrow build). + */ /* the ucs2 buffer must be large enough to hold 2*ucs4length characters due to the use of surrogate pairs. @@ -29,12 +33,74 @@ PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, PyArray_UCS4 *ucs4, int ucs4length) } -/* This converts a UCS2 buffer (from a Python unicode object) +/* This converts a UCS2 buffer of the given length to UCS4 buffer. + It converts up to ucs4len characters of UCS2 -*/ + It returns the number of characters converted which can + be less than ucslen if there are surrogate pairs in ucs2. + The return value is the actual size of the used part of the ucs4 buffer. +*/ static int -PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, PyArray_UCS4 *ucs4, int ucs4length, int ucs2length) +PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, PyArray_UCS4 *ucs4, int ucs2len, int ucs4len) { + register int i; + register PyArray_UCS4 chr; + register Py_UNICODE ch; + register int numchars=0; + + for (i=0; (i < ucs2len-1) && (numchars < ucs4len); i++) { + ch = *ucs2++; + if (ch >= 0xd800 || ch <= 0xdfff) { + /* surrogate pair */ + chr = ((PyArray_UCS4)(ch-0xd800)) << 10; + chr += *ucs2++ + 0x2400; /* -0xdc00 + 0x10000 */ + i++; + } + else { + chr = (PyArray_UCS4) ch; + } + *ucs4++ = chr; + numchars++; + } + return numchars; +} + + +static PyObject * +MyPyUnicode_New(int length) +{ + PyUnicodeObject *unicode; + unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); + if (unicode == NULL) return NULL; + unicode->str = PyMem_NEW(Py_UNICODE, length+1); + if (!unicode->str) { + _Py_ForgetReference((PyObject *)unicode); + PyObject_Del(unicode); + return PyErr_NoMemory(); + } + unicode->str[0] = 0; + unicode->str[length] = 0; + unicode->length = length; + unicode->hash = -1; + unicode->defenc = NULL; + return (PyObject *)unicode; +} + +static int +MyPyUnicode_Resize(PyUnicodeObject *uni, int length) +{ + void *oldstr; + + oldstr = uni->str; + PyMem_RESIZE(uni->str, Py_UNICODE, length+1); + if (!uni->str) { + uni->str = oldstr; + PyErr_NoMemory(); + return -1; + } + uni->str[length] = 0; + uni->length = length; + return 0; } |