summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Travis Oliphant <oliphant@enthought.com> 2006-02-09 00:57:29 +0000
committer: Travis Oliphant <oliphant@enthought.com> 2006-02-09 00:57:29 +0000
commit: 9a4d9271638c5a1e28dc21a2d92d8d5d16d2fbd5 (patch)
tree: 732633ef63bbedec4d7bf2198b1b49663da1ded4
parent: 34b811398d0a7ef3583a0539b1b61b343e55d237 (diff)
download: numpy-9a4d9271638c5a1e28dc21a2d92d8d5d16d2fbd5.tar.gz
Fixing new-unicode branch.
-rw-r--r-- numpy/core/include/numpy/arrayobject.h | 6
-rw-r--r-- numpy/core/src/arrayobject.c | 36
-rw-r--r-- numpy/core/src/arraytypes.inc.src | 62
-rw-r--r-- numpy/core/src/multiarraymodule.c | 14
-rw-r--r-- numpy/core/src/scalartypes.inc.src | 8
5 files changed, 69 insertions, 57 deletions
diff --git a/numpy/core/include/numpy/arrayobject.h b/numpy/core/include/numpy/arrayobject.h
index 3ec68bbd6..4f0d1b1ea 100644
--- a/numpy/core/include/numpy/arrayobject.h
+++ b/numpy/core/include/numpy/arrayobject.h
@@ -124,7 +124,7 @@ typedef unsigned char Bool;
#endif
#ifndef Py_USING_UNICODE
-#define Py_UNICODE char
+#error Must use Python with unicode enabled.
#endif
@@ -197,8 +197,6 @@ enum PyArray_TYPECHAR { PyArray_BOOLLTR = '?',
PyArray_STRINGLTR = 'S',
PyArray_STRINGLTR2 = 'a',
PyArray_UNICODELTR = 'U',
- PyArray_UCS4LTR = 'W',
- PyArray_UCS2LTR = 'w',
PyArray_VOIDLTR = 'V',
/* No Descriptor, just a define -- this let's
@@ -735,6 +733,8 @@ typedef Py_uintptr_t uintp;
#define INTP_FMT "Ld"
#endif
+#define UInt32 PyArray_UCS4
+
#define ERR(str) fprintf(stderr, #str); fflush(stderr);
#define ERR2(str) fprintf(stderr, str); fflush(stderr);
diff --git a/numpy/core/src/arrayobject.c b/numpy/core/src/arrayobject.c
index 2444551c7..22f745ebf 100644
--- a/numpy/core/src/arrayobject.c
+++ b/numpy/core/src/arrayobject.c
@@ -408,6 +408,12 @@ copy_and_swap(void *dst, void *src, int itemsize, intp numitems,
byte_swap_vector(d1, numitems, itemsize);
}
+
+#ifndef Py_UNICODE_WIDE
+#include "ucsnarrow.c"
+#endif
+
+
static PyArray_Descr **userdescrs=NULL;
#define error_converting(x) (((x) == -1) && PyErr_Occurred())
@@ -861,7 +867,7 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base)
}
else if (type_num == PyArray_UNICODE) {
PyUnicodeObject *uni = (PyUnicodeObject*)obj;
- int length = itemsize / sizeof(Py_UNICODE);
+ int length = itemsize / 4;
/* Need an extra slot and need to use
Python memory manager */
uni->str = NULL;
@@ -876,6 +882,12 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base)
uni->length = length;
uni->hash = -1;
uni->defenc = NULL;
+#ifndef Py_UNICODE_WIDE
+ /* Allocate enough for 2-characters per itemsize
+ get the actual number of characters converted
+ and reallocate when done.
+ */
+#endif
}
else {
PyVoidScalarObject *vobj = (PyVoidScalarObject *)obj;
@@ -5007,7 +5019,7 @@ discover_itemsize(PyObject *s, int nd, int *itemsize)
if ((nd == 0) || PyString_Check(s) || \
PyUnicode_Check(s) || PyBuffer_Check(s)) {
if PyUnicode_Check(s)
- *itemsize = MAX(*itemsize, sizeof(Py_UNICODE)*n);
+ *itemsize = MAX(*itemsize, 4*n);
else
*itemsize = MAX(*itemsize, n);
return 0;
@@ -5289,7 +5301,7 @@ Array_FromScalar(PyObject *op, PyArray_Descr *typecode)
if (itemsize == 0 && PyTypeNum_ISEXTENDED(type)) {
itemsize = PyObject_Length(op);
- if (type == PyArray_UNICODE) itemsize *= sizeof(Py_UNICODE);
+ if (type == PyArray_UNICODE) itemsize *= 4;
}
ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, typecode,
@@ -5357,7 +5369,7 @@ Array_FromSequence(PyObject *s, PyArray_Descr *typecode, int fortran,
if (itemsize == 0 && PyTypeNum_ISEXTENDED(type)) {
if (discover_itemsize(s, nd, &itemsize) == -1) goto fail;
- if (type == PyArray_UNICODE) itemsize*=sizeof(Py_UNICODE);
+ if (type == PyArray_UNICODE) itemsize*=4;
}
if (itemsize != typecode->elsize) {
@@ -5529,10 +5541,10 @@ PyArray_CastToType(PyArrayObject *mp, PyArray_Descr *at, int fortran)
if (at == NULL) return NULL;
if (mpd->type_num == PyArray_STRING && \
at->type_num == PyArray_UNICODE)
- at->elsize = mpd->elsize*sizeof(Py_UNICODE);
+ at->elsize = mpd->elsize*4;
if (mpd->type_num == PyArray_UNICODE &&
at->type_num == PyArray_STRING)
- at->elsize = mpd->elsize/sizeof(Py_UNICODE);
+ at->elsize = mpd->elsize/4;
if (at->type_num == PyArray_VOID)
at->elsize = mpd->elsize;
}
@@ -5836,15 +5848,7 @@ _array_typedescr_fromstr(char *str)
break;
case PyArray_UNICODELTR:
type_num = PyArray_UNICODE;
- size *= sizeof(Py_UNICODE);
- break;
- case PyArray_UCS2LTR:
- if (sizeof(Py_UNICODE) != 2) _MY_FAIL
- type_num = PyArray_UNICODE;
- break;
- case PyArray_UCS4LTR:
- if (sizeof(Py_UNICODE) != 4) _MY_FAIL
- type_num = PyArray_UNICODE;
+ size *= 4;
break;
case 'V':
type_num = PyArray_VOID;
@@ -6412,7 +6416,7 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to)
ret = (from->elsize <= to->elsize);
}
else if (totype == PyArray_UNICODE) {
- ret = (from->elsize * sizeof(Py_UNICODE)\
+ ret = (from->elsize * 4 \
<= to->elsize);
}
}
diff --git a/numpy/core/src/arraytypes.inc.src b/numpy/core/src/arraytypes.inc.src
index 10a43cd89..6a81f7e09 100644
--- a/numpy/core/src/arraytypes.inc.src
+++ b/numpy/core/src/arraytypes.inc.src
@@ -218,18 +218,21 @@ static PyObject *
UNICODE_getitem(char *ip, PyArrayObject *ap)
{
PyObject *obj;
- size_t size = sizeof(Py_UNICODE);
int mysize;
- Py_UNICODE *dptr;
+ PyArray_UCS4 *dptr;
- mysize = ap->descr->elsize / size;
- dptr = (Py_UNICODE *)ip + mysize-1;
+ mysize = ap->descr->elsize / 4
+ dptr = (PyArray_UCS4 *)ip + mysize-1;
while(mysize > 0 && *dptr-- == 0) mysize--;
- obj = PyUnicode_FromUnicode((const Py_UNICODE *)ip, mysize);
if (!PyArray_ISNOTSWAPPED(ap) && (obj != NULL)) {
- byte_swap_vector(PyUnicode_AS_UNICODE(obj),
- mysize, size);
+ byte_swap_vector(PyArray_BYTES(ap), mysize, 4);
}
+#ifdef Py_UNICODE_WIDE
+ obj = PyUnicode_FromUnicode((const PyArray_UCS4 *)ip, mysize);
+#else
+ obj = MyPyUnicode_FromUCS4((const PyArray_UCS4 *)ip, mysize);
+#endif
+
return obj;
}
@@ -237,12 +240,11 @@ static int
UNICODE_setitem(PyObject *op, char *ov, PyArrayObject *ap)
{
PyObject *temp;
- Py_UNICODE *ptr;
+ PyArray_UCS4 *ptr;
int datalen;
- size_t size = sizeof(Py_UNICODE);
+ size_t size = sizeof(PyArray_UCS4);
if ((temp=PyObject_Unicode(op)) == NULL) return -1;
-
ptr = PyUnicode_AS_UNICODE(temp);
if ((ptr == NULL) || (PyErr_Occurred())) {
Py_DECREF(temp);
@@ -250,14 +252,18 @@ UNICODE_setitem(PyObject *op, char *ov, PyArrayObject *ap)
}
datalen = PyUnicode_GET_DATA_SIZE(op);
+#ifdef Py_UNICODE_WIDE
memcpy(ov, ptr, MIN(ap->descr->elsize, datalen));
+#else
+ MyPyUnicode_AsUCS4(ov, ptr, MIN(ap->descr->elsize, datalen));
+#endif
/* Fill in the rest of the space with 0 */
if (ap->descr->elsize > datalen) {
memset(ov + datalen, 0, (ap->descr->elsize - datalen));
}
if (!PyArray_ISNOTSWAPPED(ap))
- byte_swap_vector(ov, ap->descr->elsize / size, size);
+ byte_swap_vector(ov, ap->descr->elsize / 4, 4)
Py_DECREF(temp);
return 0;
@@ -1171,7 +1177,7 @@ VOID_copyswapn (char *dst, char *src, intp n, int swap, int itemsize)
static void
UNICODE_copyswapn (char *dst, char *src, intp n, int swap, int itemsize)
{
- int size = sizeof(Py_UNICODE);
+ int size = sizeof(PyArray_UCS4);
if (src != NULL)
memcpy(dst, src, itemsize * n);
@@ -1216,7 +1222,7 @@ VOID_copyswap (char *dst, char *src, int swap, int itemsize)
static void
UNICODE_copyswap (char *dst, char *src, int swap, int itemsize)
{
- int size = sizeof(Py_UNICODE);
+ int size = sizeof(PyArray_UCS4);
if (src != NULL)
memcpy(dst, src, itemsize);
@@ -1310,15 +1316,24 @@ STRING_nonzero (char *ip, PyArrayObject *ap)
return nonz;
}
+#ifdef Py_UNICODE_WIDE
+#define PyArray_UCS4_ISSPACE Py_UNICODE_ISSPACE
+#else
+#define PyArray_UCS4_ISSPACE(x) FALSE
+#endif
+
+/* fixme: This must deal with unaligned and byte-swapped data
+ and what-to do for UCS2-builds
+ */
static Bool
-UNICODE_nonzero (Py_UNICODE *ip, PyArrayObject *ap)
+UNICODE_nonzero (PyArray_UCS4 *ip, PyArrayObject *ap)
{
int len = ap->descr->elsize >> 1;
int i;
Bool nonz = FALSE;
for (i=0; i<len; i++) {
- if (!Py_UNICODE_ISSPACE(*ip)) {
+ if (!PyArray_UCS4_ISSPACE(*ip)) {
nonz = TRUE;
break;
}
@@ -1333,8 +1348,7 @@ OBJECT_nonzero (PyObject **ip, PyArrayObject *ap)
return (Bool) PyObject_IsTrue(*ip);
}
-/* If subclass has _nonzero method call it with buffer
- object wrapping current item. Otherwise, just compare with '\0'.
+/* fixme: if we have fields, then nonzero if all sub-fields are nonzero.
*/
static Bool
VOID_nonzero (char *ip, PyArrayObject *ap)
@@ -1407,11 +1421,11 @@ STRING_compare(char *ip1, char *ip2, PyArrayObject *ap)
/* taken from Python */
static int
-UNICODE_compare(register Py_UNICODE *ip1, register Py_UNICODE *ip2,
+UNICODE_compare(register PyArray_UCS4 *ip1, register PyArray_UCS4 *ip2,
PyArrayObject *ap)
{
register int itemsize=ap->descr->elsize;
- register Py_UNICODE c1, c2;
+ register PyArray_UCS4 c1, c2;
if (itemsize < 0) return 0;
@@ -1475,7 +1489,7 @@ OBJECT_argmax(PyObject **ip, intp n, intp *max_ind, PyArrayObject *aip)
/**begin repeat
#fname= STRING, UNICODE#
-#type= char, Py_UNICODE#
+#type= char, PyArray_UCS4#
*/
static int
@@ -1667,7 +1681,7 @@ static void
/**begin repeat
#from= VOID, STRING, UNICODE#
-#align= char, char, Py_UNICODE#
+#align= char, char, PyArray_UCS4#
#NAME= Void, String, Unicode#
#endian= |, |, =#
*/
@@ -1893,10 +1907,10 @@ set_typeinfo(PyObject *dict)
@name@_Descr.fields = Py_None;
/**end repeat**/
- if (sizeof(Py_UNICODE) == 2) {
+ if (sizeof(PyArray_UCS4) == 2) {
UNICODE_Descr.kind = PyArray_UCS2LTR;
}
- else if (sizeof(Py_UNICODE) == 4) {
+ else if (sizeof(PyArray_UCS4) == 4) {
UNICODE_Descr.kind = PyArray_UCS4LTR;
}
@@ -1966,7 +1980,7 @@ set_typeinfo(PyObject *dict)
PyDict_SetItemString(infodict, "UNICODE",
s=Py_BuildValue("ciiiO", PyArray_UNICODELTR,
PyArray_UNICODE, 0,
- _ALIGN(Py_UNICODE),
+ _ALIGN(PyArray_UCS4),
(PyObject *)\
&PyUnicodeArrType_Type));
Py_DECREF(s);
diff --git a/numpy/core/src/multiarraymodule.c b/numpy/core/src/multiarraymodule.c
index 3bad80cbf..ddacb68c9 100644
--- a/numpy/core/src/multiarraymodule.c
+++ b/numpy/core/src/multiarraymodule.c
@@ -3885,19 +3885,7 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
the number of bytes.
*/
else if (check_num == PyArray_UNICODELTR) {
- elsize *= sizeof(Py_UNICODE);
- }
- else if (check_num == PyArray_UCS2LTR || \
- check_num == PyArray_UCS4LTR) {
- if ((elsize % sizeof(Py_UNICODE) != 0) ||
- ((check_num == PyArray_UCS4LTR) && \
- (sizeof(Py_UNICODE) != 4)) ||
- ((check_num == PyArray_UCS2LTR) && \
- (sizeof(Py_UNICODE) != 2))) {
- PyErr_SetString(PyExc_TypeError,
- "unsupported unicode format");
- return PY_FAIL;
- }
+ elsize *= 4;
}
/* Support for generic processing
c4, i4, f8, etc...
diff --git a/numpy/core/src/scalartypes.inc.src b/numpy/core/src/scalartypes.inc.src
index 1f32e1d67..58f9bebb2 100644
--- a/numpy/core/src/scalartypes.inc.src
+++ b/numpy/core/src/scalartypes.inc.src
@@ -182,7 +182,12 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode)
break;
case PyArray_UNICODE:
memptr = (char *)PyUnicode_AS_DATA(scalar);
+#ifdef Py_UNICODE_WIDE
break;
+#else:
+ PyUCS2Unicode_AsUCS4(PyArray_DATA(r), memptr, PyArray_ITEMSIZE(r));
+ goto finish;
+#endif
default:
if (PyTypeNum_ISEXTENDED(typecode->type_num)) {
memptr = (((PyVoidScalarObject *)scalar)->obval);
@@ -197,7 +202,8 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode)
if (PyArray_ISOBJECT(r)) {
Py_INCREF(*((PyObject **)memptr));
}
-
+
+ finish:
if (outcode == NULL) return r;
if (outcode->type_num == typecode->type_num) {