diff options
author | Travis Oliphant <oliphant@enthought.com> | 2006-01-01 02:25:37 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2006-01-01 02:25:37 +0000 |
commit | 3313204a24a621bb6bb5c0099d8b5e150640cb95 (patch) | |
tree | 86e0afbfa42b6052f01be4c8b5f35a8917758df2 | |
parent | 53d748f85bbf72268ea262695e8c16857ab8d566 (diff) | |
download | numpy-3313204a24a621bb6bb5c0099d8b5e150640cb95.tar.gz |
Added mergesorts for STRING and UNICODE and exposed lexsort
-rw-r--r-- | scipy/base/numeric.py | 2 | ||||
-rw-r--r-- | scipy/base/src/_sortmodule.c.src | 92 | ||||
-rw-r--r-- | scipy/base/src/arraytypes.inc.src | 36 |
3 files changed, 111 insertions, 19 deletions
diff --git a/scipy/base/numeric.py b/scipy/base/numeric.py index e91a82681..03a2e520f 100644 --- a/scipy/base/numeric.py +++ b/scipy/base/numeric.py @@ -1,7 +1,7 @@ __all__ = ['newaxis', 'ndarray', 'bigndarray', 'flatiter', 'ufunc', 'arange', 'array', 'zeros', 'empty', 'broadcast', 'dtypedescr', 'fromstring', 'fromfile', 'frombuffer','newbuffer','getbuffer', - 'where', 'concatenate', 'fastCopyAndTranspose', + 'where', 'concatenate', 'fastCopyAndTranspose', 'lexsort', 'register_dtype', 'set_numeric_ops', 'can_cast', 'asarray', 'asanyarray', 'isfortran', 'zeros_like', 'empty_like', 'correlate', 'convolve', 'inner', 'dot', 'outer', 'vdot', diff --git a/scipy/base/src/_sortmodule.c.src b/scipy/base/src/_sortmodule.c.src index 40a1b3d3b..35060eea7 100644 --- a/scipy/base/src/_sortmodule.c.src +++ b/scipy/base/src/_sortmodule.c.src @@ -11,6 +11,10 @@ /* Quick sort is usually the fastest, but the worst case scenario can be slower than the merge and heap sorts. The merge sort requires extra memory and so for large arrays may not be useful. + + Both the heap and merge sorts are *stable* meaning that equal components + are unmoved from their entry versions and so can be used to + implement lexigraphic sorting on multiple keys. */ @@ -342,6 +346,87 @@ static int return 0; } /**end repeat**/ + +static int +unincmp(Py_UNICODE *s1, Py_UNICODE *s2, register int len) +{ + register Py_UNICODE c1, c2; + while(len-- > 0) { + c1 = *s1++; + c2 = *s2++; + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + } + return 0; +} + +/**begin repeat +#TYPE=STRING,UNICODE# +#comp=strncmp,unincmp# +#type=char *, Py_UNICODE *# +*/ +static void +@TYPE@_amergesort0(intp *pl, intp *pr, @type@*v, intp *pw, int elsize) +{ + @type@ vp; + intp vi, *pi, *pj, *pk, *pm; + + if (pr - pl > 20) { + /* merge sort */ + pm = pl + ((pr - pl + 1)>>1); + @TYPE@_amergesort0(pl,pm-1,v,pw,elsize); + @TYPE@_amergesort0(pm,pr,v,pw,elsize); + for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) { + *pi = *pj; + } + for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) { + if (@comp@(v[*pk],v[*pj],elsize)<=0) { + *pm = *pk; + ++pk; + }else{ + *pm = *pj; + ++pj; + } + } + for(; pk < pi; ++pm, ++pk) { + *pm = *pk; + } + }else{ + /* insertion sort */ + for(pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v[vi]; + for(pj = pi, pk = pi - 1; \ + pj > pl && (@comp@(vp, v[*pk],elsize)<=0); \ + --pj, --pk) { + *pj = *pk; + } + *pj = vi; + } + } +} + +static int +@TYPE@_amergesort(@type@*v, intp *tosort, intp num, PyArrayObject *arr) +{ + intp *pl, *pr, *pw; + int elsize; + + elsize = arr->descr->elsize; + + pl = tosort; pr = pl + num - 1; + pw = PyDimMem_NEW((1+num/2)); + + if (!pw) { + PyErr_NoMemory(); + return -1; + } + + @TYPE@_amergesort0(pl, pr, v, pw, elsize); + PyDimMem_FREE(pw); + return 0; +} +/**end repeat**/ static void add_sortfuncs(void) @@ -365,6 +450,13 @@ add_sortfuncs(void) descr->f->argsort[PyArray_MERGESORT] = \ (PyArray_ArgSortFunc *)@TYPE@_amergesort; /**end repeat**/ + + descr = PyArray_DescrFromType(PyArray_STRING); + descr->f->argsort[PyArray_MERGESORT] = \ + (PyArray_ArgSortFunc *)STRING_amergesort; + descr = PyArray_DescrFromType(PyArray_UNICODE); + descr->f->argsort[PyArray_MERGESORT] = \ + (PyArray_ArgSortFunc *)UNICODE_amergesort; } static struct PyMethodDef methods[] = { diff --git a/scipy/base/src/arraytypes.inc.src b/scipy/base/src/arraytypes.inc.src index f17b514f8..322eafef8 100644 --- a/scipy/base/src/arraytypes.inc.src +++ b/scipy/base/src/arraytypes.inc.src @@ -1323,30 +1323,30 @@ STRING_compare(char *ip1, char *ip2, PyArrayObject *ap) return strncmp(ip1, ip2, ap->descr->elsize); } +/* taken from Python */ static int -UNICODE_compare(Py_UNICODE *ip1, Py_UNICODE *ip2, PyArrayObject *ap) +UNICODE_compare(register Py_UNICODE *ip1, register Py_UNICODE *ip2, + PyArrayObject *ap) { - PyObject *u=NULL, *v=NULL; - int res; - int itemsize=ap->descr->elsize; - if (itemsize < 0) goto fail; - u = PyUnicode_FromUnicode(ip1, itemsize); - v = PyUnicode_FromUnicode(ip2, itemsize); - if ((u==NULL) || (v==NULL)) goto fail; - res = PyObject_Compare(u, v); - Py_DECREF(u); - Py_DECREF(v); - return res; - - fail: - Py_XDECREF(u); - Py_XDECREF(v); + register int itemsize=ap->descr->elsize; + register Py_UNICODE c1, c2; + + if (itemsize < 0) return 0; + + while(itemsize-- > 0) { + c1 = *ip1++; + c2 = *ip2++; + + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + } return 0; } -/* redefine compare in terms of fields and subarrays if any */ +/* possibly redefine compare in terms of fields and subarrays if any */ -#define VOID_compare NULL +/* as it is, it compares raw-bytes as it they were strings */ +#define VOID_compare STRING_compare /****************** argfunc **********************************/ |