summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTravis Oliphant <oliphant@enthought.com>2006-01-01 02:25:37 +0000
committerTravis Oliphant <oliphant@enthought.com>2006-01-01 02:25:37 +0000
commit3313204a24a621bb6bb5c0099d8b5e150640cb95 (patch)
tree86e0afbfa42b6052f01be4c8b5f35a8917758df2
parent53d748f85bbf72268ea262695e8c16857ab8d566 (diff)
downloadnumpy-3313204a24a621bb6bb5c0099d8b5e150640cb95.tar.gz
Added mergesorts for STRING and UNICODE and exposed lexsort
-rw-r--r--scipy/base/numeric.py2
-rw-r--r--scipy/base/src/_sortmodule.c.src92
-rw-r--r--scipy/base/src/arraytypes.inc.src36
3 files changed, 111 insertions, 19 deletions
diff --git a/scipy/base/numeric.py b/scipy/base/numeric.py
index e91a82681..03a2e520f 100644
--- a/scipy/base/numeric.py
+++ b/scipy/base/numeric.py
@@ -1,7 +1,7 @@
__all__ = ['newaxis', 'ndarray', 'bigndarray', 'flatiter', 'ufunc',
'arange', 'array', 'zeros', 'empty', 'broadcast', 'dtypedescr',
'fromstring', 'fromfile', 'frombuffer','newbuffer','getbuffer',
- 'where', 'concatenate', 'fastCopyAndTranspose',
+ 'where', 'concatenate', 'fastCopyAndTranspose', 'lexsort',
'register_dtype', 'set_numeric_ops', 'can_cast',
'asarray', 'asanyarray', 'isfortran', 'zeros_like', 'empty_like',
'correlate', 'convolve', 'inner', 'dot', 'outer', 'vdot',
diff --git a/scipy/base/src/_sortmodule.c.src b/scipy/base/src/_sortmodule.c.src
index 40a1b3d3b..35060eea7 100644
--- a/scipy/base/src/_sortmodule.c.src
+++ b/scipy/base/src/_sortmodule.c.src
@@ -11,6 +11,10 @@
/* Quick sort is usually the fastest, but the worst case scenario can
be slower than the merge and heap sorts. The merge sort requires
extra memory and so for large arrays may not be useful.
+
+ Both the heap and merge sorts are *stable* meaning that equal components
+ are unmoved from their entry versions and so can be used to
+ implement lexigraphic sorting on multiple keys.
*/
@@ -342,6 +346,87 @@ static int
return 0;
}
/**end repeat**/
+
+static int
+unincmp(Py_UNICODE *s1, Py_UNICODE *s2, register int len)
+{
+ register Py_UNICODE c1, c2;
+ while(len-- > 0) {
+ c1 = *s1++;
+ c2 = *s2++;
+ if (c1 != c2)
+ return (c1 < c2) ? -1 : 1;
+ }
+ return 0;
+}
+
+/**begin repeat
+#TYPE=STRING,UNICODE#
+#comp=strncmp,unincmp#
+#type=char *, Py_UNICODE *#
+*/
+static void
+@TYPE@_amergesort0(intp *pl, intp *pr, @type@*v, intp *pw, int elsize)
+{
+ @type@ vp;
+ intp vi, *pi, *pj, *pk, *pm;
+
+ if (pr - pl > 20) {
+ /* merge sort */
+ pm = pl + ((pr - pl + 1)>>1);
+ @TYPE@_amergesort0(pl,pm-1,v,pw,elsize);
+ @TYPE@_amergesort0(pm,pr,v,pw,elsize);
+ for(pi = pw, pj = pl; pj < pm; ++pi, ++pj) {
+ *pi = *pj;
+ }
+ for(pk = pw, pm = pl; pk < pi && pj <= pr; ++pm) {
+ if (@comp@(v[*pk],v[*pj],elsize)<=0) {
+ *pm = *pk;
+ ++pk;
+ }else{
+ *pm = *pj;
+ ++pj;
+ }
+ }
+ for(; pk < pi; ++pm, ++pk) {
+ *pm = *pk;
+ }
+ }else{
+ /* insertion sort */
+ for(pi = pl + 1; pi <= pr; ++pi) {
+ vi = *pi;
+ vp = v[vi];
+ for(pj = pi, pk = pi - 1; \
+ pj > pl && (@comp@(vp, v[*pk],elsize)<=0); \
+ --pj, --pk) {
+ *pj = *pk;
+ }
+ *pj = vi;
+ }
+ }
+}
+
+static int
+@TYPE@_amergesort(@type@*v, intp *tosort, intp num, PyArrayObject *arr)
+{
+ intp *pl, *pr, *pw;
+ int elsize;
+
+ elsize = arr->descr->elsize;
+
+ pl = tosort; pr = pl + num - 1;
+ pw = PyDimMem_NEW((1+num/2));
+
+ if (!pw) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ @TYPE@_amergesort0(pl, pr, v, pw, elsize);
+ PyDimMem_FREE(pw);
+ return 0;
+}
+/**end repeat**/
static void
add_sortfuncs(void)
@@ -365,6 +450,13 @@ add_sortfuncs(void)
descr->f->argsort[PyArray_MERGESORT] = \
(PyArray_ArgSortFunc *)@TYPE@_amergesort;
/**end repeat**/
+
+ descr = PyArray_DescrFromType(PyArray_STRING);
+ descr->f->argsort[PyArray_MERGESORT] = \
+ (PyArray_ArgSortFunc *)STRING_amergesort;
+ descr = PyArray_DescrFromType(PyArray_UNICODE);
+ descr->f->argsort[PyArray_MERGESORT] = \
+ (PyArray_ArgSortFunc *)UNICODE_amergesort;
}
static struct PyMethodDef methods[] = {
diff --git a/scipy/base/src/arraytypes.inc.src b/scipy/base/src/arraytypes.inc.src
index f17b514f8..322eafef8 100644
--- a/scipy/base/src/arraytypes.inc.src
+++ b/scipy/base/src/arraytypes.inc.src
@@ -1323,30 +1323,30 @@ STRING_compare(char *ip1, char *ip2, PyArrayObject *ap)
return strncmp(ip1, ip2, ap->descr->elsize);
}
+/* taken from Python */
static int
-UNICODE_compare(Py_UNICODE *ip1, Py_UNICODE *ip2, PyArrayObject *ap)
+UNICODE_compare(register Py_UNICODE *ip1, register Py_UNICODE *ip2,
+ PyArrayObject *ap)
{
- PyObject *u=NULL, *v=NULL;
- int res;
- int itemsize=ap->descr->elsize;
- if (itemsize < 0) goto fail;
- u = PyUnicode_FromUnicode(ip1, itemsize);
- v = PyUnicode_FromUnicode(ip2, itemsize);
- if ((u==NULL) || (v==NULL)) goto fail;
- res = PyObject_Compare(u, v);
- Py_DECREF(u);
- Py_DECREF(v);
- return res;
-
- fail:
- Py_XDECREF(u);
- Py_XDECREF(v);
+ register int itemsize=ap->descr->elsize;
+ register Py_UNICODE c1, c2;
+
+ if (itemsize < 0) return 0;
+
+ while(itemsize-- > 0) {
+ c1 = *ip1++;
+ c2 = *ip2++;
+
+ if (c1 != c2)
+ return (c1 < c2) ? -1 : 1;
+ }
return 0;
}
-/* redefine compare in terms of fields and subarrays if any */
+/* possibly redefine compare in terms of fields and subarrays if any */
-#define VOID_compare NULL
+/* as it is, it compares raw-bytes as it they were strings */
+#define VOID_compare STRING_compare
/****************** argfunc **********************************/