diff options
-rw-r--r-- | doc/changelog/1.15.0-changelog.rst | 2 | ||||
-rw-r--r-- | doc/release/1.17.0-notes.rst | 17 | ||||
-rw-r--r-- | doc/source/reference/c-api.array.rst | 37 | ||||
-rw-r--r-- | numpy/core/_add_newdocs.py | 15 | ||||
-rw-r--r-- | numpy/core/fromnumeric.py | 36 | ||||
-rw-r--r-- | numpy/core/include/numpy/ndarraytypes.h | 11 | ||||
-rw-r--r-- | numpy/core/src/multiarray/arraytypes.c.src | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/conversion_utils.c | 21 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 20 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 6 |
10 files changed, 105 insertions, 64 deletions
diff --git a/doc/changelog/1.15.0-changelog.rst b/doc/changelog/1.15.0-changelog.rst index b76b9699a..4e3d3680b 100644 --- a/doc/changelog/1.15.0-changelog.rst +++ b/doc/changelog/1.15.0-changelog.rst @@ -374,7 +374,7 @@ A total of 438 pull requests were merged for this release. * `#10778 <https://github.com/numpy/numpy/pull/10778>`__: BUG: test, fix for missing flags['WRITEBACKIFCOPY'] key * `#10781 <https://github.com/numpy/numpy/pull/10781>`__: ENH: NEP index builder * `#10785 <https://github.com/numpy/numpy/pull/10785>`__: DOC: Fixed author name in reference to book -* `#10786 <https://github.com/numpy/numpy/pull/10786>`__: ENH: Add "stablesort" option to inp.sort as an alias for "mergesort". +* `#10786 <https://github.com/numpy/numpy/pull/10786>`__: ENH: Add "stable" option to np.sort as an alias for "mergesort". * `#10790 <https://github.com/numpy/numpy/pull/10790>`__: TST: Various fixes prior to switching to pytest * `#10795 <https://github.com/numpy/numpy/pull/10795>`__: BUG: Allow spaces in output string of einsum * `#10796 <https://github.com/numpy/numpy/pull/10796>`__: BUG: fix wrong inplace vectorization on overlapping arguments diff --git a/doc/release/1.17.0-notes.rst b/doc/release/1.17.0-notes.rst index ab09f85bd..d3a7779c8 100644 --- a/doc/release/1.17.0-notes.rst +++ b/doc/release/1.17.0-notes.rst @@ -54,14 +54,15 @@ identity, it is necessary to also pass in an initial value (e.g., ``initial=np.inf`` for ``np.min``). For instance, the equivalent of ``nansum`` would be, ``np.sum(a, where=~np.isnan(a))``. -Timsort is added to available sorting algorithms and is now the default stable sort ------------------------------------------------------------------------------------ -The ``kind`` keyword argument for ``sort`` and ``argsort`` now accepts ``'t'`` -or ``'timsort'`` for Timsort. The ``stable`` option of ``kind`` argument is now -mapped to Timsort. Timsort features great performace -on already or nearly sorted data and resembles Mergesort on random data. -The algorithm is stable and requires O(n/2) additional space. -For details of the algorithm, refer to +Timsort has replaced mergesort as the stable sorting implementation +------------------------------------------------------------------- +Timsort has been implemented and is now used in place of mergesort. Due to the +need to maintain backward compatibility, the sorting ``kind`` options ``"stable"`` +and ``"mergesort"`` have been made aliases of each other with the actual sort +implementation used a function of the array type. Timsort features improved +performace on already or nearly sorted data and performs like mergesort on +random data. The algorithm is stable and requires O(n/2) working space. For +details of the algorithm, refer to `CPython listsort.txt <https://github.com/python/cpython/blob/3.7/Objects/listsort.txt>`_. ``np.linalg.svd`` and ``np.linalg.pinv`` can be faster on hermitian inputs diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst index cf3c10e3b..44d09a9fe 100644 --- a/doc/source/reference/c-api.array.rst +++ b/doc/source/reference/c-api.array.rst @@ -1906,20 +1906,21 @@ Item selection and manipulation .. c:function:: PyObject* PyArray_Sort(PyArrayObject* self, int axis, NPY_SORTKIND kind) - Equivalent to :meth:`ndarray.sort<numpy.ndarray.sort>` (*self*, *axis*, *kind*). Return an array with - the items of *self* sorted along *axis*.Array is sorted according to *kind* which is an integer/enum pointing to the type of sorting algorithms used. + Equivalent to :meth:`ndarray.sort<numpy.ndarray.sort>` (*self*, *axis*, *kind*). + Return an array with the items of *self* sorted along *axis*. The array + is sorted using the algorithm denoted by *kind* , which is an integer/enum pointing + to the type of sorting algorithms used. .. c:function:: PyObject* PyArray_ArgSort(PyArrayObject* self, int axis) - Equivalent to :meth:`ndarray.argsort<numpy.ndarray.argsort>` (*self*, *axis*). Return an array of - indices such that selection of these indices along the given - ``axis`` would return a sorted version of *self*. If *self* - ->descr is a data-type with fields defined, then - self->descr->names is used to determine the sort order. A - comparison where the first field is equal will use the second - field and so on. To alter the sort order of a structured array, create - a new data-type with a different order of names and construct a - view of the array with that new data-type. + Equivalent to :meth:`ndarray.argsort<numpy.ndarray.argsort>` (*self*, *axis*). + Return an array of indices such that selection of these indices + along the given ``axis`` would return a sorted version of *self*. If *self* ->descr + is a data-type with fields defined, then self->descr->names is used + to determine the sort order. A comparison where the first field is equal + will use the second field and so on. To alter the sort order of a + structured array, create a new data-type with a different order of names + and construct a view of the array with that new data-type. .. c:function:: PyObject* PyArray_LexSort(PyObject* sort_keys, int axis) @@ -2989,8 +2990,10 @@ to. Convert Python strings into one of :c:data:`NPY_QUICKSORT` (starts with 'q' or 'Q'), :c:data:`NPY_HEAPSORT` (starts with 'h' or 'H'), - :c:data:`NPY_MERGESORT` (starts with 'm' or 'M') or :c:data:`NPY_TIMSORT` - (starts with 't' or 'T'). + :c:data:`NPY_MERGESORT` (starts with 'm' or 'M') or :c:data:`NPY_STABLESORT` + (starts with 't' or 'T'). :c:data:`NPY_MERGESORT` and :c:data:`NPY_STABLESORT` + are aliased to each other for backwards compatibility and may refer to one + of several stable sorting algorithms depending on the data type. .. c:function:: int PyArray_SearchsideConverter( \ PyObject* obj, NPY_SEARCHSIDE* side) @@ -3534,11 +3537,15 @@ Enumerated Types A special variable-type which can take on the values :c:data:`NPY_{KIND}` where ``{KIND}`` is - **QUICKSORT**, **HEAPSORT**, **MERGESORT**, **TIMSORT** + **QUICKSORT**, **HEAPSORT**, **MERGESORT**, **STABLESORT** .. c:var:: NPY_NSORTS - Defined to be the number of sorts. + Defined to be the number of sorts. It is fixed at three by the need for + backwards compatibility, and consequently :c:data:`NPY_MERGESORT` and + :c:data:`NPY_STABLESORT` are aliased to each other and may refer to one + of several stable sorting algorithms depending on the data type. + .. c:type:: NPY_SCALARKIND diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index ce23b2a16..159da0121 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -3790,15 +3790,22 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('sort', """ a.sort(axis=-1, kind='quicksort', order=None) - Sort an array, in-place. + Sort an array in-place. Refer to `numpy.sort` for full documentation. Parameters ---------- axis : int, optional Axis along which to sort. Default is -1, which means sort along the last axis. - kind : {'quicksort', 'mergesort', 'heapsort', 'timsort', 'stable'}, optional - Sorting algorithm. Default is 'quicksort'. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. The default is 'quicksort'. Note that both 'stable' + and 'mergesort' use timsort under the covers and, in general, the + actual implementation will vary with datatype. The 'mergesort' option + is retained for backwards compatibility. + + .. versionchanged:: 1.15.0. + The 'stable' option was added. + order : str or list of str, optional When `a` is an array with fields defined, this argument specifies which fields to compare first, second, etc. A single field can @@ -3816,7 +3823,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('sort', Notes ----- - See ``sort`` for notes on the different sorting algorithms. + See `numpy.sort` for notes on the different sorting algorithms. Examples -------- diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index f336ae248..cdb6c4bed 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -828,8 +828,15 @@ def sort(a, axis=-1, kind='quicksort', order=None): axis : int or None, optional Axis along which to sort. If None, the array is flattened before sorting. The default is -1, which sorts along the last axis. - kind : {'quicksort', 'mergesort', 'heapsort', 'timsort', 'stable'}, optional - Sorting algorithm. Default is 'quicksort'. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. The default is 'quicksort'. Note that both 'stable' + and 'mergesort' use timsort under the covers and, in general, the + actual implementation will vary with data type. The 'mergesort' option + is retained for backwards compatibility. + + .. versionchanged:: 1.15.0. + The 'stable' option was added. + order : str or list of str, optional When `a` is an array with fields defined, this argument specifies which fields to compare first, second, etc. A single field can @@ -855,18 +862,22 @@ def sort(a, axis=-1, kind='quicksort', order=None): The various sorting algorithms are characterized by their average speed, worst case performance, work space size, and whether they are stable. A stable sort keeps items with the same key in the same relative - order. The four available algorithms have the following + order. The four algorithms implemented in NumPy have the following properties: =========== ======= ============= ============ ======== kind speed worst case work space stable =========== ======= ============= ============ ======== 'quicksort' 1 O(n^2) 0 no - 'mergesort' 2 O(n*log(n)) ~n/2 yes 'heapsort' 3 O(n*log(n)) 0 no + 'mergesort' 2 O(n*log(n)) ~n/2 yes 'timsort' 2 O(n*log(n)) ~n/2 yes =========== ======= ============= ============ ======== + .. note:: The datatype determines which of 'mergesort' or 'timsort' + is actually used, even if 'mergesort' is specified. User selection + at a finer scale is not currently available. + All the sort algorithms make temporary copies of the data when sorting along any but the last axis. Consequently, sorting along the last axis is faster and uses less space than sorting along @@ -895,7 +906,10 @@ def sort(a, axis=-1, kind='quicksort', order=None): worst case O(n*log(n)). 'stable' automatically choses the best stable sorting algorithm - for the data type being sorted. It is currently mapped to timsort. + for the data type being sorted. It, along with 'mergesort' is + currently mapped to timsort. API forward compatibility currently limits the + ability to select the implementation and it is hardwired for the different + data types. .. versionadded:: 1.17.0 Timsort is added for better performance on already or nearly @@ -967,8 +981,16 @@ def argsort(a, axis=-1, kind='quicksort', order=None): axis : int or None, optional Axis along which to sort. The default is -1 (the last axis). If None, the flattened array is used. - kind : {'quicksort', 'mergesort', 'heapsort', 'timsort', 'stable'}, optional - Sorting algorithm. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional + Sorting algorithm. The default is 'quicksort'. Note that both 'stable' + and 'mergesort' use timsort under the covers and, in general, the + actual implementation will vary with data type. The 'mergesort' option + is retained for backwards compatibility. + + .. versionchanged:: 1.15.0. + The 'stable' option was added. + + order : str or list of str, optional When `a` is an array with fields defined, this argument specifies which fields to compare first, second, etc. A single field can diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 62895049c..1221aeece 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -156,13 +156,20 @@ enum NPY_TYPECHAR { NPY_COMPLEXLTR = 'c' }; +/* + * Changing this may break Numpy API compatibility + * due to changing offsets in PyArray_ArrFuncs, so be + * careful. Here we have reused the mergesort slot for + * any kind of stable sort, the actual implementation will + * depend on the data type. + */ typedef enum { NPY_QUICKSORT=0, NPY_HEAPSORT=1, NPY_MERGESORT=2, - NPY_TIMSORT=3, + NPY_STABLESORT=2, } NPY_SORTKIND; -#define NPY_NSORTS (NPY_TIMSORT + 1) +#define NPY_NSORTS (NPY_STABLESORT + 1) typedef enum { diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index ddef0de52..49819ca4a 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -4320,13 +4320,11 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { { quicksort_@suff@, heapsort_@suff@, - mergesort_@suff@, timsort_@suff@ }, { aquicksort_@suff@, aheapsort_@suff@, - amergesort_@suff@, atimsort_@suff@ }, #else @@ -4463,13 +4461,11 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { { quicksort_@suff@, heapsort_@suff@, - mergesort_@suff@, timsort_@suff@ }, { aquicksort_@suff@, aheapsort_@suff@, - amergesort_@suff@, atimsort_@suff@ }, #else diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c index 437ea78d8..fa8de8b37 100644 --- a/numpy/core/src/multiarray/conversion_utils.c +++ b/numpy/core/src/multiarray/conversion_utils.c @@ -419,16 +419,23 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind) *sortkind = NPY_HEAPSORT; } else if (str[0] == 'm' || str[0] == 'M') { - *sortkind = NPY_MERGESORT; - } - else if (str[0] == 't' || str[0] == 'T'){ - *sortkind = NPY_TIMSORT; + /* + * Mergesort is an alias for NPY_STABLESORT. + * That maintains backwards compatibility while + * allowing other types of stable sorts to be used. + */ + *sortkind = NPY_STABLESORT; } else if (str[0] == 's' || str[0] == 'S') { - /* available options: mergesort and timsort - * among which timsort is assumed to be better + /* + * NPY_STABLESORT is one of + * + * - mergesort + * - timsort + * + * Which one is used depends on the data type. */ - *sortkind = NPY_TIMSORT; + *sortkind = NPY_STABLESORT; } else { PyErr_Format(PyExc_ValueError, diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 7f560363c..4888224f3 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1132,10 +1132,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) case NPY_HEAPSORT: sort = npy_heapsort; break; - case NPY_MERGESORT: - sort = npy_mergesort; - break; - case NPY_TIMSORT: + case NPY_STABLESORT: sort = npy_timsort; break; } @@ -1286,10 +1283,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) case NPY_HEAPSORT: argsort = npy_aheapsort; break; - case NPY_MERGESORT: - argsort = npy_amergesort; - break; - case NPY_TIMSORT: + case NPY_STABLESORT: argsort = npy_atimsort; break; } @@ -1431,7 +1425,7 @@ PyArray_LexSort(PyObject *sort_keys, int axis) goto fail; } } - if (!PyArray_DESCR(mps[i])->f->argsort[NPY_MERGESORT] + if (!PyArray_DESCR(mps[i])->f->argsort[NPY_STABLESORT] && !PyArray_DESCR(mps[i])->f->compare) { PyErr_Format(PyExc_TypeError, "item %zd type does not have compare function", i); @@ -1527,9 +1521,9 @@ PyArray_LexSort(PyObject *sort_keys, int axis) int rcode; elsize = PyArray_DESCR(mps[j])->elsize; astride = PyArray_STRIDES(mps[j])[axis]; - argsort = PyArray_DESCR(mps[j])->f->argsort[NPY_MERGESORT]; + argsort = PyArray_DESCR(mps[j])->f->argsort[NPY_STABLESORT]; if(argsort == NULL) { - argsort = npy_amergesort; + argsort = npy_atimsort; } _unaligned_strided_byte_copy(valbuffer, (npy_intp) elsize, its[j]->dataptr, astride, N, elsize); @@ -1566,9 +1560,9 @@ PyArray_LexSort(PyObject *sort_keys, int axis) } for (j = 0; j < n; j++) { int rcode; - argsort = PyArray_DESCR(mps[j])->f->argsort[NPY_MERGESORT]; + argsort = PyArray_DESCR(mps[j])->f->argsort[NPY_STABLESORT]; if(argsort == NULL) { - argsort = npy_amergesort; + argsort = npy_atimsort; } rcode = argsort(its[j]->dataptr, (npy_intp *)rit->dataptr, N, mps[j]); diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 62184de4e..cf197df38 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -1395,10 +1395,10 @@ class TestZeroSizeFlexible(object): sort_func(zs, kind=kind, **kwargs) def test_sort(self): - self._test_sort_partition('sort', kinds='qhmt') + self._test_sort_partition('sort', kinds='qhs') def test_argsort(self): - self._test_sort_partition('argsort', kinds='qhmt') + self._test_sort_partition('argsort', kinds='qhs') def test_partition(self): self._test_sort_partition('partition', kinds=['introselect'], kth=2) @@ -1450,7 +1450,7 @@ class TestZeroSizeFlexible(object): class TestMethods(object): - sort_kinds = [r'm', 'q', 'h', 't'] + sort_kinds = ['quicksort', 'heapsort', 'stable'] def test_compress(self): tgt = [[5, 6, 7, 8, 9]] |