summaryrefslogtreecommitdiff
path: root/numpy/lib/recfunctions.py
diff options
context:
space:
mode:
authorDavid Cournapeau <cournape@gmail.com>2009-02-20 16:37:01 +0000
committerDavid Cournapeau <cournape@gmail.com>2009-02-20 16:37:01 +0000
commitda364a18e447c334dfa2ca5083b08e1b6a7c0d10 (patch)
tree7783575e8fe37f8e13ddfc050e9c9d94529bca8e /numpy/lib/recfunctions.py
parent745cfe4c60a59c01a8c2fd6ef68a8ca87f2180f5 (diff)
parenta271ac2e41aacab2903864cf371c4ba94cf4ec3f (diff)
downloadnumpy-da364a18e447c334dfa2ca5083b08e1b6a7c0d10.tar.gz
Merged revisions 6185-6187,6191-6221,6235-6238,6240-6241,6244,6250-6251,6253,6256,6258,6260-6261,6263,6265-6266,6268,6271,6283-6286,6291-6316,6320-6352,6354,6356,6358-6368,6370-6373,6398-6400,6410,6421-6424 via svnmerge from
http://svn.scipy.org/svn/numpy/trunk ................ r6185 | cdavid | 2008-12-22 01:19:14 +0900 (Mon, 22 Dec 2008) | 1 line Add doc sources so that sdist tarball contains them. ................ r6186 | pierregm | 2008-12-22 19:01:51 +0900 (Mon, 22 Dec 2008) | 4 lines testutils: assert_array_compare : make sure that the comparison is performed on ndarrays, and make sure we use the np version of the comparison function. core: * Try not to touch the data in unary/binary ufuncs, (including inplace) ................ r6187 | pearu | 2008-12-22 19:05:00 +0900 (Mon, 22 Dec 2008) | 1 line Fix a bug. ................ r6191 | cdavid | 2008-12-23 13:10:59 +0900 (Tue, 23 Dec 2008) | 1 line Fix typos in the comments for manifest. ................ r6192 | cdavid | 2008-12-23 13:11:12 +0900 (Tue, 23 Dec 2008) | 1 line Use msvcrt values if available for manifest generation: only there starting from python 2.6.1. ................ r6193 | pearu | 2008-12-23 18:02:15 +0900 (Tue, 23 Dec 2008) | 1 line Fix issue 964: f2py python 2.6, 2.6.1 support. ................ r6194 | pierregm | 2008-12-24 08:43:43 +0900 (Wed, 24 Dec 2008) | 12 lines testutils: * assert_equal : use assert_equal_array on records * assert_array_compare : prevent the common mask to be back-propagated to the initial input arrays. * assert_equal_array : use operator.__eq__ instead of ma.equal * assert_equal_less: use operator.__less__ instead of ma.less core: * Fixed _check_fill_value for nested flexible types * Add a ndtype option to _make_mask_descr * Fixed mask_or for nested flexible types * Fixed the printing of masked arrays w/ flexible types. ................ r6195 | cdavid | 2008-12-26 21:16:45 +0900 (Fri, 26 Dec 2008) | 1 line Update to handle numscons 0.10.0 and above. ................ r6196 | cdavid | 2008-12-26 21:36:19 +0900 (Fri, 26 Dec 2008) | 1 line Do not import msvcrt globally in mingw32compiler module, since the module is imported on all platforms. ................ 
r6197 | cdavid | 2008-12-26 23:39:55 +0900 (Fri, 26 Dec 2008) | 1 line Do not test for functions already tested by python configure script. ................ r6198 | cdavid | 2008-12-27 14:56:58 +0900 (Sat, 27 Dec 2008) | 1 line BUG: Add a runtime check about endianness, to detect bug 4728 in python on Mac OS X. ................ r6199 | cdavid | 2008-12-27 19:06:25 +0900 (Sat, 27 Dec 2008) | 1 line Fix some typo/syntax errors when converting dict access to a function in manifest generation. ................ r6200 | cdavid | 2008-12-27 19:15:30 +0900 (Sat, 27 Dec 2008) | 1 line BUG (#970): fix a python 2.6 bug in distutils which caused an unhelpful Error:None message when trying to build with no VS installed and without the -c mingw32 option. ................ r6201 | cdavid | 2008-12-27 19:30:49 +0900 (Sat, 27 Dec 2008) | 1 line Improve the error message when initializing compiler failed. ................ r6202 | cdavid | 2008-12-27 19:32:05 +0900 (Sat, 27 Dec 2008) | 1 line Try to initialize the msvc compiler before the general code to detect the error early. ................ r6203 | cdavid | 2008-12-27 19:43:41 +0900 (Sat, 27 Dec 2008) | 1 line BUG (#970): this commit should fix the actual bug, which albeeit linked to commir r6200, was caused in anoter code path. ................ r6204 | cdavid | 2008-12-27 19:57:05 +0900 (Sat, 27 Dec 2008) | 1 line Fix manifest generation. ................ r6205 | cdavid | 2008-12-27 20:46:08 +0900 (Sat, 27 Dec 2008) | 1 line BUG (#827): close temp file before reopning them on windows, and make sure they are not automatically deleted on close either (2.6and higher specific). ................ r6206 | cdavid | 2008-12-27 21:18:47 +0900 (Sat, 27 Dec 2008) | 1 line Do not define the union for runtime endianness detection if we don't check endianness. ................ r6207 | cdavid | 2008-12-27 22:48:52 +0900 (Sat, 27 Dec 2008) | 1 line Start working on formatting failure on 2.6: copy how python does complex formatting. 
................ r6208 | cdavid | 2008-12-27 23:44:11 +0900 (Sat, 27 Dec 2008) | 1 line Fix formatting for purely imaginary complex numbers. ................ r6209 | cdavid | 2008-12-27 23:53:15 +0900 (Sat, 27 Dec 2008) | 1 line More work on formatting float. ................ r6210 | cdavid | 2008-12-27 23:59:41 +0900 (Sat, 27 Dec 2008) | 1 line Finish formatting fixes for float scalar arrays. ................ r6211 | cdavid | 2008-12-28 00:12:20 +0900 (Sun, 28 Dec 2008) | 1 line Include umath_funcs_c99 in multiarray so that we can use isinf and co macros. ................ r6212 | cdavid | 2008-12-28 01:15:04 +0900 (Sun, 28 Dec 2008) | 1 line Include config.h before our C99 math compat layer. ................ r6213 | cdavid | 2008-12-28 01:15:41 +0900 (Sun, 28 Dec 2008) | 1 line Fix formatting. ................ r6214 | cdavid | 2008-12-28 01:16:18 +0900 (Sun, 28 Dec 2008) | 1 line Do not define FMTR and FMTI macros, as those are already defined on some platforms. ................ r6215 | cdavid | 2008-12-28 01:16:52 +0900 (Sun, 28 Dec 2008) | 1 line More formatting fixes. ................ r6216 | cdavid | 2008-12-28 01:17:27 +0900 (Sun, 28 Dec 2008) | 1 line Remove undef of removed macro. ................ r6217 | cdavid | 2008-12-28 01:33:40 +0900 (Sun, 28 Dec 2008) | 1 line Do not use PyOS_ascii_formatd, as it does not handle long double correctly. ................ r6218 | cdavid | 2008-12-28 02:19:40 +0900 (Sun, 28 Dec 2008) | 1 line Try ugly hack to circumvent long double brokenness with mingw. ................ r6219 | cdavid | 2008-12-28 02:25:50 +0900 (Sun, 28 Dec 2008) | 1 line Use ugly hack for mingw long double pb with complex format function as well. ................ r6220 | cdavid | 2008-12-28 12:18:20 +0900 (Sun, 28 Dec 2008) | 1 line Revert formatting changes: ascii_formatd only works for double, so we can't use it as it is for our formatting needs. ................ 
r6221 | cdavid | 2008-12-28 15:44:06 +0900 (Sun, 28 Dec 2008) | 1 line Do not add doc sources through add_data_dir: it will put the docs alongside numpy, as a separate package, which is not what we want. Use the manifest instead, since that's the only way I know of to include something in sdist-generated tarballs. ................ r6235 | cdavid | 2008-12-29 16:57:52 +0900 (Mon, 29 Dec 2008) | 13 lines Merged revisions 6233-6234 via svnmerge from http://svn.scipy.org/svn/numpy/branches/fix_float_format ........ r6233 | cdavid | 2008-12-29 12:49:09 +0900 (Mon, 29 Dec 2008) | 1 line Use parametric tests for format tests so that it is clearer which type is failing. ........ r6234 | cdavid | 2008-12-29 12:49:27 +0900 (Mon, 29 Dec 2008) | 1 line Fix formatting tests: cfloat and cdouble as well as np.float and np.double are the same; make sure we test 4 bytes float. ........ ................ r6236 | cdavid | 2008-12-29 17:02:15 +0900 (Mon, 29 Dec 2008) | 1 line Add nan/inf tests for formatting. ................ r6237 | cdavid | 2008-12-29 17:26:04 +0900 (Mon, 29 Dec 2008) | 1 line Add test for real float types locale independance. ................ r6238 | cdavid | 2008-12-29 17:35:06 +0900 (Mon, 29 Dec 2008) | 1 line Clearer error messages for formatting failures. ................ r6240 | cdavid | 2008-12-30 12:48:11 +0900 (Tue, 30 Dec 2008) | 1 line Add tests for print of float types. ................ r6241 | cdavid | 2008-12-30 12:56:54 +0900 (Tue, 30 Dec 2008) | 1 line Add print tests for complex types. ................ r6244 | cdavid | 2008-12-30 13:20:48 +0900 (Tue, 30 Dec 2008) | 1 line Fix test for print: forgot to make sure the value is a float before comparing it. ................ r6250 | cdavid | 2008-12-30 14:02:28 +0900 (Tue, 30 Dec 2008) | 17 lines Merged revisions 6247-6249 via svnmerge from http://svn.scipy.org/svn/numpy/branches/fix_float_format ........ 
r6247 | cdavid | 2008-12-30 13:41:37 +0900 (Tue, 30 Dec 2008) | 1 line Handle 1e10 specially, as it is the limit where exp notation is shorter than decimal for single precision, but not for double (python native one). ........ r6248 | cdavid | 2008-12-30 13:47:38 +0900 (Tue, 30 Dec 2008) | 1 line Refactor a bit redirected output print test. ........ r6249 | cdavid | 2008-12-30 13:49:31 +0900 (Tue, 30 Dec 2008) | 1 line Fix test for single precision print. ........ ................ r6251 | cdavid | 2008-12-30 14:12:50 +0900 (Tue, 30 Dec 2008) | 1 line Use np.inf instead of float('inf'), as the later does not work on windows for python < 2.6. ................ r6253 | cdavid | 2008-12-30 14:15:09 +0900 (Tue, 30 Dec 2008) | 1 line Fix typo in test. ................ r6256 | cdavid | 2008-12-30 14:34:22 +0900 (Tue, 30 Dec 2008) | 1 line Special case float tests on windows: python 2.5 and below have >=3 digits in the exp. ................ r6258 | cdavid | 2008-12-30 14:42:03 +0900 (Tue, 30 Dec 2008) | 1 line Hardcode reference for inf/nan-involved values. ................ r6260 | cdavid | 2008-12-30 14:50:18 +0900 (Tue, 30 Dec 2008) | 1 line Fix more formatting tests on win32. ................ r6261 | cdavid | 2008-12-30 14:52:16 +0900 (Tue, 30 Dec 2008) | 1 line Fix some more redirected output print tests. ................ r6263 | cdavid | 2008-12-30 15:01:31 +0900 (Tue, 30 Dec 2008) | 1 line More fixes for print tests. ................ r6265 | cdavid | 2008-12-30 15:03:56 +0900 (Tue, 30 Dec 2008) | 1 line Fix typo. ................ r6266 | cdavid | 2008-12-30 15:08:06 +0900 (Tue, 30 Dec 2008) | 1 line Fix typo. ................ r6268 | cdavid | 2008-12-30 15:12:26 +0900 (Tue, 30 Dec 2008) | 1 line complex scalar arrays cannot be created from real/imag args: wrap init values in a complex. ................ r6271 | cdavid | 2008-12-30 15:32:03 +0900 (Tue, 30 Dec 2008) | 1 line Do not use dict for reference: hashing on scalar arrays does not work as I expected. 
................ r6283 | ptvirtan | 2008-12-31 10:14:47 +0900 (Wed, 31 Dec 2008) | 1 line Fix #951: make tests to clean temp files properly ................ r6284 | jarrod.millman | 2009-01-01 08:25:03 +0900 (Thu, 01 Jan 2009) | 2 lines ran reindent ................ r6285 | alan.mcintyre | 2009-01-01 08:46:34 +0900 (Thu, 01 Jan 2009) | 15 lines Remove the following deprecated items from numpy.testing: - ParametricTestCase - The following arguments from numpy.testing.Tester.test(): level, verbosity, all, sys_argv, testcase_pattern - Path manipulation functions: set_package_path, set_local_path, restore_path - NumpyTestCase, NumpyTest Also separated testing parameter setup from NoseTester.test into NoseTester.prepare_test_args for use in a utility script for valgrind testing (see NumPy ticket #784). ................ r6286 | jarrod.millman | 2009-01-01 16:56:53 +0900 (Thu, 01 Jan 2009) | 2 lines add default include dir for Fedora/Red Hat (see SciPy ticket 817) ................ r6291 | cdavid | 2009-01-04 19:57:39 +0900 (Sun, 04 Jan 2009) | 1 line Do not import md5 on python >= 2.6; use hashlib instead. ................ r6292 | cdavid | 2009-01-04 20:08:16 +0900 (Sun, 04 Jan 2009) | 1 line Do not use popen* but subprocess.Popen instead. ................ r6293 | cdavid | 2009-01-04 21:03:29 +0900 (Sun, 04 Jan 2009) | 1 line Revert md5 change: hashlib.md5 is not a drop-in replacement for md5. ................ r6294 | pierregm | 2009-01-05 05:16:00 +0900 (Mon, 05 Jan 2009) | 2 lines * adapted default_fill_value for flexible datatype * fixed max/minimum_fill_value for flexible datatype ................ r6295 | stefan | 2009-01-06 06:51:18 +0900 (Tue, 06 Jan 2009) | 1 line Credit more developers. ................ r6296 | pierregm | 2009-01-06 07:52:21 +0900 (Tue, 06 Jan 2009) | 1 line *moved the printing templates out of MaskedArray.__repr__ ................ 
r6297 | stefan | 2009-01-06 19:09:00 +0900 (Tue, 06 Jan 2009) | 1 line Use new-style classes with multiple-inheritance to address bug in IronPython. ................ r6298 | pierregm | 2009-01-07 05:35:37 +0900 (Wed, 07 Jan 2009) | 1 line * Bugfix #961 ................ r6299 | pierregm | 2009-01-08 03:14:12 +0900 (Thu, 08 Jan 2009) | 1 line * Fixed iadd/isub/imul when the base array has no mask but the other array does ................ r6300 | pierregm | 2009-01-08 07:34:51 +0900 (Thu, 08 Jan 2009) | 3 lines * Renamed `torecords` to `toflex`, keeping `torecords` as an alias * Introduced `fromflex`, to reconstruct a masked_array from the output of `toflex` (can't use `fromrecords` as it would clash with `numpy.ma.mrecords.fromrecords`) * Fixed a bug in MaskedBinaryOperation (#979) (wrong array broadcasting) ................ r6301 | cdavid | 2009-01-08 18:19:00 +0900 (Thu, 08 Jan 2009) | 1 line Avoid putting things into stderr when errors occurs in f2py wrappers; put all the info in the python error string instead. ................ r6302 | cdavid | 2009-01-09 00:11:32 +0900 (Fri, 09 Jan 2009) | 1 line Fix python 2.4 issue. ................ r6303 | chanley | 2009-01-09 01:30:01 +0900 (Fri, 09 Jan 2009) | 1 line Fix test_print.py function _test_locale_independance() since str(1.2) does not use the LC_NUMERIC locale to convert numbers. Fix from Mark Sienkiewicz. ................ r6304 | cdavid | 2009-01-09 04:22:21 +0900 (Fri, 09 Jan 2009) | 1 line Revert buggy test fix for locale independecce. ................ r6305 | pierregm | 2009-01-09 05:02:29 +0900 (Fri, 09 Jan 2009) | 2 lines * Add __eq__ and __ne__ for support of flexible arrays. * Fixed .filled for nested structures ................ r6306 | pierregm | 2009-01-09 06:51:04 +0900 (Fri, 09 Jan 2009) | 1 line * Remove a debugging print statement. ................ r6307 | jarrod.millman | 2009-01-09 11:14:35 +0900 (Fri, 09 Jan 2009) | 2 lines Updated license file ................
r6308 | cdavid | 2009-01-09 14:26:58 +0900 (Fri, 09 Jan 2009) | 1 line Tag formatting unit tests as known failures. ................ r6309 | jarrod.millman | 2009-01-09 17:59:29 +0900 (Fri, 09 Jan 2009) | 2 lines should be more reliable way to determine what bit platform ................ r6310 | jarrod.millman | 2009-01-09 18:14:17 +0900 (Fri, 09 Jan 2009) | 2 lines better default library paths for 64bit arch ................ r6311 | jarrod.millman | 2009-01-09 18:57:15 +0900 (Fri, 09 Jan 2009) | 2 lines simplification suggested by stefan ................ r6312 | jarrod.millman | 2009-01-09 19:02:09 +0900 (Fri, 09 Jan 2009) | 2 lines switch the order [lib,lib64] --> [lib64,lib] ................ r6313 | jarrod.millman | 2009-01-09 19:18:29 +0900 (Fri, 09 Jan 2009) | 2 lines removed unneeded import ................ r6314 | jarrod.millman | 2009-01-10 04:37:16 +0900 (Sat, 10 Jan 2009) | 2 lines can't use append an int to a string ................ r6315 | pierregm | 2009-01-10 05:18:12 +0900 (Sat, 10 Jan 2009) | 2 lines * Added flatten_structured_arrays * Fixed _get_recordarray for nested structures ................ r6316 | pierregm | 2009-01-10 10:53:05 +0900 (Sat, 10 Jan 2009) | 1 line * Add flatten_structured_array to the namespace ................ r6320 | pierregm | 2009-01-14 06:01:58 +0900 (Wed, 14 Jan 2009) | 9 lines numpy.ma.core: * introduced baseclass, sharedmask and hardmask as readonly properties of MaskedArray * docstrings update numpy.ma.extras: * docstring updates docs/reference * introduced maskedarray, maskedarray.baseclass, maskedarray.generic ................ r6321 | stefan | 2009-01-14 16:14:27 +0900 (Wed, 14 Jan 2009) | 2 lines Docstring: remove old floating point arithmetic, parallel execution and postponed import references. ................ r6322 | stefan | 2009-01-14 16:55:16 +0900 (Wed, 14 Jan 2009) | 1 line Fix printing of limits. ................ 
r6323 | stefan | 2009-01-14 16:56:10 +0900 (Wed, 14 Jan 2009) | 1 line Fix finfo to work on all instances, not just NumPy scalars. ................ r6324 | pierregm | 2009-01-17 09:15:15 +0900 (Sat, 17 Jan 2009) | 1 line * fixed _arraymethod.__call__ for structured arrays ................ r6325 | ptvirtan | 2009-01-18 06:24:13 +0900 (Sun, 18 Jan 2009) | 3 lines Make `trapz` accept 1-D `x` parameter for n-d `y`, even if axis != -1. Additional tests included. ................ r6326 | pierregm | 2009-01-19 17:53:53 +0900 (Mon, 19 Jan 2009) | 3 lines * renamed FlatIter to MaskedIterator * added __getitem__ to MaskedIterator ................ r6327 | pierregm | 2009-01-19 18:01:24 +0900 (Mon, 19 Jan 2009) | 2 lines * replace np.asarray by np.asanyarray in unique1d ................ r6328 | pierregm | 2009-01-19 18:04:20 +0900 (Mon, 19 Jan 2009) | 2 lines * add intersect1d, intersect1d_nu, setdiff1d, setmember1d, setxor1d, unique1d, union1d * use np.atleast1d instead of ma.atleast1d ................ r6329 | pierregm | 2009-01-20 06:22:52 +0900 (Tue, 20 Jan 2009) | 3 lines * lib : introduced _iotools * lib.io : introduced genfromtxt, ndfromtxt, mafromtxt, recfromtxt, recfromcsv. ................ r6330 | pierregm | 2009-01-22 14:37:36 +0900 (Thu, 22 Jan 2009) | 1 line * genfromtxt : if names is True, accept a line starting with a comment character as header. ................ r6331 | pierregm | 2009-01-22 14:40:25 +0900 (Thu, 22 Jan 2009) | 1 line * added recfunctions, a collection of utilities to manipulate structured arrays. ................ r6332 | pierregm | 2009-01-23 03:21:32 +0900 (Fri, 23 Jan 2009) | 2 lines * fixed a machine-dependent issue on default int ('<i4' on OS X, '<i8' on linux) ? * fixed an machine-dependent issue on argsort ? ................ r6333 | cdavid | 2009-01-24 17:02:14 +0900 (Sat, 24 Jan 2009) | 1 line Fix compilation error on 2.4. ................ 
r6334 | pierregm | 2009-01-27 06:04:26 +0900 (Tue, 27 Jan 2009) | 7 lines * _iotools.StringConverter : - add a _checked attribute to indicate whether the converter has been upgraded or not. - switched the default value for bool to False * io.genfromtxt: - fixed for the case where a whole column is masked: switch to bool or the common dtype (if needed) ................ r6335 | pierregm | 2009-01-27 11:46:26 +0900 (Tue, 27 Jan 2009) | 1 line * prevent MaskedBinaryOperation and DomainedBinaryOperation to shrink the mask of the output when at least one of the inputs has a mask full of False ................ r6336 | matthew.brett@gmail.com | 2009-01-30 09:26:44 +0900 (Fri, 30 Jan 2009) | 1 line New docstrings for byteorder and newbyteorder() ................ r6337 | pierregm | 2009-02-02 14:20:17 +0900 (Mon, 02 Feb 2009) | 2 lines * Added a 'autoconvert' option to stack_arrays. * Fixed 'stack_arrays' to work with fields with titles. ................ r6338 | pierregm | 2009-02-04 02:11:44 +0900 (Wed, 04 Feb 2009) | 1 line * Make sure that StringConverter.update sets the type to object if it can't define it. ................ r6339 | pierregm | 2009-02-05 05:52:36 +0900 (Thu, 05 Feb 2009) | 2 lines * test__iotools : prevent test_upgrademapper if dateutil is not installed * MaskedArray.__rmul__ : switch to multiply(self, other) ................ r6340 | pierregm | 2009-02-05 06:53:05 +0900 (Thu, 05 Feb 2009) | 1 line test_upgrademapper : got rid of the dateutil import ................ r6341 | pierregm | 2009-02-05 13:31:51 +0900 (Thu, 05 Feb 2009) | 2 lines * genfromtxt : Fixed when a dtype involving objects is explicitly given. Raise a NotImplementedError if the dtype is nested. * _iotools : make sure StringConverter gets properly initiated when a function returning a np.object is used as input parameter. ................ 
r6342 | alan.mcintyre | 2009-02-06 05:11:40 +0900 (Fri, 06 Feb 2009) | 6 lines Issue #957: - Fix problems with test decorators when used on test generators. - The skip/fail arguments for skipif and knownfailureif can now be either a bool or a callable that returns a bool. - Added tests for the test decorators. ................ r6343 | ptvirtan | 2009-02-06 09:27:08 +0900 (Fri, 06 Feb 2009) | 1 line doc/numpydoc: work better together with Sphinx's config option ................ r6344 | ptvirtan | 2009-02-06 09:51:41 +0900 (Fri, 06 Feb 2009) | 1 line doc: Move maskedarray docs upward in TOC ................ r6345 | oliphant | 2009-02-06 15:25:50 +0900 (Fri, 06 Feb 2009) | 1 line Avoid re-creating the sequence when there is only one field in the regular expression. ................ r6346 | oliphant | 2009-02-06 15:31:11 +0900 (Fri, 06 Feb 2009) | 1 line Removed an unneccessary return statement in a unit test. ................ r6347 | pearu | 2009-02-06 23:36:58 +0900 (Fri, 06 Feb 2009) | 1 line Fix a bug: python system_info.py failed because _pkg_config_info defined section to be None. ................ r6348 | pearu | 2009-02-06 23:38:57 +0900 (Fri, 06 Feb 2009) | 1 line Fix another bug, see last commit. ................ r6349 | pierregm | 2009-02-07 18:19:12 +0900 (Sat, 07 Feb 2009) | 2 lines MaskedArray.resize : systematically raise a TypeError exception, as a masked array never owns its data MaskedIterator : fixed to allow .flat on masked matrices ................ r6350 | pierregm | 2009-02-08 03:51:31 +0900 (Sun, 08 Feb 2009) | 1 line ................ r6351 | ptvirtan | 2009-02-10 05:18:08 +0900 (Tue, 10 Feb 2009) | 1 line Fix #955: fix errobj leak in scalarmath floating point error handling ................ r6352 | pierregm | 2009-02-10 09:42:40 +0900 (Tue, 10 Feb 2009) | 1 line * prevent modifications to the mask to be back-propagated w/ __array_wrap__ ................ 
r6354 | cdavid | 2009-02-10 19:44:01 +0900 (Tue, 10 Feb 2009) | 1 line Fix hyphen (patch from debian package). ................ r6356 | pierregm | 2009-02-11 10:51:28 +0900 (Wed, 11 Feb 2009) | 1 line * MaskedArray.__array_wrap__ : forces the domain (if any) to a ndarray (fill with True) ................ r6358 | oliphant | 2009-02-12 13:22:03 +0900 (Thu, 12 Feb 2009) | 1 line Add multiple-field access by making a copy of the array and filling with the selected fields. ................ r6359 | stefan | 2009-02-12 14:44:07 +0900 (Thu, 12 Feb 2009) | 2 lines Trust user's specification of MACOSX_DEPLOYMENT_TARGET [patch by Brian Granger]. ................ r6360 | cdavid | 2009-02-14 23:54:26 +0900 (Sat, 14 Feb 2009) | 1 line Merge fix_float_format branch into the trunk. ................ r6361 | cdavid | 2009-02-15 00:02:39 +0900 (Sun, 15 Feb 2009) | 1 line Fix typo in multiarray tests. ................ r6362 | cdavid | 2009-02-15 00:03:22 +0900 (Sun, 15 Feb 2009) | 1 line Remove leftover in TestIO. ................ r6363 | cdavid | 2009-02-15 02:03:51 +0900 (Sun, 15 Feb 2009) | 1 line Include C99 math compatbility layer in multiarray - isnan and co needed by numpyos.c ................ r6364 | ptvirtan | 2009-02-15 07:09:26 +0900 (Sun, 15 Feb 2009) | 1 line More add_newdocs entries, and make add_newdoc capable of adding docs also to normal Python objects. ................ r6365 | ptvirtan | 2009-02-15 07:10:24 +0900 (Sun, 15 Feb 2009) | 1 line Move (un)packbits docstrings to add_newdocs.py. Fix typos. ................ r6366 | ptvirtan | 2009-02-15 07:11:19 +0900 (Sun, 15 Feb 2009) | 1 line Document constants in numpy.doc.constants ................ r6367 | ptvirtan | 2009-02-15 07:38:32 +0900 (Sun, 15 Feb 2009) | 1 line Move numpy.lib __doc__ back to info.py; was moved to __init__.py by mistake. ................ r6368 | pierregm | 2009-02-15 07:42:29 +0900 (Sun, 15 Feb 2009) | 1 line * genfromtxt : fixed case when using explicit converters and explicit dtype. 
................ r6370 | ptvirtan | 2009-02-16 00:44:47 +0900 (Mon, 16 Feb 2009) | 1 line sphinxext: clean up plot directive, and merge some features from matplotlib ................ r6371 | charris | 2009-02-18 06:19:10 +0900 (Wed, 18 Feb 2009) | 2 lines Coding style cleanups. ................ r6372 | cdavid | 2009-02-18 14:06:57 +0900 (Wed, 18 Feb 2009) | 1 line Fix some const issues in NumPyOs_ascii_strtod. ................ r6373 | charris | 2009-02-18 14:36:35 +0900 (Wed, 18 Feb 2009) | 1 line Coding style cleanups. ................ r6398 | charris | 2009-02-19 05:54:52 +0900 (Thu, 19 Feb 2009) | 2 lines Coding style cleanups. ................ r6399 | charris | 2009-02-19 09:45:14 +0900 (Thu, 19 Feb 2009) | 2 lines Coding style cleanups. ................ r6400 | charris | 2009-02-19 13:58:23 +0900 (Thu, 19 Feb 2009) | 1 line Coding style cleanups. ................ r6410 | cdavid | 2009-02-19 19:05:28 +0900 (Thu, 19 Feb 2009) | 1 line Tag known failure on win32. ................ r6421 | stefan | 2009-02-20 04:28:08 +0900 (Fri, 20 Feb 2009) | 1 line Fix tests using strptime to be Python 2.4 compatible. ................ r6422 | charris | 2009-02-20 08:25:01 +0900 (Fri, 20 Feb 2009) | 2 lines Coding style cleanups. ................ r6423 | charris | 2009-02-20 12:40:53 +0900 (Fri, 20 Feb 2009) | 1 line Coding style cleanups. ................ r6424 | cdavid | 2009-02-20 22:30:20 +0900 (Fri, 20 Feb 2009) | 1 line Unhelpful message for compaq fortran compiler. ................
Diffstat (limited to 'numpy/lib/recfunctions.py')
-rw-r--r--numpy/lib/recfunctions.py942
1 files changed, 942 insertions, 0 deletions
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
new file mode 100644
index 000000000..b3eecdc0e
--- /dev/null
+++ b/numpy/lib/recfunctions.py
@@ -0,0 +1,942 @@
+"""
+Collection of utilities to manipulate structured arrays.
+
+Most of these functions were initially implemented by John Hunter for matplotlib.
+They have been rewritten and extended for convenience.
+
+
+"""
+
+
+import itertools
+from itertools import chain as iterchain, repeat as iterrepeat, izip as iterizip
+import numpy as np
+from numpy import ndarray, recarray
+import numpy.ma as ma
+from numpy.ma import MaskedArray
+from numpy.ma.mrecords import MaskedRecords
+
+from numpy.lib._iotools import _is_string_like
+
# Helper that validates/broadcasts a fill value against a (possibly nested)
# dtype; re-exported from numpy.ma.core for internal use in this module.
_check_fill_value = np.ma.core._check_fill_value

# Public API of this module.
__all__ = ['append_fields',
           'drop_fields',
           'find_duplicates',
           'get_fieldstructure',
           'join_by',
           'merge_arrays',
           'rec_append_fields', 'rec_drop_fields', 'rec_join',
           'recursive_fill_fields', 'rename_fields',
           'stack_arrays',
           ]
+
+
def recursive_fill_fields(input, output):
    """
    Copy each field of `input` into the same-named field of `output`,
    descending into nested structures.

    Parameters
    ----------
    input : ndarray
        Structured array whose fields supply the data.
    output : ndarray
        Structured array receiving the data (modified in place and
        also returned).

    Notes
    -----
    * `output` should be at least the same size as `input`; fields of
      `output` that are missing from `input` are left untouched.

    Examples
    --------
    >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)])
    >>> b = np.zeros((3,), dtype=a.dtype)
    >>> recursive_fill_fields(a, b)
    array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', ...), ('B', ...)])

    """
    for fname in output.dtype.names:
        try:
            src = input[fname]
        except ValueError:
            # `input` has no field of that name: leave `output[fname]` alone.
            continue
        if src.dtype.names:
            # Nested structure: fill its sub-fields recursively.
            recursive_fill_fields(src, output[fname])
        else:
            # Plain field: copy over the leading len(src) entries.
            output[fname][:len(src)] = src
    return output
+
+
+
def get_names(adtype):
    """
    Returns the field names of the input datatype as a tuple.

    Nested fields are represented as ``(name, (subnames...))`` pairs.
    Returns None when the datatype has no fields (unstructured dtype).

    Parameters
    ----------
    adtype : dtype
        Input datatype

    Examples
    --------
    >>> get_names(np.empty((1,), dtype=int).dtype) is None
    True
    >>> get_names(np.empty((1,), dtype=[('A', int), ('B', float)]).dtype)
    ('A', 'B')
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> get_names(adtype)
    ('a', ('b', ('ba', 'bb')))
    """
    names = adtype.names
    if names is None:
        # Unstructured dtype: no fields to report.  Without this guard the
        # loop below would iterate over None and raise TypeError, although
        # the documented contract is to return None.
        return None
    listnames = []
    for name in names:
        current = adtype[name]
        if current.names:
            # Nested structure: pair the field name with its sub-names.
            listnames.append((name, tuple(get_names(current))))
        else:
            listnames.append(name)
    return tuple(listnames) or None
+
+
def get_names_flat(adtype):
    """
    Returns the field names of the input datatype as a tuple. Nested structures
    are flattened beforehand.

    Returns None when the datatype has no fields (unstructured dtype).

    Parameters
    ----------
    adtype : dtype
        Input datatype

    Examples
    --------
    >>> get_names_flat(np.empty((1,), dtype=int).dtype) is None
    True
    >>> get_names_flat(np.empty((1,), dtype=[('A', int), ('B', float)]).dtype)
    ('A', 'B')
    >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])])
    >>> get_names_flat(adtype)
    ('a', 'b', 'ba', 'bb')
    """
    names = adtype.names
    if names is None:
        # Unstructured dtype: no fields to report.  Without this guard the
        # loop below would iterate over None and raise TypeError, although
        # the documented contract is to return None.
        return None
    listnames = []
    for name in names:
        # A nested field contributes its own name followed by its sub-names.
        listnames.append(name)
        current = adtype[name]
        if current.names:
            listnames.extend(get_names_flat(current))
    return tuple(listnames) or None
+
+
def flatten_descr(ndtype):
    """
    Flatten a structured data-type description.

    Returns a tuple of ``(name, dtype)`` pairs with nested fields expanded
    in place; for an unstructured dtype the plain ``descr`` is returned
    unchanged.

    Examples
    --------
    >>> ndtype = np.dtype([('a', '<i4'), ('b', [('ba', '<f8'), ('bb', '<i4')])])
    >>> flatten_descr(ndtype)
    (('a', dtype('int32')), ('ba', dtype('float64')), ('bb', dtype('int32')))

    """
    fieldnames = ndtype.names
    if fieldnames is None:
        # Unstructured dtype: nothing to flatten.
        return ndtype.descr
    flat = []
    for fieldname in fieldnames:
        (ftype, _) = ndtype.fields[fieldname]
        if ftype.names:
            # Nested structure: splice its flattened fields in.
            flat.extend(flatten_descr(ftype))
        else:
            flat.append((fieldname, ftype))
    return tuple(flat)
+
+
def zip_descr(seqarrays, flatten=False):
    """
    Combine the dtype description of a series of arrays.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays
    flatten : {boolean}, optional
        Whether to collapse nested descriptions.
    """
    merged = []
    if flatten:
        # Collapse every nested field into a flat (name, dtype) list.
        for arr in seqarrays:
            merged.extend(flatten_descr(arr.dtype))
    else:
        for arr in seqarrays:
            adtype = arr.dtype
            if len(adtype.names or ()) > 1:
                # Multi-field dtype: keep it grouped under an anonymous name.
                merged.append(('', adtype.descr))
            else:
                merged.extend(adtype.descr)
    # Round-trip through np.dtype to normalize the description.
    return np.dtype(merged).descr
+
+
def get_fieldstructure(adtype, lastname=None, parents=None,):
    """
    Returns a dictionary with fields as keys and a list of parent fields as values.

    This function is used to simplify access to fields nested in other fields.

    Parameters
    ----------
    adtype : np.dtype
        Input datatype
    lastname : optional
        Last processed field name (used internally during recursion).
    parents : dictionary
        Dictionary of parent fields (used internally during recursion);
        the same dictionary object is shared across recursive calls.

    Examples
    --------
    >>> ndtype = np.dtype([('A', int),
    ...                    ('B', [('BA', int),
    ...                           ('BB', [('BBA', int), ('BBB', int)])])])
    >>> get_fieldstructure(ndtype)
    {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'],
     'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}

    """
    if parents is None:
        parents = {}
    names = adtype.names
    for name in names:
        current = adtype[name]
        if current.names:
            # `name` is itself structured: record its own parent (if any),
            # then recurse, passing `name` as the parent of its sub-fields.
            if lastname:
                parents[name] = [lastname,]
            else:
                parents[name] = []
            parents.update(get_fieldstructure(current, name, parents))
        else:
            # Leaf field: its ancestry is the enclosing field's ancestry
            # (copied, so later appends don't alias) plus the enclosing
            # field itself.
            lastparent = [_ for _ in (parents.get(lastname, []) or [])]
            if lastparent:
#                if (lastparent[-1] != lastname):
                lastparent.append(lastname)
            elif lastname:
                lastparent = [lastname,]
            parents[name] = lastparent or []
    return parents or None
+
+
+def _izip_fields_flat(iterable):
+ """
+ Returns an iterator of concatenated fields from a sequence of arrays,
+ collapsing any nested structure.
+ """
+ for element in iterable:
+ if isinstance(element, np.void):
+ for f in _izip_fields_flat(tuple(element)):
+ yield f
+ else:
+ yield element
+
+
def _izip_fields(iterable):
    """
    Returns an iterator of concatenated fields from a sequence of arrays,
    preserving (walking through) any nested structure.
    """
    for element in iterable:
        # Walk into iterable containers, but treat strings as atomic values.
        # NOTE(review): `basestring` is Python 2-only; under Python 3 this
        # line would raise NameError -- confirm the targeted Python version.
        if hasattr(element, '__iter__') and not isinstance(element, basestring):
            for f in _izip_fields(element):
                yield f
        elif isinstance(element, np.void) and len(tuple(element)) == 1:
            # Single-field record: unwrap the lone field.
            for f in _izip_fields(element):
                yield f
        else:
            yield element
+
+
def izip_records(seqarrays, fill_value=None, flatten=True):
    """
    Returns an iterator of concatenated items from a sequence of arrays.

    Parameters
    ----------
    seqarray : sequence of arrays
        Sequence of arrays.
    fill_value : {None, integer}
        Value used to pad shorter iterables.
    flatten : {True, False},
        Whether to collapse nested fields (True) or keep the nested
        structure (False).
    """
    # OK, that's a complete ripoff from Python2.6 itertools.izip_longest
    def sentinel(counter = ([fill_value]*(len(seqarrays)-1)).pop):
        "Yields the fill_value or raises IndexError"
        # `counter` is bound (at definition time) to the .pop method of a
        # list holding len(seqarrays)-1 fill values: each exhausted input
        # pops one; once all inputs are exhausted, .pop() on the empty list
        # raises IndexError, which stops the izip loop below.
        yield counter()
    #
    # NOTE(review): `iterizip` is itertools.izip, which is Python 2-only
    # (removed in Python 3) -- confirm the targeted Python version.
    fillers = iterrepeat(fill_value)
    iters = [iterchain(it, sentinel(), fillers) for it in seqarrays]
    # Should we flatten the items, or just use a nested approach
    if flatten:
        zipfunc = _izip_fields_flat
    else:
        zipfunc = _izip_fields
    #
    try:
        for tup in iterizip(*iters):
            yield tuple(zipfunc(tup))
    except IndexError:
        # Raised by the sentinel once every input is exhausted: normal end.
        pass
+
+
+def _fix_output(output, usemask=True, asrecarray=False):
+ """
+ Private function: return a recarray, a ndarray, a MaskedArray
+ or a MaskedRecords depending on the input parameters
+ """
+ if not isinstance(output, MaskedArray):
+ usemask = False
+ if usemask:
+ if asrecarray:
+ output = output.view(MaskedRecords)
+ else:
+ output = ma.filled(output)
+ if asrecarray:
+ output = output.view(recarray)
+ return output
+
+
+def _fix_defaults(output, defaults=None):
+ """
+ Update the fill_value and masked data of `output`
+ from the default given in a dictionary defaults.
+ """
+ names = output.dtype.names
+ (data, mask, fill_value) = (output.data, output.mask, output.fill_value)
+ for (k, v) in (defaults or {}).iteritems():
+ if k in names:
+ fill_value[k] = v
+ data[k][mask[k]] = v
+ return output
+
+
def merge_arrays(seqarrays,
                 fill_value=-1, flatten=False, usemask=True, asrecarray=False):
    """
    Merge arrays field by field.

    Parameters
    ----------
    seqarrays : sequence of ndarrays
        Sequence of arrays
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    flatten : {False, True}, optional
        Whether to collapse nested fields.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Returns
    -------
    output : ndarray, recarray, MaskedArray or MaskedRecords
        Array with one field per input array (or per input field when
        `flatten` is True); the exact type depends on `usemask` and
        `asrecarray`.

    Examples
    --------
    >>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])))
    masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)],
          mask = [(False, False) (False, False) (True, False)],
          fill_value=(999999, 1e+20)
          dtype=[('f0', '<i4'), ('f1', '<f8')])
    >>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])),
    ...              usemask=False)
    array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)],
          dtype=[('f0', '<i4'), ('f1', '<f8')])
    >>> merge_arrays((np.array([1, 2]).view([('a', int)]),
    ...               np.array([10., 20., 30.])),
    ...              usemask=False, asrecarray=True)
    rec.array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)],
              dtype=[('a', int), ('f1', '<f8')])
    """
    # Unwrap a one-element sequence: we may be able to return a plain view.
    if (len(seqarrays) == 1):
        seqarrays = seqarrays[0]
    if isinstance(seqarrays, ndarray):
        seqdtype = seqarrays.dtype
        # Shortcut: a single array whose description is unchanged by
        # flattening can simply be viewed as the requested type.
        # (zip_descr is a helper defined earlier in this module.)
        if (not flatten) or \
           (zip_descr((seqarrays,), flatten=True) == seqdtype.descr):
            seqarrays = seqarrays.ravel()
            # Wrap a plain dtype in a single unnamed field so the result
            # is always structured.
            if not seqdtype.names:
                seqarrays = seqarrays.view([('', seqdtype)])
            if usemask:
                if asrecarray:
                    return seqarrays.view(MaskedRecords)
                return seqarrays.view(MaskedArray)
            elif asrecarray:
                return seqarrays.view(recarray)
            return seqarrays
        else:
            # Flattening does change the description: fall through to the
            # general path with a 1-element sequence.
            seqarrays = (seqarrays,)
    # Get the dtype
    newdtype = zip_descr(seqarrays, flatten=flatten)
    # Get the data and the fill_value from each array
    seqdata = [ma.getdata(a.ravel()) for a in seqarrays]
    seqmask = [ma.getmaskarray(a).ravel() for a in seqarrays]
    fill_value = [_check_fill_value(fill_value, a.dtype) for a in seqdata]
    # Make an iterator from each array, padding w/ fill_values
    maxlength = max(len(a) for a in seqarrays)
    for (i, (a, m, fval)) in enumerate(zip(seqdata, seqmask, fill_value)):
        # Flatten the fill_values if there's only one field
        if isinstance(fval, (ndarray, np.void)):
            fmsk = ma.ones((1,), m.dtype)[0]
            if len(fval.dtype) == 1:
                # Single-field structured fill value: use the bare scalar.
                fval = fval.item()[0]
                fmsk = True
            else:
                # fval and fmsk should be np.void objects
                fval = np.array([fval,], dtype=a.dtype)[0]
#                fmsk = np.array([fmsk,], dtype=m.dtype)[0]
        else:
            fmsk = True
        # Pad the shorter inputs up to the common length.
        nbmissing = (maxlength-len(a))
        seqdata[i] = iterchain(a, [fval]*nbmissing)
        seqmask[i] = iterchain(m, [fmsk]*nbmissing)
    # Zip the padded iterators record by record.
    data = izip_records(seqdata, flatten=flatten)
    data = tuple(data)
    if usemask:
        mask = izip_records(seqmask, fill_value=True, flatten=flatten)
        mask = tuple(mask)
        output = ma.array(np.fromiter(data, dtype=newdtype))
        output._mask[:] = list(mask)
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        output = np.fromiter(data, dtype=newdtype)
        if asrecarray:
            output = output.view(recarray)
    return output
+
+
+
def drop_fields(base, drop_names, usemask=True, asrecarray=False):
    """
    Return a new array with fields in `drop_names` dropped.

    Nested fields are supported.

    Parameters
    ----------
    base : array
        Input array
    drop_names : string or sequence
        String or sequence of strings corresponding to the names of the
        fields to drop.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : string or sequence
        Whether to return a recarray or a mrecarray (`asrecarray=True`) or
        a plain ndarray or masked array with flexible dtype
        (`asrecarray=False`)

    Examples
    --------
    >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
    ...              dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])
    >>> drop_fields(a, 'a')
    array([((2.0, 3),), ((5.0, 6),)],
          dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])])
    >>> drop_fields(a, 'ba')
    array([(1, (3,)), (4, (6,))],
          dtype=[('a', '<i4'), ('b', [('bb', '<i4')])])
    >>> drop_fields(a, ['ba', 'bb'])
    array([(1,), (4,)],
          dtype=[('a', '<i4')])
    """
    if _is_string_like(drop_names):
        drop_names = [drop_names,]
    else:
        drop_names = set(drop_names)

    def _filter_descr(ndtype, drop_names):
        # Recursively rebuild the dtype description, leaving out every
        # field whose name appears in drop_names.
        kept = []
        for fname in ndtype.names:
            if fname in drop_names:
                continue
            fdtype = ndtype[fname]
            if fdtype.names:
                subdescr = _filter_descr(fdtype, drop_names)
                if subdescr:
                    kept.append((fname, subdescr))
            else:
                kept.append((fname, fdtype))
        return kept

    newdtype = _filter_descr(base.dtype, drop_names)
    if not newdtype:
        # Every field was dropped: nothing left to return.
        return None
    output = recursive_fill_fields(base, np.empty(base.shape, dtype=newdtype))
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+
+
def rec_drop_fields(base, drop_names):
    """
    Return a new np.recarray with the fields listed in `drop_names` removed.
    """
    return drop_fields(base, drop_names, asrecarray=True, usemask=False)
+
+
+
def rename_fields(base, namemapper):
    """
    Rename the fields of a flexible-datatype ndarray or recarray.

    Nested fields are supported.

    Parameters
    ----------
    base : ndarray
        Input array whose fields must be modified.
    namemapper : dictionary
        Dictionary mapping old field names to their new version.

    Examples
    --------
    >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))],
    ...              dtype=[('a', int),
    ...                     ('b', [('ba', float), ('bb', (float, 2))])])
    >>> rename_fields(a, {'a':'A', 'bb':'BB'})
    array([(1, (2.0, 3)), (4, (5.0, 6))],
          dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<i4')])])

    """
    def _rename(dtype, mapper):
        # Walk the dtype, substituting each field name by its replacement
        # (if any) and recursing into nested fields.
        descr = []
        for fname in dtype.names:
            target = mapper.get(fname, fname)
            fdtype = dtype[fname]
            if fdtype.names:
                descr.append((target, _rename(fdtype, mapper)))
            else:
                descr.append((target, fdtype))
        return descr
    return base.view(_rename(base.dtype, namemapper))
+
+
def append_fields(base, names, data=None, dtypes=None,
                  fill_value=-1, usemask=True, asrecarray=False):
    """
    Add new fields to an existing array.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    """
    # Check the names
    if isinstance(names, (tuple, list)):
        if len(names) != len(data):
            err_msg = "The number of arrays does not match the number of names"
            raise ValueError(err_msg)
    elif isinstance(names, basestring):
        names = [names,]
        data = [data,]
    #
    if dtypes is None:
        # Guess the dtype of each new field from the corresponding data.
        data = [np.array(a, copy=False, subok=True) for a in data]
        data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
    else:
        # Normalize `dtypes` to a list matching `data`, then convert.
        # (Previously the conversion below was only reached when `dtypes`
        # was a scalar: passing a *list* of dtypes silently skipped it,
        # leaving `data` as unnamed, unconverted arrays.)
        if not hasattr(dtypes, '__iter__'):
            dtypes = [dtypes,]
        if len(data) != len(dtypes):
            if len(dtypes) == 1:
                dtypes = dtypes * len(data)
            else:
                msg = "The dtypes argument must be None, "\
                      "a single dtype or a list."
                raise ValueError(msg)
        data = [np.array(a, copy=False, subok=True, dtype=d).view([(n, d)])
                for (a, n, d) in zip(data, names, dtypes)]
    # Make sure the base is structured (and masked if requested).
    base = merge_arrays(base, usemask=usemask, fill_value=fill_value)
    if len(data) > 1:
        data = merge_arrays(data, flatten=True, usemask=usemask,
                            fill_value=fill_value)
    else:
        data = data.pop()
    # Allocate a fully-masked output large enough for the longer of the
    # two, then fill it from base and data in turn.
    output = ma.masked_all(max(len(base), len(data)),
                           dtype=base.dtype.descr + data.dtype.descr)
    output = recursive_fill_fields(base, output)
    output = recursive_fill_fields(data, output)
    #
    return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+
+
+
def rec_append_fields(base, names, data, dtypes=None):
    """
    Add new fields to an existing array, always returning a np.recarray.

    The names of the fields are given with the `names` arguments,
    the corresponding values with the `data` arguments.
    If a single field is appended, `names`, `data` and `dtypes` do not have
    to be lists but just values.

    Parameters
    ----------
    base : array
        Input array to extend.
    names : string, sequence
        String or sequence of strings corresponding to the names
        of the new fields.
    data : array or sequence of arrays
        Array or sequence of arrays storing the fields to add to the base.
    dtypes : sequence of datatypes, optional
        Datatype or sequence of datatypes.
        If None, the datatypes are estimated from the `data`.

    See Also
    --------
    append_fields

    Returns
    -------
    appended_array : np.recarray
    """
    return append_fields(base, names, data=data, dtypes=dtypes,
                         usemask=False, asrecarray=True)
+
+
+
def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
                 autoconvert=False):
    """
    Superposes arrays fields by fields

    Parameters
    ----------
    arrays : array or sequence
        Sequence of input arrays.
    defaults : dictionary, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords if
        `asrecarray==True`) or a ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`) or
        just a flexible-type ndarray.
    autoconvert : {False, True}, optional
        Whether automatically cast the type of the field to the maximum.

    Examples
    --------
    >>> x = np.array([1, 2,])
    >>> stack_arrays(x) is x
    True
    >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)])
    >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)],
    ...               dtype=[('A', '|S3'), ('B', float), ('C', float)])
    >>> test = stack_arrays((z,zz))
    >>> test
    masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0)
     ('b', 20.0, 200.0) ('c', 30.0, 300.0)],
          mask = [(False, False, True) (False, False, True)
     (False, False, False) (False, False, False) (False, False, False)],
          fill_value=('N/A', 1e+20, 1e+20)
          dtype=[('A', '|S3'), ('B', '<f8'), ('C', '<f8')])

    """
    # A single array (or a 1-element sequence) is returned as-is.
    if isinstance(arrays, ndarray):
        return arrays
    elif len(arrays) == 1:
        return arrays[0]
    seqarrays = [np.asanyarray(a).ravel() for a in arrays]
    nrecords = [len(a) for a in seqarrays]
    ndtype = [a.dtype for a in seqarrays]
    fldnames = [d.names for d in ndtype]
    # Build the combined description, starting from the first array's.
    # (dtype.descr returns a fresh list, so appending below is safe.)
    dtype_l = ndtype[0]
    newdescr = dtype_l.descr
    names = [_[0] for _ in newdescr]
    for dtype_n in ndtype[1:]:
        for descr in dtype_n.descr:
            name = descr[0] or ''
            if name not in names:
                # Field not seen so far: append its description.
                newdescr.append(descr)
                names.append(name)
            else:
                # Field already present: check or merge the types.
                nameidx = names.index(name)
                current_descr = newdescr[nameidx]
                if autoconvert:
                    # Keep the "larger" of the two type strings.
                    if np.dtype(descr[1]) > np.dtype(current_descr[-1]):
                        current_descr = list(current_descr)
                        current_descr[-1] = descr[1]
                        newdescr[nameidx] = tuple(current_descr)
                elif descr[1] != current_descr[-1]:
                    raise TypeError("Incompatible type '%s' <> '%s'" %\
                                    (dict(newdescr)[name], descr[1]))
    # Only one field: use concatenate
    if len(newdescr) == 1:
        output = ma.concatenate(seqarrays)
    else:
        # Allocate a fully-masked output and copy each input array into
        # its row range, field by field.
        output = ma.masked_all((np.sum(nrecords),), newdescr)
        offset = np.cumsum(np.r_[0, nrecords])
        seen = []
        for (a, n, i, j) in zip(seqarrays, fldnames, offset[:-1], offset[1:]):
            names = a.dtype.names
            if names is None:
                # Unstructured input: stored under the next default name.
                output['f%i' % len(seen)][i:j] = a
            else:
                for name in n:
                    output[name][i:j] = a[name]
                    if name not in seen:
                        seen.append(name)
    # Apply the defaults and convert to the requested output type.
    return _fix_output(_fix_defaults(output, defaults),
                       usemask=usemask, asrecarray=asrecarray)
+
+
+
def find_duplicates(a, key=None, ignoremask=True, return_index=False):
    """
    Find the duplicates in a structured array along a given key

    Parameters
    ----------
    a : array-like
        Input array
    key : {string, None}, optional
        Name of the fields along which to check the duplicates.
        If None, the search is performed by records
    ignoremask : {True, False}, optional
        Whether masked data should be discarded or considered as duplicates.
    return_index : {False, True}, optional
        Whether to return the indices of the duplicated values.

    Returns
    -------
    duplicates : array
        The duplicated entries of `a` (each duplicated value appears with
        all of its occurrences).
    duplicates_index : ndarray
        The indices of the duplicated entries in `a`; only returned when
        `return_index` is True.

    Examples
    --------
    >>> ndtype = [('a', int)]
    >>> a = ma.array([1, 1, 1, 2, 2, 3, 3],
    ...              mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
    >>> find_duplicates(a, ignoremask=True, return_index=True)
    """
    a = np.asanyarray(a).ravel()
    # Get a dictionary of fields
    fields = get_fieldstructure(a.dtype)
    # Get the sorting data (by selecting the corresponding field)
    base = a
    if key:
        # Walk down the parents of `key` first, so nested keys work too.
        for f in fields[key]:
            base = base[f]
        base = base[key]
    # Get the sorting indices and the sorted data
    sortidx = base.argsort()
    sortedbase = base[sortidx]
    sorteddata = sortedbase.filled()
    # Compare the sorting data: flag[i] is True when items i and i+1 match.
    flag = (sorteddata[:-1] == sorteddata[1:])
    # If masked data must be ignored, set the flag to false where needed
    if ignoremask:
        sortedmask = sortedbase.recordmask
        flag[sortedmask[1:]] = False
    flag = np.concatenate(([False], flag))
    # We need to take the point on the left as well (else we're missing it)
    flag[:-1] = flag[:-1] + flag[1:]
    duplicates = a[sortidx][flag]
    if return_index:
        return (duplicates, sortidx[flag])
    else:
        return duplicates
+
+
+
def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
            defaults=None, usemask=True, asrecarray=False):
    """
    Join arrays `r1` and `r2` on key `key`.

    The key should be either a string or a sequence of string corresponding
    to the fields used to join the array.
    An exception is raised if the `key` field cannot be found in the two input
    arrays.
    Neither `r1` nor `r2` should have any duplicates along `key`: the presence
    of duplicates will make the output quite unreliable. Note that duplicates
    are not looked for by the algorithm.

    Parameters
    ----------
    key : {string, sequence}
        A string or a sequence of strings corresponding to the fields used
        for comparison.
    r1, r2 : arrays
        Structured arrays.
    jointype : {'inner', 'outer', 'leftouter'}, optional
        If 'inner', returns the elements common to both r1 and r2.
        If 'outer', returns the common elements as well as the elements of r1
        not in r2 and the elements of r2 not in r1.
        If 'leftouter', returns the common elements and the elements of r1 not
        in r2.
    r1postfix : string, optional
        String appended to the names of the fields of r1 that are present in r2
        but absent of the key.
    r2postfix : string, optional
        String appended to the names of the fields of r2 that are present in r1
        but absent of the key.
    defaults : {dictionary}, optional
        Dictionary mapping field names to the corresponding default values.
    usemask : {True, False}, optional
        Whether to return a MaskedArray (or MaskedRecords if
        `asrecarray==True`) or a ndarray.
    asrecarray : {False, True}, optional
        Whether to return a recarray (or MaskedRecords if `usemask==True`) or
        just a flexible-type ndarray.

    Notes
    -----
    * The output is sorted along the key.
    * A temporary array is formed by dropping the fields not in the key for the
      two arrays and concatenating the result. This array is then sorted, and
      the common entries selected. The output is constructed by filling the fields
      with the selected entries. Matching is not preserved if there are some
      duplicates...

    """
    # Check jointype
    if jointype not in ('inner', 'outer', 'leftouter'):
        raise ValueError("The 'jointype' argument should be in 'inner', "\
                         "'outer' or 'leftouter' (got '%s' instead)" % jointype)
    # If we have a single key, put it in a tuple
    if isinstance(key, basestring):
        key = (key, )

    # Check the keys
    for name in key:
        if name not in r1.dtype.names:
            raise ValueError('r1 does not have key field %s'%name)
        if name not in r2.dtype.names:
            raise ValueError('r2 does not have key field %s'%name)

    # Make sure we work with ravelled arrays
    r1 = r1.ravel()
    r2 = r2.ravel()
    (nb1, nb2) = (len(r1), len(r2))
    (r1names, r2names) = (r1.dtype.names, r2.dtype.names)

    # Make temporary arrays of just the keys
    r1k = drop_fields(r1, [n for n in r1names if n not in key])
    r2k = drop_fields(r2, [n for n in r2names if n not in key])

    # Concatenate the two arrays for comparison
    aux = ma.concatenate((r1k, r2k))
    idx_sort = aux.argsort(order=key)
    aux = aux[idx_sort]
    #
    # Get the common keys: an entry equal to its neighbor (on either side)
    # in the sorted key array appears in both inputs.
    flag_in = ma.concatenate(([False], aux[1:] == aux[:-1]))
    flag_in[:-1] = flag_in[1:] + flag_in[:-1]
    idx_in = idx_sort[flag_in]
    # Indices below nb1 come from r1, the others from r2.
    idx_1 = idx_in[(idx_in < nb1)]
    idx_2 = idx_in[(idx_in >= nb1)] - nb1
    (r1cmn, r2cmn) = (len(idx_1), len(idx_2))
    if jointype == 'inner':
        (r1spc, r2spc) = (0, 0)
    elif jointype == 'outer':
        # Add the entries specific to each input.
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn)
    elif jointype == 'leftouter':
        # Add only the entries specific to r1.
        idx_out = idx_sort[~flag_in]
        idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)]))
        (r1spc, r2spc) = (len(idx_1) - r1cmn, 0)
    # Select the entries from each input
    (s1, s2) = (r1[idx_1], r2[idx_2])
    #
    # Build the new description of the output array .......
    # Start with the key fields
    ndtype = [list(_) for _ in r1k.dtype.descr]
    # Add the other fields
    ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key)
    # Find the new list of names (it may be different from r1names)
    names = list(_[0] for _ in ndtype)
    for desc in r2.dtype.descr:
        desc = list(desc)
        name = desc[0]
        # Have we seen the current name already ?
        if name in names:
            nameidx = names.index(name)
            current = ndtype[nameidx]
            # The current field is part of the key: take the largest dtype
            # NOTE(review): max() compares the dtype *strings* here —
            # confirm this orders types as intended for all pairs.
            if name in key:
                current[-1] = max(desc[1], current[-1])
            # The current field is not part of the key: add the suffixes
            else:
                current[0] += r1postfix
                desc[0] += r2postfix
                ndtype.insert(nameidx+1, desc)
        #... we haven't: just add the description to the current list
        else:
            # NOTE(review): extend() on a string adds it character by
            # character; append() was probably intended — confirm.
            names.extend(desc[0])
            ndtype.append(desc)
    # Revert the elements to tuples
    ndtype = [tuple(_) for _ in ndtype]
    # Find the largest nb of common fields : r1cmn and r2cmn should be equal, but...
    cmn = max(r1cmn, r2cmn)
    # Construct an empty array
    output = ma.masked_all((cmn + r1spc + r2spc,), dtype=ndtype)
    names = output.dtype.names
    # Copy the fields of r1: common entries first, then (for the outer
    # join types) the entries specific to r1.
    for f in r1names:
        selected = s1[f]
        if f not in names:
            f += r1postfix
        current = output[f]
        current[:r1cmn] = selected[:r1cmn]
        if jointype in ('outer', 'leftouter'):
            current[cmn:cmn+r1spc] = selected[r1cmn:]
    # Same for the fields of r2 (its specific entries go at the end).
    for f in r2names:
        selected = s2[f]
        if f not in names:
            f += r2postfix
        current = output[f]
        current[:r2cmn] = selected[:r2cmn]
        if (jointype == 'outer') and r2spc:
            current[-r2spc:] = selected[r2cmn:]
    # Sort and finalize the output
    output.sort(order=key)
    kwargs = dict(usemask=usemask, asrecarray=asrecarray)
    return _fix_output(_fix_defaults(output, defaults), **kwargs)
+
+
def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
             defaults=None):
    """
    Join the arrays `r1` and `r2` on the keys `key`.
    Alternative to join_by, that always returns a np.recarray.

    See Also
    --------
    join_by : equivalent function
    """
    return join_by(key, r1, r2, jointype=jointype, r1postfix=r1postfix,
                   r2postfix=r2postfix, defaults=defaults,
                   usemask=False, asrecarray=True)