diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/_string_helpers.py | 100 | ||||
-rw-r--r-- | numpy/core/numerictypes.py | 123 | ||||
-rw-r--r-- | numpy/core/src/multiarray/number.c | 2 | ||||
-rw-r--r-- | numpy/core/tests/test_dtype.py | 78 | ||||
-rw-r--r-- | numpy/ma/mrecords.py | 19 |
5 files changed, 152 insertions, 170 deletions
diff --git a/numpy/core/_string_helpers.py b/numpy/core/_string_helpers.py new file mode 100644 index 000000000..45e6a739e --- /dev/null +++ b/numpy/core/_string_helpers.py @@ -0,0 +1,100 @@ +""" +String-handling utilities to avoid locale-dependence. + +Used primarily to generate type name aliases. +""" +# "import string" is costly to import! +# Construct the translation tables directly +# "A" = chr(65), "a" = chr(97) +_all_chars = [chr(_m) for _m in range(256)] +_ascii_upper = _all_chars[65:65+26] +_ascii_lower = _all_chars[97:97+26] +LOWER_TABLE = "".join(_all_chars[:65] + _ascii_lower + _all_chars[65+26:]) +UPPER_TABLE = "".join(_all_chars[:97] + _ascii_upper + _all_chars[97+26:]) + + +def english_lower(s): + """ Apply English case rules to convert ASCII strings to all lower case. + + This is an internal utility function to replace calls to str.lower() such + that we can avoid changing behavior with changing locales. In particular, + Turkish has distinct dotted and dotless variants of the Latin letter "I" in + both lowercase and uppercase. Thus, "I".lower() != "i" in a "tr" locale. + + Parameters + ---------- + s : str + + Returns + ------- + lowered : str + + Examples + -------- + >>> from numpy.core.numerictypes import english_lower + >>> english_lower('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_') + 'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789_' + >>> english_lower('') + '' + """ + lowered = s.translate(LOWER_TABLE) + return lowered + + +def english_upper(s): + """ Apply English case rules to convert ASCII strings to all upper case. + + This is an internal utility function to replace calls to str.upper() such + that we can avoid changing behavior with changing locales. In particular, + Turkish has distinct dotted and dotless variants of the Latin letter "I" in + both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale. + + Parameters + ---------- + s : str + + Returns + ------- + uppered : str + + Examples + -------- + >>> from numpy.core.numerictypes import english_upper + >>> english_upper('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_') + 'ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_' + >>> english_upper('') + '' + """ + uppered = s.translate(UPPER_TABLE) + return uppered + + +def english_capitalize(s): + """ Apply English case rules to convert the first character of an ASCII + string to upper case. + + This is an internal utility function to replace calls to str.capitalize() + such that we can avoid changing behavior with changing locales. + + Parameters + ---------- + s : str + + Returns + ------- + capitalized : str + + Examples + -------- + >>> from numpy.core.numerictypes import english_capitalize + >>> english_capitalize('int8') + 'Int8' + >>> english_capitalize('Int8') + 'Int8' + >>> english_capitalize('') + '' + """ + if s: + return english_upper(s[0]) + s[1:] + else: + return s diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index 259aef2c6..f0cd8ec74 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -102,6 +102,11 @@ __all__ = ['sctypeDict', 'sctypeNA', 'typeDict', 'typeNA', 'sctypes', 'busday_offset', 'busday_count', 'is_busday', 'busdaycalendar', ] +# we don't need all these imports, but we need to keep them for compatibility +# for users using np.core.numerictypes.UPPER_TABLE +from ._string_helpers import ( + english_lower, english_upper, english_capitalize, LOWER_TABLE, UPPER_TABLE +) # we don't export these for import *, but we do want them accessible # as numerictypes.bool, etc. @@ -112,103 +117,6 @@ else: from __builtin__ import bool, int, float, complex, object, unicode, str -# String-handling utilities to avoid locale-dependence. - -# "import string" is costly to import! -# Construct the translation tables directly -# "A" = chr(65), "a" = chr(97) -_all_chars = [chr(_m) for _m in range(256)] -_ascii_upper = _all_chars[65:65+26] -_ascii_lower = _all_chars[97:97+26] -LOWER_TABLE = "".join(_all_chars[:65] + _ascii_lower + _all_chars[65+26:]) -UPPER_TABLE = "".join(_all_chars[:97] + _ascii_upper + _all_chars[97+26:]) - - -def english_lower(s): - """ Apply English case rules to convert ASCII strings to all lower case. - - This is an internal utility function to replace calls to str.lower() such - that we can avoid changing behavior with changing locales. In particular, - Turkish has distinct dotted and dotless variants of the Latin letter "I" in - both lowercase and uppercase. Thus, "I".lower() != "i" in a "tr" locale. - - Parameters - ---------- - s : str - - Returns - ------- - lowered : str - - Examples - -------- - >>> from numpy.core.numerictypes import english_lower - >>> english_lower('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_') - 'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789_' - >>> english_lower('') - '' - """ - lowered = s.translate(LOWER_TABLE) - return lowered - -def english_upper(s): - """ Apply English case rules to convert ASCII strings to all upper case. - - This is an internal utility function to replace calls to str.upper() such - that we can avoid changing behavior with changing locales. In particular, - Turkish has distinct dotted and dotless variants of the Latin letter "I" in - both lowercase and uppercase. Thus, "i".upper() != "I" in a "tr" locale. - - Parameters - ---------- - s : str - - Returns - ------- - uppered : str - - Examples - -------- - >>> from numpy.core.numerictypes import english_upper - >>> english_upper('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_') - 'ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_' - >>> english_upper('') - '' - """ - uppered = s.translate(UPPER_TABLE) - return uppered - -def english_capitalize(s): - """ Apply English case rules to convert the first character of an ASCII - string to upper case. - - This is an internal utility function to replace calls to str.capitalize() - such that we can avoid changing behavior with changing locales. - - Parameters - ---------- - s : str - - Returns - ------- - capitalized : str - - Examples - -------- - >>> from numpy.core.numerictypes import english_capitalize - >>> english_capitalize('int8') - 'Int8' - >>> english_capitalize('Int8') - 'Int8' - >>> english_capitalize('') - '' - """ - if s: - return english_upper(s[0]) + s[1:] - else: - return s - - sctypeDict = {} # Contains all leaf-node scalar types with aliases class TypeNADict(dict): def __getitem__(self, key): @@ -838,9 +746,10 @@ def sctype2char(sctype): # Create dictionary of casting functions that wrap sequences # indexed by type or type character - - cast = _typedict() +for key in _sctype2char_dict.keys(): + cast[key] = lambda x, k=key: array(x, copy=False).astype(k) + try: ScalarType = [_types.IntType, _types.FloatType, _types.ComplexType, _types.LongType, _types.BooleanType, @@ -851,24 +760,8 @@ except AttributeError: ScalarType.extend(_sctype2char_dict.keys()) ScalarType = tuple(ScalarType) -for key in _sctype2char_dict.keys(): - cast[key] = lambda x, k=key: array(x, copy=False).astype(k) - -# Create the typestring lookup dictionary -_typestr = _typedict() -for key in _sctype2char_dict.keys(): - if issubclass(key, allTypes['flexible']): - _typestr[key] = _sctype2char_dict[key] - else: - _typestr[key] = empty((1,), key).dtype.str[1:] - -# Make sure all typestrings are in sctypeDict -for key, val in _typestr.items(): - if val not in sctypeDict: - sctypeDict[val] = key # Add additional strings to the sctypeDict - if sys.version_info[0] >= 3: _toadd = ['int', 'float', 'complex', 'bool', 'object', 'str', 'bytes', 'object', ('a', allTypes['bytes_'])] diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c index f71d39405..dabbae064 100644 --- a/numpy/core/src/multiarray/number.c +++ b/numpy/core/src/multiarray/number.c @@ -119,7 +119,7 @@ PyArray_SetNumericOps(PyObject *dict) return 0; } -/* FIXME - macro contains goto */ +/* Note - macro contains goto */ #define GET(op) if (n_ops.op && \ (PyDict_SetItemString(dict, #op, n_ops.op)==-1)) \ goto fail; diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index deaf587d6..fc4dc952a 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -21,26 +21,26 @@ def assert_dtype_not_equal(a, b): "two different types hash to the same value !") class TestBuiltin(object): - def test_run(self): + @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object, + np.unicode]) + def test_run(self, t): """Only test hash runs at all.""" - for t in [int, float, complex, np.int32, str, object, - np.unicode]: - dt = np.dtype(t) - hash(dt) + dt = np.dtype(t) + hash(dt) - def test_dtype(self): + @pytest.mark.parametrize('t', [int, float]) + def test_dtype(self, t): # Make sure equivalent byte order char hash the same (e.g. < and = on # little endian) - for t in [int, float]: - dt = np.dtype(t) - dt2 = dt.newbyteorder("<") - dt3 = dt.newbyteorder(">") - if dt == dt2: - assert_(dt.byteorder != dt2.byteorder, "bogus test") - assert_dtype_equal(dt, dt2) - else: - assert_(dt.byteorder != dt3.byteorder, "bogus test") - assert_dtype_equal(dt, dt3) + dt = np.dtype(t) + dt2 = dt.newbyteorder("<") + dt3 = dt.newbyteorder(">") + if dt == dt2: + assert_(dt.byteorder != dt2.byteorder, "bogus test") + assert_dtype_equal(dt, dt2) + else: + assert_(dt.byteorder != dt3.byteorder, "bogus test") + assert_dtype_equal(dt, dt3) def test_equivalent_dtype_hashing(self): # Make sure equivalent dtypes with different type num hash equal @@ -649,12 +649,12 @@ class TestDtypeAttributes(object): new_dtype = np.dtype(dtype.descr) assert_equal(new_dtype.itemsize, 16) - def test_name_builtin(self): - for t in np.typeDict.values(): - name = t.__name__ - if name.endswith('_'): - name = name[:-1] - assert_equal(np.dtype(t).name, name) + @pytest.mark.parametrize('t', np.typeDict.values()) + def test_name_builtin(self, t): + name = t.__name__ + if name.endswith('_'): + name = name[:-1] + assert_equal(np.dtype(t).name, name) def test_name_dtype_subclass(self): # Ticket #4357 @@ -678,38 +678,46 @@ class TestPickling(object): assert_equal(x, y) assert_equal(x[0], y[0]) - def test_builtin(self): - for t in [int, float, complex, np.int32, str, object, - np.unicode, bool]: - self.check_pickling(np.dtype(t)) + @pytest.mark.parametrize('t', [int, float, complex, np.int32, str, object, + np.unicode, bool]) + def test_builtin(self, t): + self.check_pickling(np.dtype(t)) def test_structured(self): dt = np.dtype(([('a', '>f4', (2, 1)), ('b', '<f8', (1, 3))], (2, 2))) self.check_pickling(dt) + + def test_structured_aligned(self): dt = np.dtype('i4, i1', align=True) self.check_pickling(dt) + + def test_structured_unaligned(self): dt = np.dtype('i4, i1', align=False) self.check_pickling(dt) + + def test_structured_padded(self): dt = np.dtype({ 'names': ['A', 'B'], 'formats': ['f4', 'f4'], 'offsets': [0, 8], 'itemsize': 16}) self.check_pickling(dt) + + def test_structured_titles(self): dt = np.dtype({'names': ['r', 'b'], 'formats': ['u1', 'u1'], 'titles': ['Red pixel', 'Blue pixel']}) self.check_pickling(dt) - def test_datetime(self): - for base in ['m8', 'M8']: - for unit in ['', 'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', - 'us', 'ns', 'ps', 'fs', 'as']: - dt = np.dtype('%s[%s]' % (base, unit) if unit else base) - self.check_pickling(dt) - if unit: - dt = np.dtype('%s[7%s]' % (base, unit)) - self.check_pickling(dt) + @pytest.mark.parametrize('base', ['m8', 'M8']) + @pytest.mark.parametrize('unit', ['', 'Y', 'M', 'W', 'D', 'h', 'm', 's', + 'ms', 'us', 'ns', 'ps', 'fs', 'as']) + def test_datetime(self, base, unit): + dt = np.dtype('%s[%s]' % (base, unit) if unit else base) + self.check_pickling(dt) + if unit: + dt = np.dtype('%s[7%s]' % (base, unit)) + self.check_pickling(dt) def test_metadata(self): dt = np.dtype(int, metadata={'datum': 1}) diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py index 90a5141b3..daf2f8770 100644 --- a/numpy/ma/mrecords.py +++ b/numpy/ma/mrecords.py @@ -29,7 +29,6 @@ from numpy.core.records import ( ) _byteorderconv = np.core.records._byteorderconv -_typestr = ntypes._typestr import numpy.ma as ma from numpy.ma import ( @@ -48,24 +47,6 @@ __all__ = [ reserved_fields = ['_data', '_mask', '_fieldmask', 'dtype'] -def _getformats(data): - """ - Returns the formats of arrays in arraylist as a comma-separated string. - - """ - if hasattr(data, 'dtype'): - return ",".join([desc[1] for desc in data.dtype.descr]) - - formats = '' - for obj in data: - obj = np.asarray(obj) - formats += _typestr[obj.dtype.type] - if issubclass(obj.dtype.type, ntypes.flexible): - formats += repr(obj.itemsize) - formats += ',' - return formats[:-1] - - def _checknames(descr, names=None): """ Checks that field names ``descr`` are not reserved keywords. |