diff options
| author | Sebastian Berg <sebastian@sipsolutions.net> | 2022-05-05 13:11:40 +0200 |
|---|---|---|
| committer | Sebastian Berg <sebastian@sipsolutions.net> | 2022-05-09 17:13:13 +0200 |
| commit | 59b7afee1fa088760233060297172df6da3bde63 (patch) | |
| tree | 96a2e990b7c26a951d7098414eb0d9b34fe9998d /numpy | |
| parent | dcb8b07a9dba77cba14390859b9c8eec37f823d2 (diff) | |
| download | numpy-59b7afee1fa088760233060297172df6da3bde63.tar.gz | |
MAINT,ENH: Adjust metadata preservation, preserving it for identical structured dtypes
Here, "identical" means that the promotion is between the same dtype object *and* that
promoting it does not modify any of the contained dtypes *and* does not modify the
offset layout or itemsize.
Diffstat (limited to 'numpy')
| -rw-r--r-- | numpy/core/_internal.py | 17 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 7 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/dtypemeta.c | 10 | ||||
| -rw-r--r-- | numpy/core/tests/test_numeric.py | 12 |
4 files changed, 37 insertions, 9 deletions
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index 151791d86..9a1787dde 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -457,11 +457,15 @@ def _promote_fields(dt1, dt2): if (dt1.names is None or dt2.names is None) or dt1.names != dt2.names: raise TypeError("invalid type promotion") + # if both are identical, we can (maybe!) just return the same dtype. + identical = dt1 is dt2 new_fields = [] for name in dt1.names: field1 = dt1.fields[name] field2 = dt2.fields[name] new_descr = promote_types(field1[0], field2[0]) + identical = identical and new_descr is field1[0] + # Check that the titles match (if given): if field1[2:] != field2[2:]: raise TypeError("invalid type promotion") @@ -470,7 +474,18 @@ def _promote_fields(dt1, dt2): else: new_fields.append(((field1[2], name), new_descr)) - return dtype(new_fields, align=dt1.isalignedstruct or dt2.isalignedstruct) + res = dtype(new_fields, align=dt1.isalignedstruct or dt2.isalignedstruct) + + # Might as well preserve identity (and metadata) if the dtype is identical + # and the itemsize, offsets are also unmodified. This could probably be + # sped up, but also probably just be removed entirely. + if identical and res.itemsize == dt1.itemsize: + for name in dt1.names: + if dt1.fields[name][1] != res.fields[name][1]: + return res # the dtype changed. + return dt1 + + return res def _getfield_is_safe(oldtype, newtype, offset): diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index c19fe8e23..79f918d2a 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -1075,8 +1075,11 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) /* Fast path for identical inputs (NOTE: This path preserves metadata!) 
*/ if (type1 == type2 - /* Use for builtin except void, void has no reliable byteorder */ - && type1->type_num >= 0 && type1->type_num < NPY_NTYPES + /* + * Short-cut for legacy/builtin dtypes except void, since void has + * no reliable byteorder. Note: This path preserves metadata! + */ + && NPY_DT_is_legacy(NPY_DTYPE(type1)) && PyArray_ISNBO(type1->byteorder) && type1->type_num != NPY_VOID) { Py_INCREF(type1); return type1; diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index 0d4b21979..fb9903049 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -452,6 +452,16 @@ void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2) if (new_base == NULL) { return NULL; } + /* + * If it is the same dtype and the contained did not change, we might + * as well preserve identity and metadata. This could probably be + * changed. + */ + if (descr1 == descr2 && new_base == descr1->subarray->base) { + Py_DECREF(new_base); + Py_INCREF(descr1); + return descr1; + } PyArray_Descr *new_descr = PyArray_DescrNew(descr1); if (new_descr == NULL) { diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index e56e63109..5bd9456b9 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -953,7 +953,7 @@ class TestTypes: @pytest.mark.parametrize("dtype", list(np.typecodes["All"]) + - ["i,i", "S3", "S100", "U3", "U100", rational]) + ["i,i", "10i", "S3", "S100", "U3", "U100", rational]) def test_promote_identical_types_metadata(self, dtype): # The same type passed in twice to promote types always # preserves metadata @@ -970,14 +970,14 @@ class TestTypes: return res = np.promote_types(dtype, dtype) - if res.char in "?bhilqpBHILQPefdgFDGOmM" or dtype.type is rational: - # Metadata is lost for simple promotions (they create a new dtype) + + # Metadata is (currently) generally lost on byte-swapping (except for + # unicode. 
+ if dtype.char != "U": assert res.metadata is None else: assert res.metadata == metadata - if dtype.kind != "V": - # the result is native (except for structured void) - assert res.isnative + assert res.isnative @pytest.mark.slow @pytest.mark.filterwarnings('ignore:Promotion of numbers:FutureWarning') |
