summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorSebastian Berg <sebastian@sipsolutions.net>2022-05-05 13:11:40 +0200
committerSebastian Berg <sebastian@sipsolutions.net>2022-05-09 17:13:13 +0200
commit59b7afee1fa088760233060297172df6da3bde63 (patch)
tree96a2e990b7c26a951d7098414eb0d9b34fe9998d /numpy
parentdcb8b07a9dba77cba14390859b9c8eec37f823d2 (diff)
downloadnumpy-59b7afee1fa088760233060297172df6da3bde63.tar.gz
MAINT,ENH: Adjust metadata preservation, preserving it for identical structured dtypes
Here, "identical" means that the promotion is between the same dtype object *and* that promoting it does not modify any of the included dtypes *and* does not modify the offset layout or itemsize.
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/_internal.py17
-rw-r--r--numpy/core/src/multiarray/convert_datatype.c7
-rw-r--r--numpy/core/src/multiarray/dtypemeta.c10
-rw-r--r--numpy/core/tests/test_numeric.py12
4 files changed, 37 insertions, 9 deletions
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index 151791d86..9a1787dde 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -457,11 +457,15 @@ def _promote_fields(dt1, dt2):
if (dt1.names is None or dt2.names is None) or dt1.names != dt2.names:
raise TypeError("invalid type promotion")
+ # if both are identical, we can (maybe!) just return the same dtype.
+ identical = dt1 is dt2
new_fields = []
for name in dt1.names:
field1 = dt1.fields[name]
field2 = dt2.fields[name]
new_descr = promote_types(field1[0], field2[0])
+ identical = identical and new_descr is field1[0]
+
# Check that the titles match (if given):
if field1[2:] != field2[2:]:
raise TypeError("invalid type promotion")
@@ -470,7 +474,18 @@ def _promote_fields(dt1, dt2):
else:
new_fields.append(((field1[2], name), new_descr))
- return dtype(new_fields, align=dt1.isalignedstruct or dt2.isalignedstruct)
+ res = dtype(new_fields, align=dt1.isalignedstruct or dt2.isalignedstruct)
+
+ # Might as well preserve identity (and metadata) if the dtype is identical
+ # and the itemsize and offsets are also unmodified. This could probably be
+ # sped up, but could also probably just be removed entirely.
+ if identical and res.itemsize == dt1.itemsize:
+ for name in dt1.names:
+ if dt1.fields[name][1] != res.fields[name][1]:
+ return res # the dtype changed.
+ return dt1
+
+ return res
def _getfield_is_safe(oldtype, newtype, offset):
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index c19fe8e23..79f918d2a 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -1075,8 +1075,11 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
/* Fast path for identical inputs (NOTE: This path preserves metadata!) */
if (type1 == type2
- /* Use for builtin except void, void has no reliable byteorder */
- && type1->type_num >= 0 && type1->type_num < NPY_NTYPES
+ /*
+ * Short-cut for legacy/builtin dtypes except void, since void has
+ * no reliable byteorder. Note: This path preserves metadata!
+ */
+ && NPY_DT_is_legacy(NPY_DTYPE(type1))
&& PyArray_ISNBO(type1->byteorder) && type1->type_num != NPY_VOID) {
Py_INCREF(type1);
return type1;
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index 0d4b21979..fb9903049 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -452,6 +452,16 @@ void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
if (new_base == NULL) {
return NULL;
}
+ /*
+ * If it is the same dtype and the contained base dtype did not change,
+ * we might as well preserve identity and metadata. This could probably
+ * be changed.
+ */
+ if (descr1 == descr2 && new_base == descr1->subarray->base) {
+ Py_DECREF(new_base);
+ Py_INCREF(descr1);
+ return descr1;
+ }
PyArray_Descr *new_descr = PyArray_DescrNew(descr1);
if (new_descr == NULL) {
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index e56e63109..5bd9456b9 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -953,7 +953,7 @@ class TestTypes:
@pytest.mark.parametrize("dtype",
list(np.typecodes["All"]) +
- ["i,i", "S3", "S100", "U3", "U100", rational])
+ ["i,i", "10i", "S3", "S100", "U3", "U100", rational])
def test_promote_identical_types_metadata(self, dtype):
# The same type passed in twice to promote types always
# preserves metadata
@@ -970,14 +970,14 @@ class TestTypes:
return
res = np.promote_types(dtype, dtype)
- if res.char in "?bhilqpBHILQPefdgFDGOmM" or dtype.type is rational:
- # Metadata is lost for simple promotions (they create a new dtype)
+
+ # Metadata is (currently) generally lost on byte-swapping (except for
+ # unicode).
+ if dtype.char != "U":
assert res.metadata is None
else:
assert res.metadata == metadata
- if dtype.kind != "V":
- # the result is native (except for structured void)
- assert res.isnative
+ assert res.isnative
@pytest.mark.slow
@pytest.mark.filterwarnings('ignore:Promotion of numbers:FutureWarning')