28 files changed, 1342 insertions, 349 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst index 715235fd7..49e8ab22d 100644 --- a/doc/release/1.15.0-notes.rst +++ b/doc/release/1.15.0-notes.rst @@ -16,6 +16,9 @@ New functions common multiple. * `np.ma.stack`, the `np.stack` array-joining function generalized to masked arrays. +* ``quantile`` function, an interface to ``percentile`` without factors of 100 +* ``nanquantile`` function, an interface to ``nanpercentile`` without factors + of 100 * `np.printoptions`, the context manager which sets print options temporarily for the scope of the ``with`` block:: @@ -52,6 +55,17 @@ Deprecations In the future, it might return a different result. Use `np.sum(np.fromiter(generator))` or the built-in Python `sum` instead. +* Users of the C-API should call ``PyArray_ResolveWritebackIfCopy`` or + ``PyArray_DiscardWritebackIfCopy`` on any array with the ``WRITEBACKIFCOPY`` + flag set, before the array is deallocated. A deprecation warning will be + emitted if those calls are not used when needed. + +* Users of ``nditer`` should use the nditer object as a context manager + any time one of the iterator operands is writeable, so that numpy can + manage writeback semantics, or should call ``it.close()``. Otherwise a + `RuntimeWarning` will be emitted in these cases. Users of the C-API + should call ``NpyIter_Close`` before ``NpyIter_Deallocate``. + Future Changes ============== @@ -60,6 +74,19 @@ Future Changes Compatibility notes =================== +Under certain conditions, nditer must be used in a context manager +------------------------------------------------------------------ +When using an nditer with the ``"writeonly"`` or ``"readwrite"`` flags, there +are some circumstances where nditer doesn't actually give you a view onto the +writable array. Instead, it gives you a copy, and if you make changes to the +copy, nditer later writes those changes back into your actual array. Currently, +this writeback occurs when the array objects are garbage collected, which makes +this API error-prone on CPython and entirely broken on PyPy. Therefore, +``nditer`` should now be used as a context manager whenever using ``nditer`` +with writeable arrays (``with np.nditer(...) as it: ...``). You may also +explicitly call ``it.close()`` for cases where a context manager is unusable, +for instance in generator expressions. + Numpy has switched to using pytest instead of nose for testing -------------------------------------------------------------- The last nose release was 1.3.7 in June, 2015, and development of that tool has @@ -93,6 +120,8 @@ using the old API. C API changes ============= +``NpyIter_Close`` has been added and should be called before +``NpyIter_Deallocate`` to resolve possible writeback-enabled arrays. New Features ============ @@ -115,6 +144,13 @@ Creating a full iOS-compatible NumPy package requires building for the 5 architectures supported by iOS (i386, x86_64, armv7, armv7s and arm64), and combining these 5 compiled build products into a single "fat" binary. +``np.quantile`` and ``np.nanquantile`` +-------------------------------------- +Like ``np.percentile`` and ``np.nanpercentile``, but takes quantiles in [0, 1] +rather than percentiles in [0, 100]. ``np.percentile`` is now a thin wrapper +around ``np.quantile`` with the extra step of dividing by 100. + + Build system ------------ Added experimental support for the 64-bit RISC-V architecture. 
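The quantile addition described in the notes above is easy to illustrate. The following is an editorial sketch, not part of the patch; it assumes the default linear interpolation that ``np.percentile`` already uses::

    >>> import numpy as np
    >>> a = np.array([1.0, 2.0, 3.0, 4.0])
    >>> np.quantile(a, 0.25)       # q in [0, 1]
    1.75
    >>> np.percentile(a, 25)       # same result, q in [0, 100]
    1.75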
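Likewise, the new iteration pattern from the compatibility notes in its simplest form, a minimal sketch assembled from the notes rather than taken from the patch: any write-enabled ``nditer`` is wrapped in a ``with`` block (or explicitly closed via ``it.close()``) so the writeback, if any, happens deterministically::

    >>> import numpy as np
    >>> a = np.arange(6).reshape(2, 3)
    >>> with np.nditer(a, op_flags=['readwrite']) as it:
    ...     for x in it:
    ...         x[...] = 2 * x
    >>> a      # writeback is guaranteed once the block exits
    array([[ 0,  2,  4],
           [ 6,  8, 10]])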
diff --git a/doc/source/reference/arrays.nditer.rst b/doc/source/reference/arrays.nditer.rst index 76f5991cf..acad29b11 100644 --- a/doc/source/reference/arrays.nditer.rst +++ b/doc/source/reference/arrays.nditer.rst @@ -83,7 +83,14 @@ Modifying Array Values By default, the :class:`nditer` treats the input array as a read-only object. To modify the array elements, you must specify either read-write -or write-only mode. This is controlled with per-operand flags. +or write-only mode. This is controlled with per-operand flags. The +operands may be created as temporary copies of the original data, carrying +the `WRITEBACKIFCOPY` flag. In this case the iterator must either + +- be used as a context manager, so that the temporary data is written back + to the original array when the `__exit__` function is called, or +- have the iterator's `close` function called to ensure the modified data + is written back to the original array. Regular assignment in Python simply changes a reference in the local or global variable dictionary instead of modifying an existing variable in @@ -99,8 +106,9 @@ the ellipsis. >>> a array([[0, 1, 2], [3, 4, 5]]) - >>> for x in np.nditer(a, op_flags=['readwrite']): - ... x[...] = 2 * x + >>> with np.nditer(a, op_flags=['readwrite']) as it: + ... for x in it: + ... x[...] = 2 * x ... >>> a array([[ 0, 2, 4], @@ -178,9 +186,10 @@ construct in order to be more readable. 0 <(0, 0)> 1 <(0, 1)> 2 <(0, 2)> 3 <(1, 0)> 4 <(1, 1)> 5 <(1, 2)> >>> it = np.nditer(a, flags=['multi_index'], op_flags=['writeonly']) - >>> while not it.finished: - ... it[0] = it.multi_index[1] - it.multi_index[0] - ... it.iternext() + >>> with it: + ... while not it.finished: + ... it[0] = it.multi_index[1] - it.multi_index[0] + ... it.iternext() ... >>> a array([[ 0, 1, 2], @@ -385,10 +394,10 @@ parameter support. .. admonition:: Example >>> def square(a): - ... it = np.nditer([a, None]) - ... for x, y in it: - ... y[...] = x*x - ... return it.operands[1] + ... with np.nditer([a, None]) as it: + ... for x, y in it: + ... y[...] = x*x + ... return it.operands[1] ... >>> square([1,2,3]) array([1, 4, 9]) @@ -426,9 +435,10 @@ reasons. ... flags = ['external_loop', 'buffered'], ... op_flags = [['readonly'], ... ['writeonly', 'allocate', 'no_broadcast']]) - ... for x, y in it: - ... y[...] = x*x - ... return it.operands[1] + ... with it: + ... for x, y in it: + ... y[...] = x*x + ... return it.operands[1] ... >>> square([1,2,3]) @@ -480,10 +490,12 @@ Everything to do with the outer product is handled by the iterator setup. >>> b = np.arange(8).reshape(2,4) >>> it = np.nditer([a, b, None], flags=['external_loop'], ... op_axes=[[0, -1, -1], [-1, 0, 1], None]) - >>> for x, y, z in it: - ... z[...] = x*y + >>> with it: + ... for x, y, z in it: + ... z[...] = x*y + ... result = it.operands[2] # same as z ... - >>> it.operands[2] + >>> result array([[[ 0, 0, 0, 0], [ 0, 0, 0, 0]], [[ 0, 1, 2, 3], @@ -491,6 +503,9 @@ Everything to do with the outer product is handled by the iterator setup. [[ 0, 2, 4, 6], [ 8, 10, 12, 14]]]) +Note that once the iterator is closed we cannot access :func:`operands <nditer.operands>` +and must use a reference created inside the context manager.
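That note has a concrete consequence worth a quick sketch (editorial, not from the patch; the error message matches the checks added to ``nditer_pywrap.c`` later in this diff)::

    >>> import numpy as np
    >>> a = np.arange(6).reshape(2, 3)
    >>> it = np.nditer([a, None])
    >>> with it:
    ...     for x, y in it:
    ...         y[...] = x * x
    ...     result = it.operands[1]   # capture while the iterator is open
    >>> it.operands                   # the iterator is closed now
    Traceback (most recent call last):
        ...
    ValueError: Iterator is closed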
+ Reduction Iteration ------------------- @@ -505,9 +520,10 @@ For a simple example, consider taking the sum of all elements in an array. >>> a = np.arange(24).reshape(2,3,4) >>> b = np.array(0) - >>> for x, y in np.nditer([a, b], flags=['reduce_ok', 'external_loop'], - ... op_flags=[['readonly'], ['readwrite']]): - ... y[...] += x + >>> with np.nditer([a, b], flags=['reduce_ok', 'external_loop'], + ... op_flags=[['readonly'], ['readwrite']]) as it: + ... for x, y in it: + ... y[...] += x ... >>> b array(276) @@ -525,11 +541,13 @@ sums along the last axis of `a`. >>> it = np.nditer([a, None], flags=['reduce_ok', 'external_loop'], ... op_flags=[['readonly'], ['readwrite', 'allocate']], ... op_axes=[None, [0,1,-1]]) - >>> it.operands[1][...] = 0 - >>> for x, y in it: - ... y[...] += x + >>> with it: + ... it.operands[1][...] = 0 + ... for x, y in it: + ... y[...] += x + ... result = it.operands[1] ... - >>> it.operands[1] + >>> result array([[ 6, 22, 38], [54, 70, 86]]) >>> np.sum(a, axis=2) @@ -558,12 +576,14 @@ buffering. ... 'buffered', 'delay_bufalloc'], ... op_flags=[['readonly'], ['readwrite', 'allocate']], ... op_axes=[None, [0,1,-1]]) - >>> it.operands[1][...] = 0 - >>> it.reset() - >>> for x, y in it: - ... y[...] += x + >>> with it: + ... it.operands[1][...] = 0 + ... it.reset() + ... for x, y in it: + ... y[...] += x + ... result = it.operands[1] ... - >>> it.operands[1] + >>> result array([[ 6, 22, 38], [54, 70, 86]]) @@ -609,11 +629,12 @@ Here's how this looks. ... op_flags=[['readonly'], ['readwrite', 'allocate']], ... op_axes=[None, axeslist], ... op_dtypes=['float64', 'float64']) - ... it.operands[1][...] = 0 - ... it.reset() - ... for x, y in it: - ... y[...] += x*x - ... return it.operands[1] + ... with it: + ... it.operands[1][...] = 0 + ... it.reset() + ... for x, y in it: + ... y[...] += x*x + ... return it.operands[1] ... >>> a = np.arange(6).reshape(2,3) >>> sum_squares_py(a) @@ -661,16 +682,17 @@ Here's the listing of sum_squares.pyx:: op_flags=[['readonly'], ['readwrite', 'allocate']], op_axes=[None, axeslist], op_dtypes=['float64', 'float64']) - it.operands[1][...] = 0 - it.reset() - for xarr, yarr in it: - x = xarr - y = yarr - size = x.shape[0] - for i in range(size): - value = x[i] - y[i] = y[i] + value * value - return it.operands[1] + with it: + it.operands[1][...] = 0 + it.reset() + for xarr, yarr in it: + x = xarr + y = yarr + size = x.shape[0] + for i in range(size): + value = x[i] + y[i] = y[i] + value * value + return it.operands[1] On this machine, building the .pyx file into a module looked like the following, but you may have to find some Cython tutorials to tell you diff --git a/doc/source/reference/c-api.iterator.rst b/doc/source/reference/c-api.iterator.rst index 4c59bce51..17f1c45f2 100644 --- a/doc/source/reference/c-api.iterator.rst +++ b/doc/source/reference/c-api.iterator.rst @@ -110,6 +110,7 @@ number of non-zero elements in an array. /* Increment the iterator to the next inner loop */ } while(iternext(iter)); + NpyIter_Close(iter); /* best practice, not strictly required in this case */ NpyIter_Deallocate(iter); return nonzero_count; @@ -194,6 +195,7 @@ is used to control the memory layout of the allocated result, typically ret = NpyIter_GetOperandArray(iter)[1]; Py_INCREF(ret); + NpyIter_Close(iter); if (NpyIter_Deallocate(iter) != NPY_SUCCEED) { Py_DECREF(ret); return NULL; @@ -490,7 +492,10 @@ Construction and Destruction Indicate how the user of the iterator will read or write to ``op[i]``. Exactly one of these flags must be specified - per operand. + per operand. Using ``NPY_ITER_READWRITE`` or ``NPY_ITER_WRITEONLY`` + for a user-provided operand may trigger ``WRITEBACKIFCOPY`` + semantics. The data will be written back to the original array + when ``NpyIter_Close`` is called.
.. c:var:: NPY_ITER_COPY @@ -502,12 +507,12 @@ Construction and Destruction Triggers :c:data:`NPY_ITER_COPY`, and when an array operand is flagged for writing and is copied, causes the data - in a copy to be copied back to ``op[i]`` when the iterator - is destroyed. + in a copy to be copied back to ``op[i]`` when ``NpyIter_Close`` is + called. If the operand is flagged as write-only and a copy is needed, an uninitialized temporary array will be created and then copied - back to ``op[i]`` on destruction, instead of doing + back to ``op[i]`` on calling ``NpyIter_Close``, instead of doing the unnecessary copy operation. .. c:var:: NPY_ITER_NBO @@ -754,10 +759,21 @@ Construction and Destruction Returns ``NPY_SUCCEED`` or ``NPY_FAIL``. +.. c:function:: int NpyIter_Close(NpyIter* iter) + + Resolves any needed writeback, copying temporary operand buffers back + to the original arrays. Must be called before ``NpyIter_Deallocate``. + After this call it is not safe to use the operands. + + Returns ``0`` on success, ``-1`` on failure. + .. c:function:: int NpyIter_Deallocate(NpyIter* iter) - Deallocates the iterator object. This additionally frees any - copies made, triggering UPDATEIFCOPY behavior where necessary. + Deallocates the iterator object. + + `NpyIter_Close` should be called before this. If not, and if writeback is + needed, it will be performed at this point in order to maintain + backward compatibility with older code, and a deprecation warning will be + emitted. Old code should be updated to call `NpyIter_Close` beforehand. Returns ``NPY_SUCCEED`` or ``NPY_FAIL``. diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index bbbc34759..93a521658 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -257,6 +257,7 @@ add_newdoc('numpy.core', 'nditer', dtypes : tuple of dtype(s) The data types of the values provided in `value`. This may be different from the operand data types if buffering is enabled. + Valid only before the iterator is closed. finished : bool Whether the iteration over the operands is finished or not. has_delayed_bufalloc : bool @@ -282,7 +283,8 @@ add_newdoc('numpy.core', 'nditer', Size of the iterator. itviews Structured view(s) of `operands` in memory, matching the reordered - and optimized iterator access pattern. + and optimized iterator access pattern. Valid only before the iterator + is closed. multi_index When the "multi_index" flag was used, this property provides access to the index. Raises a ValueError if accessed @@ -292,7 +294,8 @@ add_newdoc('numpy.core', 'nditer', nop : int The number of iterator operands. operands : tuple of operand(s) - The array(s) to be iterated over. + The array(s) to be iterated over. Valid only before the iterator is + closed. shape : tuple of ints Shape tuple, the shape of the iterator. 
value @@ -319,8 +322,9 @@ add_newdoc('numpy.core', 'nditer', addop = np.add it = np.nditer([x, y, out], [], [['readonly'], ['readonly'], ['writeonly','allocate']]) - for (a, b, c) in it: - addop(a, b, out=c) + with it: + for (a, b, c) in it: + addop(a, b, out=c) return it.operands[2] Here is the same function, but following the C-style pattern:: @@ -330,12 +334,12 @@ add_newdoc('numpy.core', 'nditer', it = np.nditer([x, y, out], [], [['readonly'], ['readonly'], ['writeonly','allocate']]) + with it: + while not it.finished: + addop(it[0], it[1], out=it[2]) + it.iternext() - while not it.finished: - addop(it[0], it[1], out=it[2]) - it.iternext() - - return it.operands[2] + return it.operands[2] Here is an example outer product function:: @@ -344,14 +348,13 @@ add_newdoc('numpy.core', 'nditer', it = np.nditer([x, y, out], ['external_loop'], [['readonly'], ['readonly'], ['writeonly', 'allocate']], - op_axes=[range(x.ndim)+[-1]*y.ndim, - [-1]*x.ndim+range(y.ndim), + op_axes=[list(range(x.ndim)) + [-1] * y.ndim, + [-1] * x.ndim + list(range(y.ndim)), None]) - - for (a, b, c) in it: - mulop(a, b, out=c) - - return it.operands[2] + with it: + for (a, b, c) in it: + mulop(a, b, out=c) + return it.operands[2] >>> a = np.arange(2)+1 >>> b = np.arange(3)+1 @@ -374,13 +377,39 @@ add_newdoc('numpy.core', 'nditer', while not it.finished: it[0] = lamdaexpr(*it[1:]) it.iternext() - return it.operands[0] + return it.operands[0] >>> a = np.arange(5) >>> b = np.ones(5) >>> luf(lambda i,j:i*i + j/2, a, b) array([ 0.5, 1.5, 4.5, 9.5, 16.5]) + If operand flags `"writeonly"` or `"readwrite"` are used, the operands may + be temporary copies of the original data, carrying the WRITEBACKIFCOPY + flag. In this case nditer must be used as a context manager. The temporary + data will be written back to the original array when the ``__exit__`` + function is called, but not before:: + + >>> a = np.arange(6, dtype='i4')[::-2] + >>> with np.nditer(a, [], + ... [['writeonly', 'updateifcopy']], + ... casting='unsafe', + ... op_dtypes=[np.dtype('f4')]) as i: + ... x = i.operands[0] + ... x[:] = [-1, -2, -3] + ... # a still unchanged here + >>> a, x + (array([-1, -2, -3], dtype=int32), array([-1., -2., -3.], dtype=float32)) + + It is important to note that once the iterator is exited, dangling + references (like `x` in the example) may or may not share data with + the original data `a`. If writeback semantics were active, i.e. if + `x.base.flags.writebackifcopy` is `True`, then exiting the iterator + will sever the connection between `x` and `a`; writing to `x` will + no longer write to `a`. If writeback semantics are not active, then + `x.data` will still point at some part of `a.data`, and writing to + one will affect the other. + """) # nditer methods @@ -404,6 +433,13 @@ add_newdoc('numpy.core', 'nditer', ('copy', """)) +add_newdoc('numpy.core', 'nditer', ('operands', + """ + operands[`Slice`] + + The array(s) to be iterated over. Valid only before the iterator is closed. + """)) + add_newdoc('numpy.core', 'nditer', ('debug_print', """ debug_print() @@ -524,6 +560,13 @@ add_newdoc('numpy.core', 'nested_iters', """) +add_newdoc('numpy.core', 'nditer', ('close', + """ + close() + + Resolve all writeback semantics in writeable operands. 
+ + """)) ############################################################################### diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt index 68ac5109c..cc6c3a5fb 100644 --- a/numpy/core/code_generators/cversions.txt +++ b/numpy/core/code_generators/cversions.txt @@ -41,3 +41,6 @@ # Version 12 (NumPy 1.14) Added PyArray_ResolveWritebackIfCopy, # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase. 0x0000000c = a1bc756c5782853ec2e3616cf66869d8 + +# Version 13 (NumPy 1.15) Added NpyIter_Close +0x0000000d = 4386e829d65aafce6bd09a85b142d585 diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index a454d95b0..157fa3447 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -5,7 +5,8 @@ Each dictionary contains name -> index pair. Whenever you change one index, you break the ABI (and the ABI version number should be incremented). Whenever you add an item to one of the dict, the API -needs to be updated. +needs to be updated in both setup_common.py and by adding an appropriate +entry to cversion.txt (generate the hash via "python cversions.py". When adding a function, make sure to use the next integer not used as an index (in case you use an existing index or jump, the build will stop and raise an @@ -349,6 +350,8 @@ multiarray_funcs_api = { 'PyArray_ResolveWritebackIfCopy': (302,), 'PyArray_SetWritebackIfCopyBase': (303,), # End 1.14 API + 'NpyIter_Close': (304,), + # End 1.15 API } ufunc_types_api = { diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index f36d61f55..a8aba40bd 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -40,7 +40,8 @@ C_ABI_VERSION = 0x01000009 # 0x0000000a - 1.12.x # 0x0000000b - 1.13.x # 0x0000000c - 1.14.x -C_API_VERSION = 0x0000000c +# 0x0000000d - 1.15.x +C_API_VERSION = 0x0000000d class MismatchCAPIWarning(Warning): pass diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index afc6db1aa..38698887a 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -657,6 +657,24 @@ npy_create_writebackifcopy(PyObject* NPY_UNUSED(self), PyObject* args) return array; } +/* used to test WRITEBACKIFCOPY without resolution emits runtime warning */ +static PyObject* +npy_abuse_writebackifcopy(PyObject* NPY_UNUSED(self), PyObject* args) +{ + int flags; + PyObject* array; + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return NULL; + } + flags = NPY_ARRAY_CARRAY | NPY_ARRAY_WRITEBACKIFCOPY; + array = PyArray_FromArray((PyArrayObject*)args, NULL, flags); + if (array == NULL) + return NULL; + Py_DECREF(array); /* calls array_dealloc even on PyPy */ + Py_RETURN_NONE; +} + /* resolve WRITEBACKIFCOPY */ static PyObject* npy_resolve(PyObject* NPY_UNUSED(self), PyObject* args) @@ -1009,6 +1027,75 @@ test_nditer_too_large(PyObject *NPY_UNUSED(self), PyObject *args) { return NULL; } +static PyObject * +test_nditer_writeback(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) +{ + /* like npyiter_init */ + PyObject *op_in = NULL, *op_dtypes_in = NULL, *value = NULL; + PyArrayObject * opview; + int iop, nop = 0; + PyArrayObject *op[NPY_MAXARGS]; + npy_uint32 flags = 0; + NPY_ORDER order = NPY_KEEPORDER; + NPY_CASTING casting = NPY_EQUIV_CASTING; + npy_uint32 op_flags[NPY_MAXARGS]; + PyArray_Descr 
*op_request_dtypes[NPY_MAXARGS]; + int retval; + unsigned char do_close; + int buffersize = 0; + NpyIter *iter = NULL; + static char *kwlist[] = {"value", "do_close", "input", "op_dtypes", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "ObO|O:test_nditer_writeback", kwlist, + &value, + &do_close, + &op_in, + &op_dtypes_in)) { + return NULL; + } + /* op and op_flags */ + if (! PyArray_Check(op_in)) { + return NULL; + } + nop = 1; + op[0] = (PyArrayObject*)op_in; + op_flags[0] = NPY_ITER_READWRITE|NPY_ITER_UPDATEIFCOPY; + + /* Set the dtypes */ + for (iop=0; iop<nop; iop++) { + PyObject *dtype = PySequence_GetItem(op_dtypes_in, iop); + PyArray_DescrConverter2(dtype, &op_request_dtypes[iop]); + } + + iter = NpyIter_AdvancedNew(nop, op, flags, order, casting, op_flags, + op_request_dtypes, + -1, NULL, NULL, + buffersize); + if (iter == NULL) { + goto fail; + } + + opview = NpyIter_GetIterView(iter, 0); + retval = PyArray_FillWithScalar(opview, value); + Py_DECREF(opview); + if (retval < 0) { + NpyIter_Deallocate(iter); + return NULL; + } + if (do_close != 0) { + NpyIter_Close(iter); + } + NpyIter_Deallocate(iter); + Py_RETURN_NONE; + +fail: + for (iop = 0; iop < nop; ++iop) { + Py_XDECREF(op[iop]); + Py_XDECREF(op_request_dtypes[iop]); + } + return NULL; +} static PyObject * array_solve_diophantine(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) @@ -1764,6 +1851,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"npy_create_writebackifcopy", npy_create_writebackifcopy, METH_O, NULL}, + {"npy_abuse_writebackifcopy", + npy_abuse_writebackifcopy, + METH_O, NULL}, {"npy_resolve", npy_resolve, METH_O, NULL}, @@ -1784,6 +1874,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"test_nditer_too_large", test_nditer_too_large, METH_VARARGS, NULL}, + {"test_nditer_writeback", + (PyCFunction)test_nditer_writeback, + METH_VARARGS | METH_KEYWORDS, NULL}, {"solve_diophantine", (PyCFunction)array_solve_diophantine, METH_VARARGS | METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 0aaf27b27..69538c6b7 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -86,16 +86,6 @@ NPY_NO_EXPORT int PyArray_SetUpdateIfCopyBase(PyArrayObject *arr, PyArrayObject *base) { int ret; -#ifdef PYPY_VERSION - #ifndef DEPRECATE_UPDATEIFCOPY - #define DEPRECATE_UPDATEIFCOPY - #endif -#endif - -#ifdef DEPRECATE_UPDATEIFCOPY - /* TODO: enable this once a solution for UPDATEIFCOPY - * and nditer are resolved, also pending the fix for GH7054 - */ /* 2017-Nov-10 1.14 */ if (DEPRECATE("PyArray_SetUpdateIfCopyBase is deprecated, use " "PyArray_SetWritebackIfCopyBase instead, and be sure to call " @@ -104,7 +94,6 @@ PyArray_SetUpdateIfCopyBase(PyArrayObject *arr, PyArrayObject *base) "error, PyArray_DiscardWritebackIfCopy may be called instead to " "throw away the scratch buffer.") < 0) return -1; -#endif ret = PyArray_SetWritebackIfCopyBase(arr, base); if (ret >=0) { PyArray_ENABLEFLAGS(arr, NPY_ARRAY_UPDATEIFCOPY); @@ -453,6 +442,27 @@ PyArray_ResolveWritebackIfCopy(PyArrayObject * self) /*********************** end C-API functions **********************/ + +/* dealloc must not raise an error, best effort try to write + to stderr and clear the error +*/ + +static NPY_INLINE void +WARN_IN_DEALLOC(PyObject* warning, const char * msg) { + if (PyErr_WarnEx(warning, msg, 1) < 0) { + PyObject * s; + + s = PyUString_FromString("array_dealloc"); + if (s) { + PyErr_WriteUnraisable(s); + 
Py_DECREF(s); + } + else { + PyErr_WriteUnraisable(Py_None); + } + } +}; + /* array object functions */ static void @@ -469,17 +479,15 @@ array_dealloc(PyArrayObject *self) int retval; if (PyArray_FLAGS(self) & NPY_ARRAY_WRITEBACKIFCOPY) { - char * msg = "WRITEBACKIFCOPY requires a call to " - "PyArray_ResolveWritebackIfCopy or " - "PyArray_DiscardWritebackIfCopy before array_dealloc is " - "called."; - /* 2017-Nov-10 1.14 */ - if (DEPRECATE(msg) < 0) { - /* dealloc cannot raise an error, best effort try to write - to stderr and clear the error - */ - PyErr_WriteUnraisable((PyObject *)&PyArray_Type); - } + char const * msg = "WRITEBACKIFCOPY detected in array_dealloc. " + " Required call to PyArray_ResolveWritebackIfCopy or " + "PyArray_DiscardWritebackIfCopy is missing. This could also " + "be caused by using a nditer without a context manager"; + Py_INCREF(self); /* hold on to self in next call since if + * refcount == 0 it will recurse back into + *array_dealloc + */ + WARN_IN_DEALLOC(PyExc_RuntimeWarning, msg); retval = PyArray_ResolveWritebackIfCopy(self); if (retval < 0) { @@ -489,10 +497,15 @@ array_dealloc(PyArrayObject *self) } if (PyArray_FLAGS(self) & NPY_ARRAY_UPDATEIFCOPY) { /* DEPRECATED, remove once the flag is removed */ + char const * msg = "UPDATEIFCOPY detected in array_dealloc. " + " Required call to PyArray_ResolveWritebackIfCopy or " + "PyArray_DiscardWritebackIfCopy is missing"; Py_INCREF(self); /* hold on to self in next call since if - * refcount == 0 it will recurse back into + * refcount == 0 it will recurse back into *array_dealloc */ + /* 2017-Nov-10 1.14 */ + WARN_IN_DEALLOC(PyExc_DeprecationWarning, msg); retval = PyArray_ResolveWritebackIfCopy(self); if (retval < 0) { @@ -501,7 +514,7 @@ array_dealloc(PyArrayObject *self) } } /* - * In any case base is pointing to something that we need + * If fa->base is non-NULL, it is something * to DECREF -- either a view or a buffer object */ Py_XDECREF(fa->base); diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 0eba077da..59eb2457c 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1014,7 +1014,7 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd, } else { fa->flags = (flags & ~NPY_ARRAY_WRITEBACKIFCOPY); - fa->flags = (fa->flags & ~NPY_ARRAY_UPDATEIFCOPY); + fa->flags &= ~NPY_ARRAY_UPDATEIFCOPY; } fa->descr = descr; fa->base = (PyObject *)NULL; diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index eca4e98be..4b2c6aa5a 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -3374,6 +3374,7 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index) static void arraymapiter_dealloc(PyArrayMapIterObject *mit) { + PyArray_ResolveWritebackIfCopy(mit->array); Py_XDECREF(mit->array); Py_XDECREF(mit->ait); Py_XDECREF(mit->subspace); diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c index f2bc23715..152955940 100644 --- a/numpy/core/src/multiarray/nditer_api.c +++ b/numpy/core/src/multiarray/nditer_api.c @@ -1391,6 +1391,47 @@ NpyIter_GetInnerLoopSizePtr(NpyIter *iter) } /*NUMPY_API + * Resolves all writebackifcopy scratch buffers, not safe to use iterator + * operands after this call, in this iterator as well as any copies. 
+ * Returns 0 on success, -1 on failure + */ +NPY_NO_EXPORT int +NpyIter_Close(NpyIter *iter) +{ + int ret=0, iop, nop; + PyArrayObject ** operands; + npyiter_opitflags *op_itflags; + if (iter == NULL) { + return 0; + } + nop = NIT_NOP(iter); + operands = NIT_OPERANDS(iter); + op_itflags = NIT_OPITFLAGS(iter); + /* If NPY_OP_ITFLAG_HAS_WRITEBACK flag set on operand, resolve it. + * If the resolution fails (should never happen), continue from the + * next operand and discard the writeback scratch buffers, and return + * failure status + */ + for (iop=0; iop<nop; iop++) { + if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) { + op_itflags[iop] &= ~NPY_OP_ITFLAG_HAS_WRITEBACK; + if (PyArray_ResolveWritebackIfCopy(operands[iop]) < 0) { + ret = -1; + iop++; + break; + } + } + } + for (; iop<nop; iop++) { + if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) { + op_itflags[iop] &= ~NPY_OP_ITFLAG_HAS_WRITEBACK; + PyArray_DiscardWritebackIfCopy(operands[iop]); + } + } + return ret; +} + +/*NUMPY_API * For debugging */ NPY_NO_EXPORT void @@ -2799,5 +2840,4 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count, } return count * (*reduce_innersize); } - #undef NPY_ITERATOR_IMPLEMENTATION_CODE diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index b74aca01c..c512cf208 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -403,6 +403,7 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, */ if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags, op_itflags, op_axes)) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return NULL; } @@ -464,12 +465,14 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, /* If buffering is set without delayed allocation */ if (itflags & NPY_ITFLAG_BUFFER) { if (!npyiter_allocate_transfer_functions(iter)) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return NULL; } if (!(itflags & NPY_ITFLAG_DELAYBUF)) { /* Allocate the buffers */ if (!npyiter_allocate_buffers(iter, NULL)) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return NULL; } @@ -2716,7 +2719,7 @@ npyiter_allocate_arrays(NpyIter *iter, * * If any write operand has memory overlap with any read operand, * eliminate all overlap by making temporary copies, by enabling - * NPY_OP_ITFLAG_FORCECOPY for the write operand to force UPDATEIFCOPY. + * NPY_OP_ITFLAG_FORCECOPY for the write operand to force WRITEBACKIFCOPY. * * Operands with NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE enabled are not * considered overlapping if the arrays are exactly the same. 
In this @@ -2920,13 +2923,15 @@ npyiter_allocate_arrays(NpyIter *iter, return 0; } } - /* If the data will be written to, set UPDATEIFCOPY */ + /* If the data will be written to, set WRITEBACKIFCOPY + and require a context manager */ if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) { Py_INCREF(op[iop]); - if (PyArray_SetUpdateIfCopyBase(temp, op[iop]) < 0) { + if (PyArray_SetWritebackIfCopyBase(temp, op[iop]) < 0) { Py_DECREF(temp); return 0; } + op_itflags[iop] |= NPY_OP_ITFLAG_HAS_WRITEBACK; } Py_DECREF(op[iop]); diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h index 7788d327b..5fb146026 100644 --- a/numpy/core/src/multiarray/nditer_impl.h +++ b/numpy/core/src/multiarray/nditer_impl.h @@ -124,6 +124,8 @@ #define NPY_OP_ITFLAG_USINGBUFFER 0x0100 /* The operand must be copied (with UPDATEIFCOPY if also ITFLAG_WRITE) */ #define NPY_OP_ITFLAG_FORCECOPY 0x0200 +/* The operand has temporary data, write it back at dealloc */ +#define NPY_OP_ITFLAG_HAS_WRITEBACK 0x0400 /* * The data layout of the iterator is fully specified by diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c index 0d318178f..4505e645b 100644 --- a/numpy/core/src/multiarray/nditer_pywrap.c +++ b/numpy/core/src/multiarray/nditer_pywrap.c @@ -26,6 +26,8 @@ struct NewNpyArrayIterObject_tag { NpyIter *iter; /* Flag indicating iteration started/stopped */ char started, finished; + /* iter operands cannot be referenced if iter is closed */ + npy_bool is_closed; /* Child to update for nested iteration */ NewNpyArrayIterObject *nested_child; /* Cached values from the iterator */ @@ -85,6 +87,7 @@ npyiter_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) if (self != NULL) { self->iter = NULL; self->nested_child = NULL; + self->is_closed = 0; } return (PyObject *)self; @@ -704,7 +707,7 @@ npyiter_convert_ops(PyObject *op_in, PyObject *op_flags_in, PyErr_SetString(PyExc_TypeError, "Iterator operand is flagged as writeable, " "but is an object which cannot be written " - "back to via UPDATEIFCOPY"); + "back to via WRITEBACKIFCOPY"); } for (iop = 0; iop < nop; ++iop) { Py_DECREF(op[iop]); @@ -1414,6 +1417,12 @@ static PyObject *npyiter_value_get(NewNpyArrayIterObject *self) ret = npyiter_seq_item(self, 0); } else { + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return NULL; + } + ret = PyTuple_New(nop); if (ret == NULL) { return NULL; @@ -1443,6 +1452,11 @@ static PyObject *npyiter_operands_get(NewNpyArrayIterObject *self) "Iterator is invalid"); return NULL; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return NULL; + } nop = NpyIter_GetNOp(self->iter); operands = self->operands; @@ -1473,6 +1487,12 @@ static PyObject *npyiter_itviews_get(NewNpyArrayIterObject *self) return NULL; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return NULL; + } + nop = NpyIter_GetNOp(self->iter); ret = PyTuple_New(nop); @@ -1495,7 +1515,8 @@ static PyObject *npyiter_itviews_get(NewNpyArrayIterObject *self) static PyObject * npyiter_next(NewNpyArrayIterObject *self) { - if (self->iter == NULL || self->iternext == NULL || self->finished) { + if (self->iter == NULL || self->iternext == NULL || + self->finished || self->is_closed) { return NULL; } @@ -1890,6 +1911,12 @@ static PyObject *npyiter_dtypes_get(NewNpyArrayIterObject *self) return NULL; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return 
NULL; + } + nop = NpyIter_GetNOp(self->iter); ret = PyTuple_New(nop); @@ -1986,6 +2013,12 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i) return NULL; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return NULL; + } + nop = NpyIter_GetNOp(self->iter); /* Negative indexing */ @@ -2070,6 +2103,12 @@ npyiter_seq_slice(NewNpyArrayIterObject *self, return NULL; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return NULL; + } + nop = NpyIter_GetNOp(self->iter); if (ilow < 0) { ilow = 0; @@ -2130,6 +2169,12 @@ npyiter_seq_ass_item(NewNpyArrayIterObject *self, Py_ssize_t i, PyObject *v) return -1; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return -1; + } + nop = NpyIter_GetNOp(self->iter); /* Negative indexing */ @@ -2204,6 +2249,12 @@ npyiter_seq_ass_slice(NewNpyArrayIterObject *self, Py_ssize_t ilow, return -1; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return -1; + } + nop = NpyIter_GetNOp(self->iter); if (ilow < 0) { ilow = 0; @@ -2255,6 +2306,12 @@ npyiter_subscript(NewNpyArrayIterObject *self, PyObject *op) return NULL; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return NULL; + } + if (PyInt_Check(op) || PyLong_Check(op) || (PyIndex_Check(op) && !PySequence_Check(op))) { npy_intp i = PyArray_PyIntAsIntp(op); @@ -2304,6 +2361,12 @@ npyiter_ass_subscript(NewNpyArrayIterObject *self, PyObject *op, return -1; } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, + "Iterator is closed"); + return -1; + } + if (PyInt_Check(op) || PyLong_Check(op) || (PyIndex_Check(op) && !PySequence_Check(op))) { npy_intp i = PyArray_PyIntAsIntp(op); @@ -2331,6 +2394,44 @@ npyiter_ass_subscript(NewNpyArrayIterObject *self, PyObject *op, return -1; } +static PyObject * +npyiter_enter(NewNpyArrayIterObject *self) +{ + if (self->iter == NULL) { + PyErr_SetString(PyExc_RuntimeError, "operation on non-initialized iterator"); + return NULL; + } + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, "cannot reuse closed iterator"); + return NULL; + } + Py_INCREF(self); + return (PyObject *)self; +} + +static PyObject * +npyiter_close(NewNpyArrayIterObject *self) +{ + NpyIter *iter = self->iter; + int ret; + if (self->iter == NULL) { + Py_RETURN_NONE; + } + ret = NpyIter_Close(iter); + self->is_closed = 1; + if (ret < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +npyiter_exit(NewNpyArrayIterObject *self, PyObject *args) +{ + /* even if called via exception handling, writeback any data */ + return npyiter_close(self); +} + static PyMethodDef npyiter_methods[] = { {"reset", (PyCFunction)npyiter_reset, @@ -2356,6 +2457,12 @@ static PyMethodDef npyiter_methods[] = { {"debug_print", (PyCFunction)npyiter_debug_print, METH_NOARGS, NULL}, + {"__enter__", (PyCFunction)npyiter_enter, + METH_NOARGS, NULL}, + {"__exit__", (PyCFunction)npyiter_exit, + METH_VARARGS, NULL}, + {"close", (PyCFunction)npyiter_close, + METH_VARARGS, NULL}, {NULL, NULL, 0, NULL}, }; diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 6dd597b3a..e0423630b 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -1203,7 +1203,7 @@ iterator_loop(PyUFuncObject *ufunc, PyUFuncGenericFunction innerloop, void *innerloopdata) { - npy_intp i, iop, nin = ufunc->nin, nout = ufunc->nout; + npy_intp i, nin = ufunc->nin, nout 
= ufunc->nout; npy_intp nop = nin + nout; npy_uint32 op_flags[NPY_MAXARGS]; NpyIter *iter; @@ -1216,6 +1216,7 @@ iterator_loop(PyUFuncObject *ufunc, PyArrayObject **op_it; npy_uint32 iter_flags; + int retval; NPY_BEGIN_THREADS_DEF; @@ -1289,12 +1290,7 @@ iterator_loop(PyUFuncObject *ufunc, /* Call the __array_prepare__ functions for the new array */ if (prepare_ufunc_output(ufunc, &op[nin+i], arr_prep[i], arr_prep_args, i) < 0) { - for(iop = 0; iop < nin+i; ++iop) { - if (op_it[iop] != op[iop]) { - /* ignore errors */ - PyArray_ResolveWritebackIfCopy(op_it[iop]); - } - } + NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1323,6 +1319,7 @@ iterator_loop(PyUFuncObject *ufunc, baseptrs[i] = PyArray_BYTES(op_it[i]); } if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1330,6 +1327,7 @@ iterator_loop(PyUFuncObject *ufunc, /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1347,13 +1345,9 @@ iterator_loop(PyUFuncObject *ufunc, NPY_END_THREADS; } - for(iop = 0; iop < nop; ++iop) { - if (op_it[iop] != op[iop]) { - PyArray_ResolveWritebackIfCopy(op_it[iop]); - } - } + retval = NpyIter_Close(iter); NpyIter_Deallocate(iter); - return 0; + return retval; } /* @@ -1650,6 +1644,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, if (prepare_ufunc_output(ufunc, &op_tmp, arr_prep[i], arr_prep_args, i) < 0) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1660,6 +1655,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, "The __array_prepare__ functions modified the data " "pointer addresses in an invalid fashion"); Py_DECREF(op_tmp); + NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1694,6 +1690,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, wheremask != NULL ? 
fixed_strides[nop] : fixed_strides[nop + nin], &innerloop, &innerloopdata, &needs_api) < 0) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1701,6 +1698,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { + NpyIter_Close(iter); NpyIter_Deallocate(iter); return -1; } @@ -1724,14 +1722,7 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, NPY_AUXDATA_FREE(innerloopdata); } - retval = 0; - nop = NpyIter_GetNOp(iter); - for(i=0; i< nop; ++i) { - if (PyArray_ResolveWritebackIfCopy(NpyIter_GetOperandArray(iter)[i]) < 0) { - retval = -1; - } - } - + retval = NpyIter_Close(iter); NpyIter_Deallocate(iter); return retval; } @@ -2537,11 +2528,14 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, } /* Write back any temporary data from PyArray_SetWritebackIfCopyBase */ - for(i=nin; i< nop; ++i) - if (PyArray_ResolveWritebackIfCopy(NpyIter_GetOperandArray(iter)[i]) < 0) - goto fail; + if (NpyIter_Close(iter) < 0) { + goto fail; + } PyArray_free(inner_strides); + if (NpyIter_Close(iter) < 0) { + goto fail; + } NpyIter_Deallocate(iter); /* The caller takes ownership of all the references in op */ for (i = 0; i < nop; ++i) { @@ -2558,6 +2552,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, fail: NPY_UF_DBG_PRINT1("Returning failure code %d\n", retval); PyArray_free(inner_strides); + NpyIter_Close(iter); NpyIter_Deallocate(iter); for (i = 0; i < nop; ++i) { Py_XDECREF(op[i]); @@ -3425,9 +3420,12 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, } finish: - /* Write back any temporary data from PyArray_SetWritebackIfCopyBase */ - if (PyArray_ResolveWritebackIfCopy(op[0]) < 0) + if (NpyIter_Close(iter) < 0) { goto fail; + } + if (NpyIter_Close(iter_inner) < 0) { + goto fail; + } Py_XDECREF(op_dtypes[0]); NpyIter_Deallocate(iter); NpyIter_Deallocate(iter_inner); @@ -3810,7 +3808,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, } finish: - if (op[0] && PyArray_ResolveWritebackIfCopy(op[0]) < 0) { + if (NpyIter_Close(iter) < 0) { goto fail; } Py_XDECREF(op_dtypes[0]); @@ -5268,6 +5266,7 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) iternext = NpyIter_GetIterNext(iter_buffer, NULL); if (iternext == NULL) { + NpyIter_Close(iter_buffer); NpyIter_Deallocate(iter_buffer); goto fail; } @@ -5337,11 +5336,9 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) PyErr_SetString(PyExc_ValueError, err_msg); } + NpyIter_Close(iter_buffer); NpyIter_Deallocate(iter_buffer); - if (op1_array != (PyArrayObject*)op1) { - PyArray_ResolveWritebackIfCopy(op1_array); - } Py_XDECREF(op2_array); Py_XDECREF(iter); Py_XDECREF(iter2); @@ -5357,9 +5354,9 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) } fail: - + /* iter_buffer has already been deallocated, don't use NpyIter_Close */ if (op1_array != (PyArrayObject*)op1) { - PyArray_ResolveWritebackIfCopy(op1_array); + PyArray_DiscardWritebackIfCopy(op1_array); } Py_XDECREF(op2_array); Py_XDECREF(iter); diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 4bc85ad97..806a3b083 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -7258,6 +7258,13 @@ class TestWritebackIfCopy(object): arr_wb[:] = 100 assert_equal(arr, -100) + def test_dealloc_warning(self): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + arr = np.arange(9).reshape(3, 3) + v = arr.T + 
_multiarray_tests.npy_abuse_writebackifcopy(v) + assert len(sup.log) == 1 class TestArange(object): def test_infinite(self): diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index 0e29876eb..77c26eacf 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -43,13 +43,14 @@ def test_iter_refcount(): dt = np.dtype('f4').newbyteorder() rc_a = sys.getrefcount(a) rc_dt = sys.getrefcount(dt) - it = nditer(a, [], + with nditer(a, [], [['readwrite', 'updateifcopy']], casting='unsafe', - op_dtypes=[dt]) - assert_(not it.iterationneedsapi) - assert_(sys.getrefcount(a) > rc_a) - assert_(sys.getrefcount(dt) > rc_dt) + op_dtypes=[dt]) as it: + assert_(not it.iterationneedsapi) + assert_(sys.getrefcount(a) > rc_a) + assert_(sys.getrefcount(dt) > rc_dt) + # del 'it' it = None assert_equal(sys.getrefcount(a), rc_a) assert_equal(sys.getrefcount(dt), rc_dt) @@ -766,12 +767,32 @@ def test_iter_flags_errors(): def test_iter_slice(): a, b, c = np.arange(3), np.arange(3), np.arange(3.) i = nditer([a, b, c], [], ['readwrite']) - i[0:2] = (3, 3) - assert_equal(a, [3, 1, 2]) - assert_equal(b, [3, 1, 2]) - assert_equal(c, [0, 1, 2]) - i[1] = 12 - assert_equal(i[0:2], [3, 12]) + with i: + i[0:2] = (3, 3) + assert_equal(a, [3, 1, 2]) + assert_equal(b, [3, 1, 2]) + assert_equal(c, [0, 1, 2]) + i[1] = 12 + assert_equal(i[0:2], [3, 12]) + +def test_iter_assign_mapping(): + a = np.arange(24, dtype='f8').reshape(2, 3, 4).T + it = np.nditer(a, [], [['readwrite', 'updateifcopy']], + casting='same_kind', op_dtypes=[np.dtype('f4')]) + with it: + it.operands[0][...] = 3 + it.operands[0][...] = 14 + assert_equal(a, 14) + it = np.nditer(a, [], [['readwrite', 'updateifcopy']], + casting='same_kind', op_dtypes=[np.dtype('f4')]) + with it: + x = it.operands[0][-1:1] + x[...] = 14 + it.operands[0][...] 
= -1234 + assert_equal(a, -1234) + # check for no warnings on dealloc + x = None + it = None def test_iter_nbo_align_contig(): # Check that byte order, alignment, and contig changes work @@ -783,23 +804,26 @@ def test_iter_nbo_align_contig(): i = nditer(au, [], [['readwrite', 'updateifcopy']], casting='equiv', op_dtypes=[np.dtype('f4')]) - assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder) - assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder) - assert_equal(i.operands[0], a) - i.operands[0][:] = 2 - i = None + with i: + # context manager triggers UPDATEIFCOPY on i at exit + assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder) + assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder) + assert_equal(i.operands[0], a) + i.operands[0][:] = 2 assert_equal(au, [2]*6) - + i = None # should not raise a DeprecationWarning # Byte order change by requesting NBO a = np.arange(6, dtype='f4') au = a.byteswap().newbyteorder() assert_(a.dtype.byteorder != au.dtype.byteorder) - i = nditer(au, [], [['readwrite', 'updateifcopy', 'nbo']], casting='equiv') - assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder) - assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder) - assert_equal(i.operands[0], a) - i.operands[0][:] = 2 - i = None + with nditer(au, [], [['readwrite', 'updateifcopy', 'nbo']], + casting='equiv') as i: + # context manager triggers UPDATEIFCOPY on i at exit + assert_equal(i.dtypes[0].byteorder, a.dtype.byteorder) + assert_equal(i.operands[0].dtype.byteorder, a.dtype.byteorder) + assert_equal(i.operands[0], a) + i.operands[0][:] = 12345 + i.operands[0][:] = 2 assert_equal(au, [2]*6) # Unaligned input @@ -812,11 +836,11 @@ def test_iter_nbo_align_contig(): assert_(not i.operands[0].flags.aligned) assert_equal(i.operands[0], a) # With 'aligned', should make a copy - i = nditer(a, [], [['readwrite', 'updateifcopy', 'aligned']]) - assert_(i.operands[0].flags.aligned) - assert_equal(i.operands[0], a) - i.operands[0][:] = 3 - i = None + with nditer(a, [], [['readwrite', 'updateifcopy', 'aligned']]) as i: + assert_(i.operands[0].flags.aligned) + # context manager triggers UPDATEIFCOPY on i at exit + assert_equal(i.operands[0], a) + i.operands[0][:] = 3 assert_equal(a, [3]*6) # Discontiguous input @@ -838,16 +862,17 @@ def test_iter_array_cast(): # No cast 'f4' -> 'f4' a = np.arange(6, dtype='f4').reshape(2, 3) i = nditer(a, [], [['readwrite']], op_dtypes=[np.dtype('f4')]) - assert_equal(i.operands[0], a) - assert_equal(i.operands[0].dtype, np.dtype('f4')) + with i: + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('f4')) # Byte-order cast '<f4' -> '>f4' a = np.arange(6, dtype='<f4').reshape(2, 3) - i = nditer(a, [], [['readwrite', 'updateifcopy']], + with nditer(a, [], [['readwrite', 'updateifcopy']], casting='equiv', - op_dtypes=[np.dtype('>f4')]) - assert_equal(i.operands[0], a) - assert_equal(i.operands[0].dtype, np.dtype('>f4')) + op_dtypes=[np.dtype('>f4')]) as i: + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('>f4')) # Safe case 'f4' -> 'f8' a = np.arange(24, dtype='f4').reshape(2, 3, 4).swapaxes(1, 2) @@ -869,30 +894,28 @@ def test_iter_array_cast(): # Same-kind cast 'f8' -> 'f4' -> 'f8' a = np.arange(24, dtype='f8').reshape(2, 3, 4).T - i = nditer(a, [], + with nditer(a, [], [['readwrite', 'updateifcopy']], casting='same_kind', - op_dtypes=[np.dtype('f4')]) - assert_equal(i.operands[0], a) - assert_equal(i.operands[0].dtype, np.dtype('f4')) - assert_equal(i.operands[0].strides, (4, 16, 48)) - # Check that 
UPDATEIFCOPY is activated - i.operands[0][2, 1, 1] = -12.5 - assert_(a[2, 1, 1] != -12.5) - i = None + op_dtypes=[np.dtype('f4')]) as i: + assert_equal(i.operands[0], a) + assert_equal(i.operands[0].dtype, np.dtype('f4')) + assert_equal(i.operands[0].strides, (4, 16, 48)) + # Check that WRITEBACKIFCOPY is activated at exit + i.operands[0][2, 1, 1] = -12.5 + assert_(a[2, 1, 1] != -12.5) assert_equal(a[2, 1, 1], -12.5) a = np.arange(6, dtype='i4')[::-2] - i = nditer(a, [], + with nditer(a, [], [['writeonly', 'updateifcopy']], casting='unsafe', - op_dtypes=[np.dtype('f4')]) - assert_equal(i.operands[0].dtype, np.dtype('f4')) - # Even though the stride was negative in 'a', it - # becomes positive in the temporary - assert_equal(i.operands[0].strides, (4,)) - i.operands[0][:] = [1, 2, 3] - i = None + op_dtypes=[np.dtype('f4')]) as i: + assert_equal(i.operands[0].dtype, np.dtype('f4')) + # Even though the stride was negative in 'a', it + # becomes positive in the temporary + assert_equal(i.operands[0].strides, (4,)) + i.operands[0][:] = [1, 2, 3] assert_equal(a, [1, 2, 3]) def test_iter_array_cast_errors(): @@ -1027,9 +1050,10 @@ def test_iter_object_arrays_basic(): i = nditer(a.reshape(2, 2).T, ['refs_ok', 'buffered'], ['readwrite'], order='C') - for x in i: - x[...] = None - vals, i, x = [None]*3 + with i: + for x in i: + x[...] = None + vals, i, x = [None]*3 if HAS_REFCOUNT: assert_(sys.getrefcount(obj) == rc-1) assert_equal(a, np.array([None]*4, dtype='O')) @@ -1039,15 +1063,17 @@ def test_iter_object_arrays_conversions(): a = np.arange(6, dtype='O') i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], casting='unsafe', op_dtypes='i4') - for x in i: - x[...] += 1 + with i: + for x in i: + x[...] += 1 assert_equal(a, np.arange(6)+1) a = np.arange(6, dtype='i4') i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], casting='unsafe', op_dtypes='O') - for x in i: - x[...] += 1 + with i: + for x in i: + x[...] += 1 assert_equal(a, np.arange(6)+1) # Non-contiguous object array @@ -1056,8 +1082,9 @@ def test_iter_object_arrays_conversions(): a[:] = np.arange(6) i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], casting='unsafe', op_dtypes='i4') - for x in i: - x[...] += 1 + with i: + for x in i: + x[...] += 1 assert_equal(a, np.arange(6)+1) #Non-contiguous value array @@ -1066,11 +1093,12 @@ def test_iter_object_arrays_conversions(): a[:] = np.arange(6) + 98172488 i = nditer(a, ['refs_ok', 'buffered'], ['readwrite'], casting='unsafe', op_dtypes='O') - ob = i[0][()] - if HAS_REFCOUNT: - rc = sys.getrefcount(ob) - for x in i: - x[...] += 1 + with i: + ob = i[0][()] + if HAS_REFCOUNT: + rc = sys.getrefcount(ob) + for x in i: + x[...] 
+= 1 if HAS_REFCOUNT: assert_(sys.getrefcount(ob) == rc-1) assert_equal(a, np.arange(6)+98172489) @@ -1146,14 +1174,15 @@ def test_iter_copy_if_overlap(): for flag in ['readonly', 'writeonly', 'readwrite']: a = arange(10) i = nditer([a], ['copy_if_overlap'], [[flag]]) - assert_(i.operands[0] is a) + with i: + assert_(i.operands[0] is a) # Copy needed, 2 ops, read-write overlap x = arange(10) a = x[1:] b = x[:-1] - i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) - assert_(not np.shares_memory(*i.operands)) + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) as i: + assert_(not np.shares_memory(*i.operands)) # Copy not needed with elementwise, 2 ops, exactly same arrays x = arange(10) @@ -1161,9 +1190,10 @@ def test_iter_copy_if_overlap(): b = x i = nditer([a, b], ['copy_if_overlap'], [['readonly', 'overlap_assume_elementwise'], ['readwrite', 'overlap_assume_elementwise']]) - assert_(i.operands[0] is a and i.operands[1] is b) - i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) - assert_(i.operands[0] is a and not np.shares_memory(i.operands[1], b)) + with i: + assert_(i.operands[0] is a and i.operands[1] is b) + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) as i: + assert_(i.operands[0] is a and not np.shares_memory(i.operands[1], b)) # Copy not needed, 2 ops, no overlap x = arange(10) @@ -1176,8 +1206,8 @@ def test_iter_copy_if_overlap(): x = arange(4, dtype=np.int8) a = x[3:] b = x.view(np.int32)[:1] - i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) - assert_(not np.shares_memory(*i.operands)) + with nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) as i: + assert_(not np.shares_memory(*i.operands)) # Copy needed, 3 ops, read-write overlap for flag in ['writeonly', 'readwrite']: @@ -1185,11 +1215,11 @@ def test_iter_copy_if_overlap(): a = x b = x.T c = x - i = nditer([a, b, c], ['copy_if_overlap'], - [['readonly'], ['readonly'], [flag]]) - a2, b2, c2 = i.operands - assert_(not np.shares_memory(a2, c2)) - assert_(not np.shares_memory(b2, c2)) + with nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['readonly'], [flag]]) as i: + a2, b2, c2 = i.operands + assert_(not np.shares_memory(a2, c2)) + assert_(not np.shares_memory(b2, c2)) # Copy not needed, 3 ops, read-only overlap x = np.ones([10, 10]) @@ -1324,17 +1354,15 @@ def test_iter_copy(): assert_equal([x[()] for x in i], [x[()] for x in j]) # Casting iterator - i = nditer(a, ['buffered'], order='F', casting='unsafe', - op_dtypes='f8', buffersize=5) - j = i.copy() - i = None + with nditer(a, ['buffered'], order='F', casting='unsafe', + op_dtypes='f8', buffersize=5) as i: + j = i.copy() assert_equal([x[()] for x in j], a.ravel(order='F')) a = arange(24, dtype='<i4').reshape(2, 3, 4) - i = nditer(a, ['buffered'], order='F', casting='unsafe', - op_dtypes='>f8', buffersize=5) - j = i.copy() - i = None + with nditer(a, ['buffered'], order='F', casting='unsafe', + op_dtypes='>f8', buffersize=5) as i: + j = i.copy() assert_equal([x[()] for x in j], a.ravel(order='F')) def test_iter_allocate_output_simple(): @@ -1353,11 +1381,12 @@ def test_iter_allocate_output_buffered_readwrite(): a = arange(6) i = nditer([a, None], ['buffered', 'delay_bufalloc'], [['readonly'], ['allocate', 'readwrite']]) - i.operands[1][:] = 1 - i.reset() - for x in i: - x[1][...] += x[0][...] - assert_equal(i.operands[1], a+1) + with i: + i.operands[1][:] = 1 + i.reset() + for x in i: + x[1][...] += x[0][...] 
+ assert_equal(i.operands[1], a+1) def test_iter_allocate_output_itorder(): # The allocated output should match the iteration order @@ -1652,10 +1681,11 @@ def test_iter_write_buffering(): order='C', buffersize=16) x = 0 - while not i.finished: - i[0] = x - x += 1 - i.iternext() + with i: + while not i.finished: + i[0] = x + x += 1 + i.iternext() assert_equal(a.ravel(order='C'), np.arange(24)) def test_iter_buffering_delayed_alloc(): @@ -1679,10 +1709,11 @@ def test_iter_buffering_delayed_alloc(): i.reset() assert_(not i.has_delayed_bufalloc) assert_equal(i.multi_index, (0,)) - assert_equal(i[0], 0) - i[1] = 1 - assert_equal(i[0:2], [0, 1]) - assert_equal([[x[0][()], x[1][()]] for x in i], list(zip(range(6), [1]*6))) + with i: + assert_equal(i[0], 0) + i[1] = 1 + assert_equal(i[0:2], [0, 1]) + assert_equal([[x[0][()], x[1][()]] for x in i], list(zip(range(6), [1]*6))) def test_iter_buffered_cast_simple(): # Test that buffering can handle a simple cast @@ -1693,8 +1724,9 @@ def test_iter_buffered_cast_simple(): casting='same_kind', op_dtypes=[np.dtype('f8')], buffersize=3) - for v in i: - v[...] *= 2 + with i: + for v in i: + v[...] *= 2 assert_equal(a, 2*np.arange(10, dtype='f4')) @@ -1707,8 +1739,9 @@ def test_iter_buffered_cast_byteswapped(): casting='same_kind', op_dtypes=[np.dtype('f8').newbyteorder()], buffersize=3) - for v in i: - v[...] *= 2 + with i: + for v in i: + v[...] *= 2 assert_equal(a, 2*np.arange(10, dtype='f4')) @@ -1721,8 +1754,9 @@ def test_iter_buffered_cast_byteswapped(): casting='unsafe', op_dtypes=[np.dtype('c8').newbyteorder()], buffersize=3) - for v in i: - v[...] *= 2 + with i: + for v in i: + v[...] *= 2 assert_equal(a, 2*np.arange(10, dtype='f8')) @@ -1736,8 +1770,9 @@ def test_iter_buffered_cast_byteswapped_complex(): casting='same_kind', op_dtypes=[np.dtype('c16')], buffersize=3) - for v in i: - v[...] *= 2 + with i: + for v in i: + v[...] *= 2 assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) a = np.arange(10, dtype='c8') @@ -1747,8 +1782,9 @@ def test_iter_buffered_cast_byteswapped_complex(): casting='same_kind', op_dtypes=[np.dtype('c16').newbyteorder()], buffersize=3) - for v in i: - v[...] *= 2 + with i: + for v in i: + v[...] *= 2 assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) a = np.arange(10, dtype=np.clongdouble).newbyteorder().byteswap() @@ -1758,8 +1794,9 @@ def test_iter_buffered_cast_byteswapped_complex(): casting='same_kind', op_dtypes=[np.dtype('c16')], buffersize=3) - for v in i: - v[...] *= 2 + with i: + for v in i: + v[...] *= 2 assert_equal(a, 2*np.arange(10, dtype=np.clongdouble) + 4j) a = np.arange(10, dtype=np.longdouble).newbyteorder().byteswap() @@ -1768,8 +1805,9 @@ def test_iter_buffered_cast_byteswapped_complex(): casting='same_kind', op_dtypes=[np.dtype('f4')], buffersize=7) - for v in i: - v[...] *= 2 + with i: + for v in i: + v[...] 
*= 2 assert_equal(a, 2*np.arange(10, dtype=np.longdouble)) def test_iter_buffered_cast_structured_type(): @@ -1880,12 +1918,13 @@ def test_iter_buffered_cast_subarray(): i = nditer(a, ['buffered', 'refs_ok'], ['readwrite'], casting='unsafe', op_dtypes=sdt2) - assert_equal(i[0].dtype, np.dtype(sdt2)) - count = 0 - for x in i: - assert_(np.all(x['a'] == count)) - x['a'][0] += 2 - count += 1 + with i: + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_(np.all(x['a'] == count)) + x['a'][0] += 2 + count += 1 assert_equal(a['a'], np.arange(6).reshape(6, 1, 1)+2) # many -> one element -> back (copies just element 0) @@ -1896,12 +1935,13 @@ def test_iter_buffered_cast_subarray(): i = nditer(a, ['buffered', 'refs_ok'], ['readwrite'], casting='unsafe', op_dtypes=sdt2) - assert_equal(i[0].dtype, np.dtype(sdt2)) - count = 0 - for x in i: - assert_equal(x['a'], count) - x['a'] += 2 - count += 1 + with i: + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + x['a'] += 2 + count += 1 assert_equal(a['a'], np.arange(6).reshape(6, 1, 1, 1)*np.ones((1, 3, 2, 2))+2) # many -> one element -> back (copies just element 0) @@ -2109,27 +2149,29 @@ def test_iter_buffered_reduce_reuse(): nditer2 = np.nditer([arr.copy(), None], op_axes=op_axes, flags=flags, op_flags=op_flags, op_dtypes=op_dtypes) - nditer2.operands[-1][...] = 0 - nditer2.reset() - nditer2.iterindex = skip + with nditer2: + nditer2.operands[-1][...] = 0 + nditer2.reset() + nditer2.iterindex = skip - for (a2_in, b2_in) in nditer2: - b2_in += a2_in.astype(np.int_) + for (a2_in, b2_in) in nditer2: + b2_in += a2_in.astype(np.int_) - comp_res = nditer2.operands[-1] + comp_res = nditer2.operands[-1] for bufsize in range(0, 3**3): nditer1 = np.nditer([arr, None], op_axes=op_axes, flags=flags, op_flags=op_flags, buffersize=bufsize, op_dtypes=op_dtypes) - nditer1.operands[-1][...] = 0 - nditer1.reset() - nditer1.iterindex = skip + with nditer1: + nditer1.operands[-1][...] = 0 + nditer1.reset() + nditer1.iterindex = skip - for (a1_in, b1_in) in nditer1: - b1_in += a1_in.astype(np.int_) + for (a1_in, b1_in) in nditer1: + b1_in += a1_in.astype(np.int_) - res = nditer1.operands[-1] + res = nditer1.operands[-1] assert_array_equal(res, comp_res) @@ -2288,7 +2330,21 @@ class TestIterNested(object): assert_equal(vals, [[0, 1, 2], [3, 4, 5]]) vals = None - # updateifcopy + # writebackifcopy - using conext manager + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + op_flags=['readwrite', 'updateifcopy'], + casting='same_kind', + op_dtypes='f8') + with i, j: + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[0, 1, 2], [3, 4, 5]]) + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + + # writebackifcopy - using close() a = arange(6, dtype='f4').reshape(2, 3) i, j = np.nested_iters(a, [[0], [1]], op_flags=['readwrite', 'updateifcopy'], @@ -2299,9 +2355,11 @@ class TestIterNested(object): for y in j: y[...] 
+= 1 assert_equal(a, [[0, 1, 2], [3, 4, 5]]) - i, j, x, y = (None,)*4 # force the updateifcopy + i.close() + j.close() assert_equal(a, [[1, 2, 3], [4, 5, 6]]) + def test_dtype_buffered(self): # Test nested iteration with buffering to change dtype @@ -2338,6 +2396,21 @@ class TestIterNested(object): vals.append([z for z in k]) assert_equal(vals, [[0, 2, 4], [1, 3, 5], [6, 8, 10], [7, 9, 11]]) + def test_iter_nested_iters_dtype_buffered(self): + # Test nested iteration with buffering to change dtype + + a = arange(6, dtype='f4').reshape(2, 3) + i, j = np.nested_iters(a, [[0], [1]], + flags=['buffered'], + op_flags=['readwrite'], + casting='same_kind', + op_dtypes='f8') + with i, j: + assert_equal(j[0].dtype, np.dtype('f8')) + for x in i: + for y in j: + y[...] += 1 + assert_equal(a, [[1, 2, 3], [4, 5, 6]]) def test_iter_reduction_error(): @@ -2359,33 +2432,35 @@ def test_iter_reduction(): [['readonly'], ['readwrite', 'allocate']], op_axes=[[0], [-1]]) # Need to initialize the output operand to the addition unit - i.operands[1][...] = 0 - # Do the reduction - for x, y in i: - y[...] += x - # Since no axes were specified, should have allocated a scalar - assert_equal(i.operands[1].ndim, 0) - assert_equal(i.operands[1], np.sum(a)) + with i: + i.operands[1][...] = 0 + # Do the reduction + for x, y in i: + y[...] += x + # Since no axes were specified, should have allocated a scalar + assert_equal(i.operands[1].ndim, 0) + assert_equal(i.operands[1], np.sum(a)) a = np.arange(6).reshape(2, 3) i = nditer([a, None], ['reduce_ok', 'external_loop'], [['readonly'], ['readwrite', 'allocate']], op_axes=[[0, 1], [-1, -1]]) # Need to initialize the output operand to the addition unit - i.operands[1][...] = 0 - # Reduction shape/strides for the output - assert_equal(i[1].shape, (6,)) - assert_equal(i[1].strides, (0,)) - # Do the reduction - for x, y in i: - # Use a for loop instead of ``y[...] += x`` - # (equivalent to ``y[...] = y[...].copy() + x``), - # because y has zero strides we use for the reduction - for j in range(len(y)): - y[j] += x[j] - # Since no axes were specified, should have allocated a scalar - assert_equal(i.operands[1].ndim, 0) - assert_equal(i.operands[1], np.sum(a)) + with i: + i.operands[1][...] = 0 + # Reduction shape/strides for the output + assert_equal(i[1].shape, (6,)) + assert_equal(i[1].strides, (0,)) + # Do the reduction + for x, y in i: + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] = y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] + # Since no axes were specified, should have allocated a scalar + assert_equal(i.operands[1].ndim, 0) + assert_equal(i.operands[1], np.sum(a)) # This is a tricky reduction case for the buffering double loop # to handle @@ -2397,15 +2472,16 @@ def test_iter_reduction(): 'buffered', 'delay_bufalloc'], [['readonly'], ['readwrite', 'allocate']], op_axes=[None, [0, -1, 1]], buffersize=10) - it1.operands[1].fill(0) - it2.operands[1].fill(0) - it2.reset() - for x in it1: - x[1][...] += x[0] - for x in it2: - x[1][...] += x[0] - assert_equal(it1.operands[1], it2.operands[1]) - assert_equal(it2.operands[1].sum(), a.size) + with it1, it2: + it1.operands[1].fill(0) + it2.operands[1].fill(0) + it2.reset() + for x in it1: + x[1][...] += x[0] + for x in it2: + x[1][...] 
+= x[0] + assert_equal(it1.operands[1], it2.operands[1]) + assert_equal(it2.operands[1].sum(), a.size) def test_iter_buffering_reduction(): # Test doing buffered reductions with the iterator @@ -2415,11 +2491,12 @@ def test_iter_buffering_reduction(): i = nditer([a, b], ['reduce_ok', 'buffered'], [['readonly'], ['readwrite', 'nbo']], op_axes=[[0], [-1]]) - assert_equal(i[1].dtype, np.dtype('f8')) - assert_(i[1].dtype != b.dtype) - # Do the reduction - for x, y in i: - y[...] += x + with i: + assert_equal(i[1].dtype, np.dtype('f8')) + assert_(i[1].dtype != b.dtype) + # Do the reduction + for x, y in i: + y[...] += x # Since no axes were specified, should have allocated a scalar assert_equal(b, np.sum(a)) @@ -2429,15 +2506,16 @@ def test_iter_buffering_reduction(): [['readonly'], ['readwrite', 'nbo']], op_axes=[[0, 1], [0, -1]]) # Reduction shape/strides for the output - assert_equal(i[1].shape, (3,)) - assert_equal(i[1].strides, (0,)) - # Do the reduction - for x, y in i: - # Use a for loop instead of ``y[...] += x`` - # (equivalent to ``y[...] = y[...].copy() + x``), - # because y has zero strides we use for the reduction - for j in range(len(y)): - y[j] += x[j] + with i: + assert_equal(i[1].shape, (3,)) + assert_equal(i[1].strides, (0,)) + # Do the reduction + for x, y in i: + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] = y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] assert_equal(b, np.sum(a, axis=1)) # Iterator inner double loop was wrong on this one @@ -2447,9 +2525,10 @@ def test_iter_buffering_reduction(): [['readonly'], ['readwrite', 'allocate']], op_axes=[[-1, 0], [-1, -1]], itershape=(2, 2)) - it.operands[1].fill(0) - it.reset() - assert_equal(it[0], [1, 2, 1, 2]) + with it: + it.operands[1].fill(0) + it.reset() + assert_equal(it[0], [1, 2, 1, 2]) # Iterator inner loop should take argument contiguity into account x = np.ones((7, 13, 8), np.int8)[4:6,1:11:6,1:5].transpose(1, 2, 0) @@ -2461,8 +2540,9 @@ def test_iter_buffering_reduction(): it = np.nditer([y, x], ['buffered', 'external_loop', 'reduce_ok'], [['readwrite'], ['readonly']]) - for a, b in it: - a.fill(2) + with it: + for a, b in it: + a.fill(2) assert_equal(y_base[1::2], y_base_copy[1::2]) assert_equal(y_base[::2], 2) @@ -2479,8 +2559,9 @@ def test_iter_buffering_reduction_reuse_reduce_loops(): buffersize=5) bufsizes = [] - for x, y in it: - bufsizes.append(x.shape[0]) + with it: + for x, y in it: + bufsizes.append(x.shape[0]) assert_equal(bufsizes, [5, 2, 5, 2]) assert_equal(sum(bufsizes), a.size) @@ -2559,8 +2640,9 @@ def test_iter_writemasked(): it = np.nditer([a, msk], [], [['readwrite', 'writemasked'], ['readonly', 'arraymask']]) - for x, m in it: - x[...] = 1 + with it: + for x, m in it: + x[...] = 1 # Because we violated the semantics, all the values became 1 assert_equal(a, [1, 1, 1]) @@ -2569,8 +2651,9 @@ def test_iter_writemasked(): it = np.nditer([a, msk], ['buffered'], [['readwrite', 'writemasked'], ['readonly', 'arraymask']]) - for x, m in it: - x[...] = 2.5 + with it: + for x, m in it: + x[...] = 2.5 # Because we violated the semantics, all the values became 2.5 assert_equal(a, [2.5, 2.5, 2.5]) @@ -2582,8 +2665,9 @@ def test_iter_writemasked(): ['readonly', 'arraymask']], op_dtypes=['i8', None], casting='unsafe') - for x, m in it: - x[...] = 3 + with it: + for x, m in it: + x[...] 
= 3 # Even though we violated the semantics, only the selected values # were copied back assert_equal(a, [3, 3, 2.5]) @@ -2711,3 +2795,97 @@ def test_iter_too_large_with_multiindex(): # an axis with size 1 is removed: with assert_raises(ValueError): _multiarray_tests.test_nditer_too_large(arrays, i*2 + 1, mode) + +def test_writebacks(): + a = np.arange(6, dtype='f4') + au = a.byteswap().newbyteorder() + assert_(a.dtype.byteorder != au.dtype.byteorder) + it = nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + with it: + it.operands[0][:] = 100 + assert_equal(au, 100) + # do it again, this time raising an error to check writeback on exit + it = nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + try: + with it: + assert_equal(au.flags.writeable, False) + it.operands[0][:] = 0 + raise ValueError('exit context manager on exception') + except ValueError: + pass + assert_equal(au, 0) + assert_equal(au.flags.writeable, True) + # cannot reuse the iterator outside the context manager + assert_raises(ValueError, getattr, it, 'operands') + + it = nditer(au, [], [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + with it: + x = it.operands[0] + x[:] = 6 + assert_(x.flags.writebackifcopy) + assert_equal(au, 6) + assert_(not x.flags.writebackifcopy) + x[:] = 123 # x.data still valid + assert_equal(au, 6) # but not connected to au + + do_close = 1 + # test like above, only in C, and with an option to skip the NpyIter_Close + _multiarray_tests.test_nditer_writeback(3, do_close, au, op_dtypes=[np.dtype('f4')]) + assert_equal(au, 3) + it = nditer(au, [], + [['readwrite', 'updateifcopy']], + casting='equiv', op_dtypes=[np.dtype('f4')]) + au = None + # do not crash if original data array is decrefed + with it: + for x in it: + x[...] = 123 + # make sure we cannot reenter the iterator + enter = it.__enter__ + assert_raises(ValueError, enter) +
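The writeback lifecycle that ``test_writebacks`` exercises above is dense; distilled into a minimal sketch of the same 1.15 semantics (illustration only, not part of the patch): ``'updateifcopy'`` hands out a temporary cast buffer, and the data only reaches the original array when the context manager exits, even when it exits via an exception.

import numpy as np

a = np.arange(6, dtype='f4')
au = a.byteswap().newbyteorder()      # same values, non-native byte order
it = np.nditer(au, [], [['readwrite', 'updateifcopy']],
               casting='equiv', op_dtypes=[np.dtype('f4')])
with it:
    it.operands[0][:] = 100           # writes only the temporary buffer
assert (au == 100).all()              # flushed back to au on __exit__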
+def test_close_equivalent(): + ''' using a context manager and using nditer.close are equivalent + ''' + def add_close(x, y, out=None): + addop = np.add + it = np.nditer([x, y, out], [], + [['readonly'], ['readonly'], ['writeonly','allocate']]) + for (a, b, c) in it: + addop(a, b, out=c) + ret = it.operands[2] + it.close() + return ret + + def add_context(x, y, out=None): + addop = np.add + it = np.nditer([x, y, out], [], + [['readonly'], ['readonly'], ['writeonly','allocate']]) + with it: + for (a, b, c) in it: + addop(a, b, out=c) + return it.operands[2] + z = add_close(range(5), range(5)) + assert_equal(z, range(0, 10, 2)) + z = add_context(range(5), range(5)) + assert_equal(z, range(0, 10, 2)) + +def test_close_raises(): + it = np.nditer(np.arange(3)) + assert_equal(next(it), 0) + it.close() + assert_raises(StopIteration, next, it) + assert_raises(ValueError, getattr, it, 'operands') + +def test_warn_noclose(): + a = np.arange(6, dtype='f4') + au = a.byteswap().newbyteorder() + do_close = 0 + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + # test like above, only in C, and with an option to skip the NpyIter_Close + _multiarray_tests.test_nditer_writeback(3, do_close, au, op_dtypes=[np.dtype('f4')]) + assert len(sup.log) == 1 diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 099b63c40..72beef471 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -54,7 +54,8 @@ __all__ = [ 'bincount', 'digitize', 'cov', 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', - 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc' + 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc', + 'quantile' ] @@ -3427,7 +3428,7 @@ def percentile(a, q, axis=None, out=None, interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points + use when the desired percentile lies between two data points ``i < j``: * 'linear': ``i + (j - i) * fraction``, where ``fraction`` @@ -3463,6 +3464,7 @@ def percentile(a, q, axis=None, out=None, mean median : equivalent to ``percentile(..., 50)`` nanpercentile + quantile : equivalent to percentile, except with q in the range [0, 1]. Notes ----- @@ -3539,6 +3541,111 @@ def percentile(a, q, axis=None, out=None, a, q, axis, out, overwrite_input, interpolation, keepdims) +def quantile(a, q, axis=None, out=None, + overwrite_input=False, interpolation='linear', keepdims=False): + """ + Compute the `q`th quantile of the data along the specified axis. + + .. versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary.
+ overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes are undefined. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single quantile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + mean + percentile : equivalent to quantile, but with q in the range [0, 100]. + median : equivalent to ``quantile(..., 0.5)`` + nanquantile + + Notes + ----- + Given a vector ``V`` of length ``N``, the ``q``-th quantile of + ``V`` is the value ``q`` of the way from the minimum to the + maximum in a sorted copy of ``V``. The values and distances of + the two nearest neighbors as well as the `interpolation` parameter + will determine the quantile if the normalized ranking does not + match the location of ``q`` exactly. This function is the same as + the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the + same as the maximum if ``q=1.0``.
+ + Examples + -------- + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> np.quantile(a, 0.5) + 3.5 + >>> np.quantile(a, 0.5, axis=0) + array([ 6.5, 4.5, 2.5]) + >>> np.quantile(a, 0.5, axis=1) + array([ 7., 2.]) + >>> np.quantile(a, 0.5, axis=1, keepdims=True) + array([[ 7.], + [ 2.]]) + >>> m = np.quantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.quantile(a, 0.5, axis=0, out=out) + array([ 6.5, 4.5, 2.5]) + >>> m + array([ 6.5, 4.5, 2.5]) + >>> b = a.copy() + >>> np.quantile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a == b) + """ + q = np.asanyarray(q) + if not _quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _quantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + +
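To make the 'linear' interpolation rule documented above concrete (illustration only, not part of the patch): for the docstring's array the sorted flattened values are [1, 2, 3, 4, 7, 10], so q = 0.5 lands at index 0.5 * (6 - 1) = 2.5, giving i = 3, j = 4, fraction = 0.5 and the result 3 + (4 - 3) * 0.5 = 3.5, matching the example output. The factor-of-100 relationship to percentile can be checked the same way:

import numpy as np

a = np.array([[10, 7, 4], [3, 2, 1]])
assert np.quantile(a, 0.5) == 3.5                    # hand-computed above
assert np.quantile(a, 0.5) == np.percentile(a, 50)   # same result, q vs q*100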
def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False): """Assumes that q is in [0, 1], and is an ndarray""" diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index dddc0e5b8..abd2da1a2 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -16,6 +16,7 @@ Functions - `nanvar` -- variance of non-NaN values - `nanstd` -- standard deviation of non-NaN values - `nanmedian` -- median of non-NaN values +- `nanquantile` -- qth quantile of non-NaN values - `nanpercentile` -- qth percentile of non-NaN values """ @@ -29,7 +30,7 @@ from numpy.lib import function_base __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', - 'nancumsum', 'nancumprod' + 'nancumsum', 'nancumprod', 'nanquantile' ] @@ -1057,7 +1058,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, `a` after this function completes is undefined. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points + use when the desired percentile lies between two data points ``i < j``: * 'linear': ``i + (j - i) * fraction``, where ``fraction`` @@ -1095,6 +1096,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, nanmean nanmedian : equivalent to ``nanpercentile(..., 50)`` percentile, median, mean + nanquantile : equivalent to nanpercentile, but with q in the range [0, 1]. Notes ----- @@ -1144,6 +1146,111 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, a, q, axis, out, overwrite_input, interpolation, keepdims) +def nanquantile(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=np._NoValue): + """ + Compute the qth quantile of the data along the specified axis, + while ignoring nan values. + Returns the qth quantile(s) of the array elements. + + .. versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, containing + nan values to be ignored. + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes are undefined. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims` this + will raise a RuntimeError. + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single quantile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + quantile + nanmean, nanmedian + nanmedian : equivalent to ``nanquantile(..., 0.5)`` + nanpercentile : same as nanquantile, but with q in the range [0, 100]. + + Examples + -------- + >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) + >>> a[0][1] = np.nan + >>> a + array([[ 10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.quantile(a, 0.5) + nan + >>> np.nanquantile(a, 0.5) + 3.5 + >>> np.nanquantile(a, 0.5, axis=0) + array([ 6.5, 2., 2.5]) + >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) + array([[ 7.], + [ 2.]]) + >>> m = np.nanquantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.nanquantile(a, 0.5, axis=0, out=out) + array([ 6.5, 2., 2.5]) + >>> m + array([ 6.5, 2. , 2.5]) + >>> b = a.copy() + >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a == b) + """ + a = np.asanyarray(a) + q = np.asanyarray(q) + if not function_base._quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _nanquantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=np._NoValue): """Assumes that q is in [0, 1], and is an ndarray""" diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 59379bdda..67585443b 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -936,7 +936,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if encoding is not None: fencoding = encoding # we must assume local encoding - # TOOD emit portability warning? + # TODO emit portability warning?
elif fencoding is None: import locale fencoding = locale.getpreferredencoding() diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py index 6c240db7f..2abe5cdd1 100644 --- a/numpy/lib/stride_tricks.py +++ b/numpy/lib/stride_tricks.py @@ -123,9 +123,12 @@ def _broadcast_to(array, shape, subok, readonly): needs_writeable = not readonly and array.flags.writeable extras = ['reduce_ok'] if needs_writeable else [] op_flag = 'readwrite' if needs_writeable else 'readonly' - broadcast = np.nditer( + it = np.nditer( (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras, - op_flags=[op_flag], itershape=shape, order='C').itviews[0] + op_flags=[op_flag], itershape=shape, order='C') + with it: + # never really has writebackifcopy semantics + broadcast = it.itviews[0] result = _maybe_view_as_subclass(array, broadcast) if needs_writeable and not result.flags.writeable: result.flags.writeable = True diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 6653b5ba1..43d62a7ff 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -2749,6 +2749,28 @@ class TestPercentile(object): a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) +class TestQuantile(object): + # most of this is already tested by TestPercentile + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.quantile(x, 0), 0.) + assert_equal(np.quantile(x, 1), 3.5) + assert_equal(np.quantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + class TestMedian(object): def test_basic(self): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 84aca9915..f58c9e33d 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -23,7 +23,7 @@ from numpy.ma.testutils import assert_equal from numpy.testing import ( assert_warns, assert_, SkipTest, assert_raises_regex, assert_raises, assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY, - HAS_REFCOUNT, suppress_warnings, + HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, ) @@ -937,7 +937,7 @@ class TestLoadTxt(LoadTxtBase): assert_equal(res, tgt) def test_complex_misformatted(self): - # test for backward compatability + # test for backward compatibility # some complex formats used to generate x+-yj a = np.zeros((2, 2), dtype=np.complex128) re = np.pi @@ -2416,14 +2416,5 @@ def test_load_refcount(): np.savez(f, [1, 2, 3]) f.seek(0) - assert_(gc.isenabled()) - gc.disable() - try: - gc.collect() + with assert_no_gc_cycles(): np.load(f) - # gc.collect returns the number of unreachable objects in cycles that - # were found -- we are checking that no cycles were created by np.load - n_objects_in_cycles = gc.collect() - finally: - gc.enable() - assert_equal(n_objects_in_cycles, 0) diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index 1f403f7b8..e69d9dd7d 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -886,3 +886,39 @@ class TestNanFunctions_Percentile(object): megamat = np.ones((3, 4, 5, 6)) assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)) + +
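The test class below pins down the NaN contract; as a minimal sketch of the behaviour being tested (illustration only, not part of the patch):

import numpy as np

x = np.array([1., 2., np.nan, 4.])
assert np.isnan(np.quantile(x, 0.5))      # quantile propagates NaN
assert np.nanquantile(x, 0.5) == 2.0      # nanquantile skips it: median of [1, 2, 4]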
+class TestNanFunctions_Quantile(object): + # most of this is already tested by TestPercentile + + def test_regression(self): + ar = np.arange(24).reshape(2, 3, 4).astype(float) + ar[0][1] = np.nan + + assert_equal(np.nanquantile(ar, q=0.5), np.nanpercentile(ar, q=50)) + assert_equal(np.nanquantile(ar, q=0.5, axis=0), + np.nanpercentile(ar, q=50, axis=0)) + assert_equal(np.nanquantile(ar, q=0.5, axis=1), + np.nanpercentile(ar, q=50, axis=1)) + assert_equal(np.nanquantile(ar, q=[0.5], axis=1), + np.nanpercentile(ar, q=[50], axis=1)) + assert_equal(np.nanquantile(ar, q=[0.25, 0.5, 0.75], axis=1), + np.nanpercentile(ar, q=[25, 50, 75], axis=1)) + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.nanquantile(x, 0), 0.) + assert_equal(np.nanquantile(x, 1), 3.5) + assert_equal(np.nanquantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index 507ecb1e2..b0c0b0c48 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -7,6 +7,7 @@ from __future__ import division, absolute_import, print_function import os import sys import re +import gc import operator import warnings from functools import partial, wraps @@ -14,6 +15,7 @@ import shutil import contextlib from tempfile import mkdtemp, mkstemp from unittest.case import SkipTest +import pprint from numpy.core import( float32, empty, arange, array_repr, ndarray, isnat, array) @@ -35,7 +37,7 @@ __all__ = [ 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', - '_assert_valid_refcount', '_gen_alignment_data', + '_assert_valid_refcount', '_gen_alignment_data', 'assert_no_gc_cycles', ] @@ -2272,3 +2274,90 @@ class suppress_warnings(object): return func(*args, **kwargs) return new_func + + +@contextlib.contextmanager +def _assert_no_gc_cycles_context(name=None): + __tracebackhide__ = True # Hide traceback for py.test + + # not meaningful to test if there is no refcounting + if not HAS_REFCOUNT: + yield + return + + assert_(gc.isenabled()) + gc.disable() + gc_debug = gc.get_debug() + try: + for i in range(100): + if gc.collect() == 0: + break + else: + raise RuntimeError( + "Unable to fully collect garbage - perhaps a __del__ method is " + "creating more reference cycles?") + + gc.set_debug(gc.DEBUG_SAVEALL) + yield + # gc.collect returns the number of unreachable objects in cycles that + # were found -- we are checking that no cycles were created in the context + n_objects_in_cycles = gc.collect() + objects_in_cycles = gc.garbage[:] + finally: + del gc.garbage[:] + gc.set_debug(gc_debug) + gc.enable() + + if n_objects_in_cycles: + name_str = " when calling %s" % name if name is not None else "" + raise AssertionError( + "Reference cycles were found{}: {} objects were collected, " + "of which {} are shown below:{}" + .format( + name_str, + n_objects_in_cycles, + len(objects_in_cycles), + ''.join( + "\n {} object with id={}:\n {}".format( + type(o).__name__, + id(o), + pprint.pformat(o).replace('\n', '\n ') + ) for o in objects_in_cycles + ) + ) + ) + + +def assert_no_gc_cycles(*args, **kwargs): + """
Fail if the given callable produces any reference cycles. + + If called with all arguments omitted, may be used as a context manager: + + with assert_no_gc_cycles(): + do_something() + + .. versionadded:: 1.15.0 + + Parameters + ---------- + func : callable + The callable to test. + \\*args : Arguments + Arguments passed to `func`. + \\*\\*kwargs : Kwargs + Keyword arguments passed to `func`. + + Returns + ------- + Nothing. The result is deliberately discarded to ensure that all cycles + are found. + + """ + if not args: + return _assert_no_gc_cycles_context() + + func = args[0] + args = args[1:] + with _assert_no_gc_cycles_context(name=func.__name__): + func(*args, **kwargs) diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py index 35f81d8a7..0592e62f8 100644 --- a/numpy/testing/tests/test_utils.py +++ b/numpy/testing/tests/test_utils.py @@ -6,6 +6,7 @@ import os import itertools import textwrap import pytest +import weakref import numpy as np from numpy.testing import ( @@ -14,7 +15,7 @@ from numpy.testing import ( assert_raises, assert_warns, assert_no_warnings, assert_allclose, assert_approx_equal, assert_array_almost_equal_nulp, assert_array_max_ulp, clear_and_catch_warnings, suppress_warnings, assert_string_equal, assert_, - tempdir, temppath, + tempdir, temppath, assert_no_gc_cycles, HAS_REFCOUNT ) @@ -1360,3 +1361,76 @@ def test_clear_and_catch_warnings_inherit(): warnings.simplefilter('ignore') warnings.warn('Some warning') assert_equal(my_mod.__warningregistry__, {}) + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +class TestAssertNoGcCycles(object): + """ Test assert_no_gc_cycles """ + def test_passes(self): + def no_cycle(): + b = [] + b.append([]) + return b + + with assert_no_gc_cycles(): + no_cycle() + + assert_no_gc_cycles(no_cycle) + + + def test_asserts(self): + def make_cycle(): + a = [] + a.append(a) + a.append(a) + return a + + with assert_raises(AssertionError): + with assert_no_gc_cycles(): + make_cycle() + + with assert_raises(AssertionError): + assert_no_gc_cycles(make_cycle) + + + def test_fails(self): + """ + Test that in cases where the garbage cannot be collected, we raise an + error, instead of hanging forever trying to clear it. + """ + + class ReferenceCycleInDel(object): + """ + An object that not only contains a reference cycle, but creates new + cycles whenever it's garbage-collected and its __del__ runs + """ + make_cycle = True + + def __init__(self): + self.cycle = self + + def __del__(self): + # break the current cycle so that `self` can be freed + self.cycle = None + + if ReferenceCycleInDel.make_cycle: + # but create a new one so that the garbage collector has more + # work to do. + ReferenceCycleInDel() + + try: + w = weakref.ref(ReferenceCycleInDel()) + try: + with assert_raises(RuntimeError): + # this will be unable to get a baseline empty garbage + assert_no_gc_cycles(lambda: None) + except AssertionError: + # the above test is only necessary if the GC actually tried to free + # our object anyway, which python 2.7 does not. 
+ if w() is not None: + pytest.skip("GC does not call __del__ on cyclic objects") + raise + + finally: + # make sure that we stop creating reference cycles + ReferenceCycleInDel.make_cycle = False diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py index 78cf405cf..184adcc74 100644 --- a/numpy/testing/utils.py +++ b/numpy/testing/utils.py @@ -25,5 +25,5 @@ __all__ = [ 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', - '_assert_valid_refcount', '_gen_alignment_data', + '_assert_valid_refcount', '_gen_alignment_data', 'assert_no_gc_cycles' ]
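For completeness, the new ``assert_no_gc_cycles`` helper in use, mirroring what ``TestAssertNoGcCycles`` above verifies (illustration only, not part of the patch): both the callable form and the context-manager form fail only if the code under test creates reference cycles.

from numpy.testing import assert_no_gc_cycles

def make_cycle():
    a = []
    a.append(a)                        # list refers to itself: a reference cycle

assert_no_gc_cycles(list, range(5))    # cycle-free allocation: passes

try:
    with assert_no_gc_cycles():
        make_cycle()
except AssertionError:
    pass                               # the cycle is detected and reported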