summaryrefslogtreecommitdiff
path: root/numpy/core/src/multiarray/nditer_impl.h
blob: a33e4f90fa4be691b8c65db363ede69121018a5f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
/*
 * This is a PRIVATE INTERNAL NumPy header, intended to be used *ONLY*
 * by the iterator implementation code. All other internal NumPy code
 * should use the exposed iterator API.
 */
#ifndef NPY_ITERATOR_IMPLEMENTATION_CODE
#error This header is intended for use ONLY by iterator implementation code.
#endif

#ifndef NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_
#define NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_

#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <structmember.h>

#include "numpy/arrayobject.h"
#include "npy_pycompat.h"
#include "convert_datatype.h"

#include "lowlevel_strided_loops.h"
#include "dtype_transfer.h"
#include "dtype_traversal.h"


/********** ITERATOR CONSTRUCTION TIMING **************/
#define NPY_IT_CONSTRUCTION_TIMING 0

#if NPY_IT_CONSTRUCTION_TIMING
#define NPY_IT_TIME_POINT(var) { \
            unsigned int hi, lo; \
            __asm__ __volatile__ ( \
                "rdtsc" \
                : "=d" (hi), "=a" (lo)); \
            var = (((unsigned long long)hi) << 32) | lo; \
        }
#define NPY_IT_PRINT_TIME_START(var) { \
            printf("%30s: start\n", #var); \
            c_temp = var; \
        }
#define NPY_IT_PRINT_TIME_VAR(var) { \
            printf("%30s: %6.0f clocks\n", #var, \
                    ((double)(var-c_temp))); \
            c_temp = var; \
        }
#else
#define NPY_IT_TIME_POINT(var)
#endif

/******************************************************/

/********** PRINTF DEBUG TRACING **************/
#define NPY_IT_DBG_TRACING 0

#if NPY_IT_DBG_TRACING
#define NPY_IT_DBG_PRINT(s) printf("%s", s)
#define NPY_IT_DBG_PRINT1(s, p1) printf(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2) printf(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) printf(s, p1, p2, p3)
#else
#define NPY_IT_DBG_PRINT(s)
#define NPY_IT_DBG_PRINT1(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3)
#endif
/**********************************************/

/* Rounds up a number of bytes to be divisible by sizeof intp */
#if NPY_SIZEOF_INTP == 4
#define NPY_INTP_ALIGNED(size) ((size + 0x3)&(-0x4))
#else
#define NPY_INTP_ALIGNED(size) ((size + 0x7)&(-0x8))
#endif

/* Internal iterator flags */

/* The perm is the identity */
#define NPY_ITFLAG_IDENTPERM    (1 << 0)
/* The perm has negative entries (indicating flipped axes) */
#define NPY_ITFLAG_NEGPERM      (1 << 1)
/* The iterator is tracking an index */
#define NPY_ITFLAG_HASINDEX     (1 << 2)
/* The iterator is tracking a multi-index */
#define NPY_ITFLAG_HASMULTIINDEX    (1 << 3)
/* The iteration order was forced on construction */
#define NPY_ITFLAG_FORCEDORDER  (1 << 4)
/* The inner loop is handled outside the iterator */
#define NPY_ITFLAG_EXLOOP      (1 << 5)
/* The iterator is ranged */
#define NPY_ITFLAG_RANGE        (1 << 6)
/* The iterator is buffered */
#define NPY_ITFLAG_BUFFER       (1 << 7)
/* The iterator should grow the buffered inner loop when possible */
#define NPY_ITFLAG_GROWINNER    (1 << 8)
/* There is just one iteration, can specialize iternext for that */
#define NPY_ITFLAG_ONEITERATION (1 << 9)
/* Delay buffer allocation until first Reset* call */
#define NPY_ITFLAG_DELAYBUF     (1 << 10)
/* Iteration needs API access during iternext */
#define NPY_ITFLAG_NEEDSAPI     (1 << 11)
/* Iteration includes one or more operands being reduced */
#define NPY_ITFLAG_REDUCE       (1 << 12)
/* Reduce iteration doesn't need to recalculate reduce loops next time */
#define NPY_ITFLAG_REUSE_REDUCE_LOOPS (1 << 13)
/*
 * Offset of (combined) ArrayMethod flags for all transfer functions.
 * For now, we use the top 8 bits.
 */
#define NPY_ITFLAG_TRANSFERFLAGS_SHIFT 24

/* Internal iterator per-operand iterator flags */

/* The operand will be written to */
#define NPY_OP_ITFLAG_WRITE        0x0001
/* The operand will be read from */
#define NPY_OP_ITFLAG_READ         0x0002
/* The operand needs type conversion/byte swapping/alignment */
#define NPY_OP_ITFLAG_CAST         0x0004
/* The operand never needs buffering */
#define NPY_OP_ITFLAG_BUFNEVER     0x0008
/* The operand is aligned */
#define NPY_OP_ITFLAG_ALIGNED      0x0010
/* The operand is being reduced */
#define NPY_OP_ITFLAG_REDUCE       0x0020
/* The operand is for temporary use, does not have a backing array */
#define NPY_OP_ITFLAG_VIRTUAL      0x0040
/* The operand requires masking when copying buffer -> array */
#define NPY_OP_ITFLAG_WRITEMASKED  0x0080
/* The operand's data pointer is pointing into its buffer */
#define NPY_OP_ITFLAG_USINGBUFFER  0x0100
/* The operand must be copied (with UPDATEIFCOPY if also ITFLAG_WRITE) */
#define NPY_OP_ITFLAG_FORCECOPY    0x0200
/* The operand has temporary data, write it back at dealloc */
#define NPY_OP_ITFLAG_HAS_WRITEBACK 0x0400

/*
 * The data layout of the iterator is fully specified by
 * a triple (itflags, ndim, nop).  These three variables
 * are expected to exist in all functions calling these macros,
 * either as true variables initialized to the correct values
 * from the iterator, or as constants in the case of specialized
 * functions such as the various iternext functions.
 */

struct NpyIter_InternalOnly {
    /* Initial fixed position data */
    npy_uint32 itflags;
    npy_uint8 ndim, nop;
    npy_int8 maskop;
    npy_intp itersize, iterstart, iterend;
    /* iterindex is only used if RANGED or BUFFERED is set */
    npy_intp iterindex;
    /* The rest is variable */
    char iter_flexdata[];
};

typedef struct NpyIter_AxisData_tag NpyIter_AxisData;
typedef struct NpyIter_TransferInfo_tag NpyIter_TransferInfo;
typedef struct NpyIter_BufferData_tag NpyIter_BufferData;

typedef npy_int16 npyiter_opitflags;

/* Byte sizes of the iterator members */
#define NIT_PERM_SIZEOF(itflags, ndim, nop) \
        NPY_INTP_ALIGNED(NPY_MAXDIMS)
#define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
#define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop+1))
#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop+1))
#define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
#define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \
        (NPY_INTP_ALIGNED(sizeof(npyiter_opitflags) * nop))
#define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \
        ((itflags&NPY_ITFLAG_BUFFER) ? ( \
            (NPY_SIZEOF_INTP)*(6 + 5*nop) + sizeof(NpyIter_TransferInfo) * nop) : 0)

/* Byte offsets of the iterator members starting from iter->iter_flexdata */
#define NIT_PERM_OFFSET() \
        (0)
#define NIT_DTYPES_OFFSET(itflags, ndim, nop) \
        (NIT_PERM_OFFSET() + \
         NIT_PERM_SIZEOF(itflags, ndim, nop))
#define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) \
        (NIT_DTYPES_OFFSET(itflags, ndim, nop) + \
         NIT_DTYPES_SIZEOF(itflags, ndim, nop))
#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) \
        (NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) + \
         NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop))
#define NIT_OPERANDS_OFFSET(itflags, ndim, nop) \
        (NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) + \
         NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop))
#define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) \
        (NIT_OPERANDS_OFFSET(itflags, ndim, nop) + \
         NIT_OPERANDS_SIZEOF(itflags, ndim, nop))
#define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) \
        (NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) + \
         NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop))
#define NIT_AXISDATA_OFFSET(itflags, ndim, nop) \
        (NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) + \
         NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop))

/* Internal-only ITERATOR DATA MEMBER ACCESS */
#define NIT_ITFLAGS(iter) \
        ((iter)->itflags)
#define NIT_NDIM(iter) \
        ((iter)->ndim)
#define NIT_NOP(iter) \
        ((iter)->nop)
#define NIT_MASKOP(iter) \
        ((iter)->maskop)
#define NIT_ITERSIZE(iter) \
        (iter->itersize)
#define NIT_ITERSTART(iter) \
        (iter->iterstart)
#define NIT_ITEREND(iter) \
        (iter->iterend)
#define NIT_ITERINDEX(iter) \
        (iter->iterindex)
#define NIT_PERM(iter)  ((npy_int8 *)( \
        iter->iter_flexdata + NIT_PERM_OFFSET()))
#define NIT_DTYPES(iter) ((PyArray_Descr **)( \
        iter->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop)))
#define NIT_RESETDATAPTR(iter) ((char **)( \
        iter->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop)))
#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \
        iter->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop)))
#define NIT_OPERANDS(iter) ((PyArrayObject **)( \
        iter->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop)))
#define NIT_OPITFLAGS(iter) ((npyiter_opitflags *)( \
        iter->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop)))
#define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \
        iter->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop)))
#define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \
        iter->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop)))

/* Internal-only BUFFERDATA MEMBER ACCESS */

struct NpyIter_TransferInfo_tag {
    NPY_cast_info read;
    NPY_cast_info write;
    NPY_traverse_info clear;
    /* Probably unnecessary, but make sure what follows is intp aligned: */
    npy_intp _unused_ensure_alignment[];
};

struct NpyIter_BufferData_tag {
    npy_intp buffersize, size, bufiterend,
             reduce_pos, reduce_outersize, reduce_outerdim;
    npy_intp bd_flexdata;
};

#define NBF_BUFFERSIZE(bufferdata) ((bufferdata)->buffersize)
#define NBF_SIZE(bufferdata) ((bufferdata)->size)
#define NBF_BUFITEREND(bufferdata) ((bufferdata)->bufiterend)
#define NBF_REDUCE_POS(bufferdata) ((bufferdata)->reduce_pos)
#define NBF_REDUCE_OUTERSIZE(bufferdata) ((bufferdata)->reduce_outersize)
#define NBF_REDUCE_OUTERDIM(bufferdata) ((bufferdata)->reduce_outerdim)
#define NBF_STRIDES(bufferdata) ( \
        &(bufferdata)->bd_flexdata + 0)
#define NBF_PTRS(bufferdata) ((char **) \
        (&(bufferdata)->bd_flexdata + 1*(nop)))
#define NBF_REDUCE_OUTERSTRIDES(bufferdata) ( \
        (&(bufferdata)->bd_flexdata + 2*(nop)))
#define NBF_REDUCE_OUTERPTRS(bufferdata) ((char **) \
        (&(bufferdata)->bd_flexdata + 3*(nop)))
#define NBF_BUFFERS(bufferdata) ((char **) \
        (&(bufferdata)->bd_flexdata + 4*(nop)))
#define NBF_TRANSFERINFO(bufferdata) ((NpyIter_TransferInfo *) \
        (&(bufferdata)->bd_flexdata + 5*(nop)))

/* Internal-only AXISDATA MEMBER ACCESS. */
struct NpyIter_AxisData_tag {
    npy_intp shape, index;
    npy_intp ad_flexdata;
};
#define NAD_SHAPE(axisdata) ((axisdata)->shape)
#define NAD_INDEX(axisdata) ((axisdata)->index)
#define NAD_STRIDES(axisdata) ( \
        &(axisdata)->ad_flexdata + 0)
#define NAD_PTRS(axisdata) ((char **) \
        (&(axisdata)->ad_flexdata + 1*(nop+1)))

#define NAD_NSTRIDES() \
        ((nop) + ((itflags&NPY_ITFLAG_HASINDEX) ? 1 : 0))

/* Size of one AXISDATA struct within the iterator */
#define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) (( \
        /* intp shape */ \
        1 + \
        /* intp index */ \
        1 + \
        /* intp stride[nop+1] AND char* ptr[nop+1] */ \
        2*((nop)+1) \
        )*(size_t)NPY_SIZEOF_INTP)

/*
 * Macro to advance an AXISDATA pointer by a specified count.
 * Requires that sizeof_axisdata be previously initialized
 * to NIT_AXISDATA_SIZEOF(itflags, ndim, nop).
 */
#define NIT_INDEX_AXISDATA(axisdata, index) ((NpyIter_AxisData *) \
        (((char *)(axisdata)) + (index)*sizeof_axisdata))
#define NIT_ADVANCE_AXISDATA(axisdata, count) \
        axisdata = NIT_INDEX_AXISDATA(axisdata, count)

/* Size of the whole iterator */
#define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) ( \
        sizeof(struct NpyIter_InternalOnly) + \
        NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \
        NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*(ndim ? ndim : 1))

/* Internal helper functions shared between implementation files */

/**
 * Undo the axis permutation of the iterator. When the operand has fewer
 * dimensions then the iterator, this can return negative values for
 * inserted (broadcast) dimensions.
 *
 * @param axis Axis for which to undo the iterator axis permutation.
 * @param ndim If `op_axes` is being used, this is the iterator dimension,
 *             otherwise this is the operand dimension.
 * @param perm The iterator axis permutation NIT_PERM(iter)
 * @param axis_flipped Will be set to true if this is a flipped axis
 *        (i.e. is iterated in reversed order) and otherwise false.
 *        Can be NULL if the information is not needed.
 * @return The unpermuted axis. Without `op_axes` this is correct, with
 *         `op_axes` this indexes into `op_axes` (unpermuted iterator axis)
 */
static inline int
npyiter_undo_iter_axis_perm(
        int axis, int ndim, const npy_int8 *perm, npy_bool *axis_flipped)
{
    npy_int8 p = perm[axis];
    /* The iterator treats axis reversed, thus adjust by ndim */
    npy_bool flipped = p < 0;
    if (axis_flipped != NULL) {
        *axis_flipped = flipped;
    }
    if (flipped) {
        axis = ndim + p;
    }
    else {
        axis = ndim - p - 1;
    }
    return axis;
}

NPY_NO_EXPORT void
npyiter_coalesce_axes(NpyIter *iter);
NPY_NO_EXPORT int
npyiter_allocate_buffers(NpyIter *iter, char **errmsg);
NPY_NO_EXPORT void
npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex);
NPY_NO_EXPORT int
npyiter_copy_from_buffers(NpyIter *iter);
NPY_NO_EXPORT int
npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs);
NPY_NO_EXPORT void
npyiter_clear_buffers(NpyIter *iter);

/*
 * Function to get the ArrayMethod flags of the transfer functions.
 * TODO: This function should be public and removed from `nditer_impl.h`, but
 *       this requires making the ArrayMethod flags public API first.
 */
NPY_NO_EXPORT int
NpyIter_GetTransferFlags(NpyIter *iter);

#endif  /* NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_ */