diff options
| author | Raymond Hettinger <python@rcn.com> | 2003-11-12 14:32:26 +0000 | 
|---|---|---|
| committer | Raymond Hettinger <python@rcn.com> | 2003-11-12 14:32:26 +0000 | 
| commit | ad983e79d6f215235d205245c2599620e33cf719 (patch) | |
| tree | f6dcc71e1d751c52d72b095806143a692af7d2b0 /Modules/itertoolsmodule.c | |
| parent | 767126d7b979020585da0d2b35bda5aae7a40d30 (diff) | |
| download | cpython-git-ad983e79d6f215235d205245c2599620e33cf719.tar.gz | |
Improve the implementation of itertools.tee().
Formerly, underlying queue was implemented in terms of two lists.  The
new queue is a series of singly-linked fixed length lists.
The new implementation runs much faster, supports multi-way tees, and
allows tees of tees without additional memory costs.
The root ideas for this structure were contributed by Andrew Koenig
and Guido van Rossum.
Diffstat (limited to 'Modules/itertoolsmodule.c')
| -rw-r--r-- | Modules/itertoolsmodule.c | 380 | 
1 files changed, 199 insertions, 181 deletions
| diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index be2d7356fc..a341a6630a 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -7,122 +7,102 @@     All rights reserved.  */ -/* independent iterator object supporting the tee object ***************/ - -/* The tee object maintains a queue of data seen by the leading iterator -   but not seen by the trailing iterator.  When the leading iterator -   gets data from PyIter_Next() it appends a copy to the inbasket stack. -   When the trailing iterator needs data, it is popped from the outbasket -   stack.  If the outbasket stack is empty, then it is filled from the -   inbasket (i.e. the queue is implemented using two stacks so that only -   O(n) operations like append() and pop() are used to access data and -   calls to reverse() never move any data element more than once). - -   If one of the independent iterators gets deallocated, it sets tee's -   save_mode to zero so that future calls to PyIter_Next() stop getting -   saved to the queue (because there is no longer a second iterator that -   may need the data). +/* tee object and with supporting function and objects ***************/ + +/* The teedataobject pre-allocates space for LINKCELLS number of objects. +   To help the object fit neatly inside cache lines (space for 16 to 32 +   pointers), the value should be a multiple of 16 minus  space for  +   the other structure members including PyHEAD overhead.  The larger the +   value, the less memory overhead per object and the less time spent +   allocating/deallocating new links.  The smaller the number, the less +   wasted space and the more rapid freeing of older data.  */ +#define LINKCELLS 57  typedef struct {  	PyObject_HEAD  	PyObject *it; -	PyObject *inbasket; -	PyObject *outbasket; -	int save_mode; -	int num_seen; -} teeobject; +	int numread; +	PyObject *nextlink; +	PyObject *(values[LINKCELLS]); +} teedataobject;  typedef struct {  	PyObject_HEAD -	teeobject *tee; -	int num_seen; -} iiobject; +	teedataobject *dataobj; +	int index; +} teeobject; -static PyTypeObject ii_type; +static PyTypeObject teedataobject_type;  static PyObject * -ii_next(iiobject *lz) +teedataobject_new(PyObject *it)  { -	teeobject *to = lz->tee; -	PyObject *result, *tmp; -	int n; +	teedataobject *tdo; -	if (lz->num_seen == to->num_seen) {  -		/* This instance is leading, use iter to get more data */ -		result = PyIter_Next(to->it); -		if (result == NULL) -			return NULL; -		if (to->save_mode) -			if(PyList_Append(to->inbasket, result) == -1){ -				Py_DECREF(result); -				return NULL; -			} -		to->num_seen++; -		lz->num_seen++; -		return result; -	} +	tdo = PyObject_New(teedataobject, &teedataobject_type); +	if (tdo == NULL) +		return NULL; -	/* This instance is trailing, get data from the queue */ -	if (PyList_GET_SIZE(to->outbasket) == 0) { -		/* outbasket is empty, so refill from the inbasket */ -		tmp = to->outbasket; -		to->outbasket = to->inbasket; -		to->inbasket = tmp; -		PyList_Reverse(to->outbasket); -	} +	tdo->numread = 0; +	tdo->nextlink = NULL; +	Py_INCREF(it); +	tdo->it = it; +	return (PyObject *)tdo; +} -	/* Pop result from to->outbasket */ -	n = PyList_GET_SIZE(to->outbasket); -	assert(n>0); -	result = PyList_GET_ITEM(to->outbasket, n-1); -	Py_INCREF(result); -	if (PyList_SetSlice(to->outbasket, n-1, n, NULL) == -1) { -		Py_DECREF(result); -		return NULL; -	} -	lz->num_seen++; -	return result; +static PyObject * +teedataobject_jumplink(teedataobject *tdo) +{ +	if (tdo->nextlink == NULL) +		tdo->nextlink = teedataobject_new(tdo->it); +	Py_INCREF(tdo->nextlink); +	return tdo->nextlink;  } -static void -ii_dealloc(iiobject *ii) +static PyObject * +teedataobject_getitem(teedataobject *tdo, int i)  { -	teeobject *to = ii->tee; -	int n; - -	PyObject_GC_UnTrack(ii); -	ii->tee->save_mode = 0;  /* Stop saving data */ -	if (ii->num_seen < to->num_seen) { -		/* It is the trailing iterator being freed; this means  -		   that the stored queue data is no longer needed */ -		n = PyList_GET_SIZE(to->inbasket); -		PyList_SetSlice(to->inbasket, 0, n, NULL); -		n = PyList_GET_SIZE(to->outbasket); -		PyList_SetSlice(to->outbasket, 0, n, NULL); +	PyObject *value; + +	assert(i < LINKCELLS); +	if (i < tdo->numread) +		value = tdo->values[i]; +	else { +		/* this is the lead iterator, so fetch more data */ +		assert(i == tdo->numread); +		value = PyIter_Next(tdo->it); +		if (value == NULL) +			return NULL; +		tdo->numread++; +		tdo->values[i] = value;  	} -	Py_XDECREF(ii->tee); -	PyObject_GC_Del(ii); +	Py_INCREF(value); +	return value;  } -static int -ii_traverse(iiobject *ii, visitproc visit, void *arg) +static void +teedataobject_dealloc(teedataobject *tdo)  { -	if (ii->tee) -		return visit((PyObject *)(ii->tee), arg); -	return 0; +	int i; + +	for (i=0 ; i<tdo->numread ; i++) +		Py_DECREF(tdo->values[i]); +	Py_XDECREF(tdo->it); +	Py_XDECREF(tdo->nextlink); +	PyObject_Del(tdo);  } -PyDoc_STRVAR(ii_doc, "Independent iterator created by tee(iterable)."); +PyDoc_STRVAR(teedataobject_doc, "Data container common to multiple tee objects."); -static PyTypeObject ii_type = { +static PyTypeObject teedataobject_type = {  	PyObject_HEAD_INIT(&PyType_Type)  	0,					/* ob_size */ -	"itertools.tee_iterator",		/* tp_name */ -	sizeof(iiobject),			/* tp_basicsize */ +	"itertools.tee_dataobject",		/* tp_name */ +	sizeof(teedataobject),			/* tp_basicsize */  	0,					/* tp_itemsize */  	/* methods */ -	(destructor)ii_dealloc,			/* tp_dealloc */ +	(destructor)teedataobject_dealloc,	/* tp_dealloc */  	0,					/* tp_print */  	0,					/* tp_getattr */  	0,					/* tp_setattr */ @@ -137,112 +117,96 @@ static PyTypeObject ii_type = {  	PyObject_GenericGetAttr,		/* tp_getattro */  	0,					/* tp_setattro */  	0,					/* tp_as_buffer */ -	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ -	ii_doc,					/* tp_doc */ -	(traverseproc)ii_traverse,		/* tp_traverse */ -	0,					/* tp_clear */ -	0,					/* tp_richcompare */	 -	0,					/* tp_weaklistoffset */ -	PyObject_SelfIter,			/* tp_iter */ -	(iternextfunc)ii_next,			/* tp_iternext */ -	0,					/* tp_methods */ +	Py_TPFLAGS_DEFAULT,			/* tp_flags */ +	teedataobject_doc,			/* tp_doc */ +	0,					/* tp_traverse */  }; -/* tee object **********************************************************/  static PyTypeObject tee_type;  static PyObject * -tee_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +tee_next(teeobject *to)  { -	PyObject *it = NULL; -	PyObject *iterable; -	PyObject *inbasket = NULL, *outbasket = NULL, *result = NULL; -	teeobject *to = NULL; -	int i; +	PyObject *value, *link; -	if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable)) +	if (to->index >= LINKCELLS) { +		link = teedataobject_jumplink(to->dataobj); +		Py_XDECREF(to->dataobj); +		to->dataobj = (teedataobject *)link; +		to->index = 0; +	} +	value = teedataobject_getitem(to->dataobj, to->index); +	if (value == NULL)  		return NULL; +	to->index++; +	return value; +} + +static PyObject * +tee_copy(teeobject *to) +{ +	teeobject *newto; + +	newto = PyObject_New(teeobject, &tee_type); +	if (newto == NULL) +		return NULL; +	Py_INCREF(to->dataobj); +	newto->dataobj = to->dataobj; +	newto->index = to->index; +	return (PyObject *)newto; +} + +PyDoc_STRVAR(teecopy_doc, "Returns an independent iterator."); + +static PyObject * +tee_fromiterable(PyObject *iterable) +{ +	teeobject *to; +	PyObject *it = NULL;  	it = PyObject_GetIter(iterable); -	if (it == NULL) goto fail; - -	inbasket = PyList_New(0); -	if (inbasket == NULL) goto fail; - -	outbasket = PyList_New(0); -	if (outbasket == NULL) goto fail; - -	to = PyObject_GC_New(teeobject, &tee_type); -	if (to == NULL)  goto fail; - -	to->it = it; -	to->inbasket = inbasket; -	to->outbasket = outbasket; -	to->save_mode = 1; -	to->num_seen = 0; -	PyObject_GC_Track(to); - -	/* create independent iterators */ -	result = PyTuple_New(2); -	if (result == NULL)  goto fail; -	for (i=0 ; i<2 ; i++) { -		iiobject *indep_it = PyObject_GC_New(iiobject, &ii_type); -		if (indep_it == NULL) goto fail; -		Py_INCREF(to); -		indep_it->tee = to; -		indep_it->num_seen = 0; -		PyObject_GC_Track(indep_it); -		PyTuple_SET_ITEM(result, i, (PyObject *)indep_it); +	if (it == NULL) +		return NULL; +	if (PyObject_TypeCheck(it, &tee_type)) { +		to = (teeobject *)tee_copy((teeobject *)it); +		goto done;  	} -	goto succeed; -fail: + +	to = PyObject_New(teeobject, &tee_type); +	if (to == NULL)  +		goto done; +	to->dataobj = (teedataobject *)teedataobject_new(it); +	to->index = 0; +done:  	Py_XDECREF(it); -	Py_XDECREF(inbasket); -	Py_XDECREF(outbasket); -	Py_XDECREF(result); -succeed: -	Py_XDECREF(to); -	return result; +	return (PyObject *)to;  } -static void -tee_dealloc(teeobject *to) +static PyObject * +tee_new(PyTypeObject *type, PyObject *args, PyObject *kw)  { -	PyObject_GC_UnTrack(to); -	Py_XDECREF(to->inbasket); -	Py_XDECREF(to->outbasket); -	Py_XDECREF(to->it); -	PyObject_GC_Del(to); +	PyObject *iterable; + +	if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable)) +		return NULL; +	return tee_fromiterable(iterable);  } -static int -tee_traverse(teeobject *to, visitproc visit, void *arg) +static void +tee_dealloc(teeobject *to)  { -	int err; - -	if (to->it) { -		err = visit(to->it, arg); -		if (err) -			return err; -	} -	if (to->inbasket) { -		err = visit(to->inbasket, arg); -		if (err) -			return err; -	} -	if (to->outbasket) { -		err = visit(to->outbasket, arg); -		if (err) -			return err; -	} -	return 0; +	Py_XDECREF(to->dataobj); +	PyObject_Del(to);  } -PyDoc_STRVAR(tee_doc, -"tee(iterable) --> (it1, it2)\n\ -\n\ -Split the iterable into two independent iterables."); +PyDoc_STRVAR(teeobject_doc, +"Iterator wrapped to make it copyable"); + +static PyMethodDef tee_methods[] = { +	{"__copy__",	(PyCFunction)tee_copy,	METH_NOARGS, teecopy_doc}, + 	{NULL,		NULL}		/* sentinel */ +};  static PyTypeObject tee_type = {  	PyObject_HEAD_INIT(NULL) @@ -266,15 +230,15 @@ static PyTypeObject tee_type = {  	0,				/* tp_getattro */  	0,				/* tp_setattro */  	0,				/* tp_as_buffer */ -	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,	/* tp_flags */ -	tee_doc,			/* tp_doc */ -	(traverseproc)tee_traverse,	/* tp_traverse */ +	Py_TPFLAGS_DEFAULT,		/* tp_flags */ +	teeobject_doc,			/* tp_doc */ +	0,				/* tp_traverse */  	0,				/* tp_clear */  	0,				/* tp_richcompare */  	0,				/* tp_weaklistoffset */ -	0,				/* tp_iter */ -	0,				/* tp_iternext */ -	0,				/* tp_methods */ +	PyObject_SelfIter,		/* tp_iter */ +	(iternextfunc)tee_next,		/* tp_iternext */ +	tee_methods,			/* tp_methods */  	0,				/* tp_members */  	0,				/* tp_getset */  	0,				/* tp_base */ @@ -285,8 +249,52 @@ static PyTypeObject tee_type = {  	0,				/* tp_init */  	0,				/* tp_alloc */  	tee_new,			/* tp_new */ +	PyObject_Del,			/* tp_free */  }; +static PyObject * +tee(PyObject *self, PyObject *args) +{ +	int i, n=2; +	PyObject *it, *iterable, *copyable, *result; + +	if (!PyArg_ParseTuple(args, "O|i", &iterable, &n)) +		return NULL; +	result = PyTuple_New(n); +	if (result == NULL) +		return NULL; +	if (n == 0) +		return result; +	it = PyObject_GetIter(iterable); +	if (it == NULL) { +		Py_DECREF(result); +		return NULL; +	} +	if (!PyObject_HasAttrString(it, "__copy__")) { +		copyable = tee_fromiterable(it); +		Py_DECREF(it); +		if (copyable == NULL) { +			Py_DECREF(result); +			return NULL; +		} +	} else +		copyable = it; +	PyTuple_SET_ITEM(result, 0, copyable); +	for (i=1 ; i<n ; i++) { +		copyable = PyObject_CallMethod(copyable, "__copy__", NULL); +		if (copyable == NULL) { +			Py_DECREF(result); +			return NULL; +		} +		PyTuple_SET_ITEM(result, i, copyable); +	} +	return result; +} + +PyDoc_STRVAR(tee_doc, +"tee(iterable, n=2) --> tuple of n independent iterators."); + +  /* cycle object **********************************************************/  typedef struct { @@ -2091,13 +2099,18 @@ islice(seq, [start,] stop [, step]) --> elements from\n\         seq[start:stop:step]\n\  imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n\  starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\ -tee(it) --> (it1, it2) splits one iterator into two \n\ +tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\  chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\  takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\  dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\  "); +static PyMethodDef module_methods[] = { +	{"tee",	(PyCFunction)tee,	METH_VARARGS, tee_doc}, + 	{NULL,		NULL}		/* sentinel */ +}; +  PyMODINIT_FUNC  inititertools(void)  { @@ -2105,7 +2118,6 @@ inititertools(void)  	PyObject *m;  	char *name;  	PyTypeObject *typelist[] = { -		&tee_type,  		&cycle_type,  		&dropwhile_type,  		&takewhile_type, @@ -2121,7 +2133,7 @@ inititertools(void)  		NULL  	}; -	m = Py_InitModule3("itertools", NULL, module_doc); +	m = Py_InitModule3("itertools", module_methods, module_doc);  	for (i=0 ; typelist[i] != NULL ; i++) {  		if (PyType_Ready(typelist[i]) < 0) @@ -2131,4 +2143,10 @@ inititertools(void)  		Py_INCREF(typelist[i]);  		PyModule_AddObject(m, name+1, (PyObject *)typelist[i]);  	} + +	if (PyType_Ready(&teedataobject_type) < 0) +		return; +	if (PyType_Ready(&tee_type) < 0) +		return; +  } | 
