From def0f73989047c4ddf9b11da05ad2c9c8e387331 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 7 Jun 2010 23:20:37 +0200
Subject: introduced a new counter keeping track of the scheduled tasks - this
 prevents unnecessary tasks from being scheduled, as we keep track of how many
 items will be produced for the task at hand. This introduces additional
 locking, but performs well in multithreaded mode. Performance of the master
 queue is still a huge issue; it is currently the limiting factor, as
 bypassing the master queue in serial mode gives 15x the performance, which is
 what I would need

---
 lib/git/async/pool.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 19fc9f6e..4c97feb0 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -80,12 +80,13 @@ class RPoolChannel(RChannel):
 		# *
 		have_enough = False
 		if count > 0:
-			have_enough = self._wc._queue.qsize() >= count
-		# END risky game
+			have_enough = self._task.scheduled_item_count() >= count or self._wc._queue.qsize() >= count
+		# END
 		
 		########## prepare ##############################
 		if not have_enough:
 			self._pool._prepare_channel_read(self._task, count)
+		# END prepare pool scheduling
 		
 		
 		####### read data ########
@@ -260,26 +261,33 @@ class Pool(object):
 			queue = self._queue
 			if numchunks > 1:
 				for i in xrange(numchunks):
+					# schedule them as early as we know about them
+					task.add_scheduled_items(chunksize)
 					queue.put((task.process, chunksize))
 				# END for each chunk to put
 			else:
+				task.add_scheduled_items(chunksize)
 				queue.put((task.process, chunksize))
 			# END try efficient looping
 			
 			if remainder:
+				task.add_scheduled_items(remainder)
 				queue.put((task.process, remainder))
 			# END handle chunksize
 		else:
 			# no workers, so we have to do the work ourselves
 			if numchunks > 1:
 				for i in xrange(numchunks):
+					task.add_scheduled_items(chunksize)
 					task.process(chunksize)
 				# END for each chunk to put
 			else:
+				task.add_scheduled_items(chunksize)
 				task.process(chunksize)
 			# END try efficient looping
 			
 			if remainder:
+				task.add_scheduled_items(remainder)
 				task.process(remainder)
 			# END handle chunksize
 		# END handle serial mode
@@ -348,6 +356,9 @@ class Pool(object):
 				self._workers.append(worker)
 			# END for each new worker to create
 		elif cur_count > size:
+			# we can safely increase the size, even from serial mode, as we would
+			# only be able to do this if the serial ( sync ) mode finished processing.
+			# Just adding more workers is not a problem at all.
 			del_count = cur_count - size
 			for i in range(del_count):
 				self._workers[i].stop_and_join()
-- 
cgit v1.2.1
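The patch above only wires the counter into the pool; scheduled_item_count() and add_scheduled_items() belong to the task object, whose implementation is not part of this diff. A minimal sketch of how such a counter could work, assuming the task guards it with the _slock attribute that the follow-up commit replaces - the method names are taken from the hunks, the bodies are guesses:

from threading import Lock

class TaskCounterSketch(object):
	"""Hypothetical task-side counter. Only the method names and the
	_slock attribute appear in the diffs; the bodies are assumptions."""

	def __init__(self):
		self._slock = Lock()		# replaced by a DummyLock in serial mode
		self._num_scheduled = 0		# items queued for processing, not yet produced

	def add_scheduled_items(self, count):
		# bump the counter before the corresponding chunk hits the queue
		self._slock.acquire()
		try:
			self._num_scheduled += count
		finally:
			self._slock.release()

	def scheduled_item_count(self):
		# read under the lock so concurrent workers see a consistent value
		self._slock.acquire()
		try:
			return self._num_scheduled
		finally:
			self._slock.release()

Using explicit acquire()/release() rather than a with-block keeps the lock interface down to two methods, so a no-op replacement only has to provide those - which is exactly what the next commit exploits.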
From 898d47d1711accdfded8ee470520fdb96fb12d46 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 7 Jun 2010 23:47:06 +0200
Subject: Task scheduled items lock now uses a dummy lock in serial mode,
 improving its performance considerably. Channels now use the AsyncQueue,
 boosting their throughput to about 5k items / s - this is something one can
 work with, considering the runtime of each item should be large enough to
 keep the threads busy.

This could be a basis; further testing is needed.
---
 lib/git/async/pool.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 4c97feb0..d6b5711d 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -5,6 +5,7 @@ from threading import Lock
 from util import (
 		SyncQueue,
 		AsyncQueue,
+		DummyLock
 	)
 
 from task import InputChannelTask
@@ -462,6 +463,11 @@ class Pool(object):
 			# END add task relation
 		# END handle input channels for connections
 		
+		# fix locks - in serial mode, the task does not need real locks
+		if self.size() == 0:
+			task._slock = DummyLock()
+		# END improve locks
+		
 		return rc
 	
 	#} END interface
-- 
cgit v1.2.1
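The hunk above imports DummyLock from util without showing it. Assuming it merely has to satisfy the acquire()/release() protocol used around the task's counter, the no-op stand-in could be as small as this (a sketch; the real class lives in lib/git/async/util.py, which this diff does not include):

class DummyLock(object):
	"""Hypothetical no-op lock for single-threaded use; interface-compatible
	with the acquire()/release() calls a real threading.Lock receives."""

	def acquire(self, blocking=1):
		return True		# succeeds immediately - nothing can contend in serial mode

	def release(self):
		pass			# nothing was held, so there is nothing to release

In serial mode (self.size() == 0) only one thread ever touches the task, so skipping real lock acquisition removes pure overhead; that is where the speedup mentioned in the commit message would come from.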