diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-07 13:05:35 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-07 13:05:35 +0200 |
commit | 8c3c271b0d6b5f56b86e3f177caf3e916b509b52 (patch) | |
tree | 1a8341bb622d8ae18c6eb5061f5845619502e648 /lib/git/async | |
parent | 619662a9138fd78df02c52cae6dc89db1d70a0e5 (diff) | |
download | gitpython-8c3c271b0d6b5f56b86e3f177caf3e916b509b52.tar.gz |
Added a task-order cache, and a lock to prevent us from walking the graph while changing tasks
Now processing more items to test performance, in dual-threaded mode as well; it's rather bad, and I have to figure out the reason for this — probably the GIL — but queues could help
Diffstat (limited to 'lib/git/async')
-rw-r--r-- | lib/git/async/pool.py | 26 | ||||
-rw-r--r-- | lib/git/async/thread.py | 3 |
2 files changed, 23 insertions, 6 deletions
diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py index 009096f2..26a6a182 100644 --- a/lib/git/async/pool.py +++ b/lib/git/async/pool.py @@ -107,6 +107,7 @@ class Pool(object): '_consumed_tasks', # a queue with tasks that are done or had an error '_workers', # list of worker threads '_queue', # master queue for tasks + '_taskorder_cache', # map task id -> ordered dependent tasks '_taskgraph_lock', # lock for accessing the task graph ) @@ -130,6 +131,7 @@ class Pool(object): self._workers = list() self._queue = self.TaskQueueCls() self._taskgraph_lock = self.LockCls() + self._taskorder_cache = dict() self.set_size(size) def __del__(self): @@ -149,10 +151,21 @@ class Pool(object): Tasks which are not done will be put onto the queue for processing, which is fine as we walked them depth-first.""" - dfirst_tasks = list() - # for the walk, we must make sure the ordering does not change - # Note: the result of this could be cached - self._tasks.visit_input_inclusive_depth_first(task, lambda n: dfirst_tasks.append(n)) + # for the walk, we must make sure the ordering does not change. Even + # when accessing the cache, as it is related to graph changes + self._taskgraph_lock.acquire() + try: + try: + dfirst_tasks = self._taskorder_cache[id(task)] + except KeyError: + # have to retrieve the list from the graph + dfirst_tasks = list() + self._tasks.visit_input_inclusive_depth_first(task, lambda n: dfirst_tasks.append(n)) + self._taskorder_cache[id(task)] = dfirst_tasks + # END handle cached order retrieval + finally: + self._taskgraph_lock.release() + # END handle locking # check the min count on all involved tasks, and be sure that we don't # have any task which produces less than the maximum min-count of all tasks @@ -208,7 +221,8 @@ class Pool(object): # the following loops are kind of unrolled - code duplication # should make things execute faster. Putting the if statements # into the loop would be less code, but ... 
slower - print actual_count, numchunks, chunksize, remainder, task._out_wc.size() + # DEBUG + # print actual_count, numchunks, chunksize, remainder, task._out_wc.size() if self._workers: # respect the chunk size, and split the task up if we want # to process too much. This can be defined per task @@ -332,6 +346,7 @@ class Pool(object): task.set_done() self._taskgraph_lock.acquire() try: + self._taskorder_cache.clear() self._tasks.del_node(task) finally: self._taskgraph_lock.release() @@ -360,6 +375,7 @@ class Pool(object): self._taskgraph_lock.acquire() try: + self._taskorder_cache.clear() self._tasks.add_node(task) finally: self._taskgraph_lock.release() diff --git a/lib/git/async/thread.py b/lib/git/async/thread.py index 2ed002e9..f875f094 100644 --- a/lib/git/async/thread.py +++ b/lib/git/async/thread.py @@ -141,7 +141,8 @@ class WorkerThread(TerminatableThread): # needing exactly one function, and one arg assert len(tasktuple) == 2, "Need tuple of function, arg - it could be more flexible, but its reduced to what we need" routine, arg = tasktuple - + # DEBUG + # print "%s: picked up: %s(%s)" % (self.name, routine, arg) try: rval = None if inspect.ismethod(routine): |