summary | refs | log | tree | commit | diff
path: root/lib/git
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2010-06-10 00:24:49 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-10 00:24:49 +0200
commit: 3323464f85b986cba23176271da92a478b33ab9c (patch)
tree: 1633f83f6c5fd5a98396fc925b44602282cbd15a /lib/git
parent: 257a8a9441fca9a9bc384f673ba86ef5c3f1715d (diff)
download: gitpython-3323464f85b986cba23176271da92a478b33ab9c.tar.gz
Messy first version of a properly working depth-first graph method, which allows the pool to work as expected. Many more tests need to be added, and there is still a problem with shutdown: sometimes it won't kill all threads, mainly because the process came up with worker threads started, which cannot be [...] (commit message truncated in this view)
Diffstat (limited to 'lib/git')
-rw-r--r-- lib/git/async/graph.py 23
-rw-r--r-- lib/git/async/pool.py 6
-rw-r--r-- lib/git/async/task.py 5
-rw-r--r-- lib/git/async/util.py 8
4 files changed, 26 insertions, 16 deletions
diff --git a/lib/git/async/graph.py b/lib/git/async/graph.py
index 6386cbaa..e3999cdc 100644
--- a/lib/git/async/graph.py
+++ b/lib/git/async/graph.py
@@ -87,25 +87,26 @@ class Graph(object):
return self
- def visit_input_inclusive_depth_first(self, node, visitor=lambda n: True ):
- """Visit all input nodes of the given node, depth first, calling visitor
- for each node on our way. If the function returns False, the traversal
- will not go any deeper, but continue at the next branch
- It will return the actual input node in the end !"""
- nodes = node.in_nodes[:]
+ def input_inclusive_dfirst_reversed(self, node):
+ """Return all input nodes of the given node, depth first,
+ It will return the actual input node last, as it is required
+ like that by the pool"""
+ stack = [node]
seen = set()
# depth first
- while nodes:
- n = nodes.pop()
+ out = list()
+ while stack:
+ n = stack.pop()
if n in seen:
continue
seen.add(n)
+ out.append(n)
# only proceed in that direction if visitor is fine with it
- if visitor(n):
- nodes.extend(n.in_nodes)
+ stack.extend(n.in_nodes)
# END call visitor
# END while walking
- visitor(node)
+ out.reverse()
+ return out
diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 2ec18f1a..5ebc3655 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -182,14 +182,13 @@ class Pool(object):
dfirst_tasks = self._taskorder_cache[id(task)]
except KeyError:
# have to retrieve the list from the graph
- dfirst_tasks = list()
- self._tasks.visit_input_inclusive_depth_first(task, lambda n: dfirst_tasks.append(n))
+ dfirst_tasks = self._tasks.input_inclusive_dfirst_reversed(task)
self._taskorder_cache[id(task)] = dfirst_tasks
# END handle cached order retrieval
finally:
self._taskgraph_lock.release()
# END handle locking
-
+ print dfirst_tasks
# check the min count on all involved tasks, and be sure that we don't
# have any task which produces less than the maximum min-count of all tasks
# The actual_count is used when chunking tasks up for the queue, whereas
@@ -309,6 +308,7 @@ class Pool(object):
threadsafe to optimize item throughput.
:note: currently NOT threadsafe !"""
+ print "set_size", size
assert size > -1, "Size cannot be negative"
# either start new threads, or kill existing ones.
diff --git a/lib/git/async/task.py b/lib/git/async/task.py
index 03b40492..57dd285d 100644
--- a/lib/git/async/task.py
+++ b/lib/git/async/task.py
@@ -80,7 +80,9 @@ class OutputChannelTask(Node):
def process(self, count=0):
"""Process count items and send the result individually to the output channel"""
+ print "%r: reading %i" % (self.id, count)
items = self._read(count)
+ print "%r: done reading" % self.id
try:
# increase the ref-count - we use this to determine whether anyone else
# is currently handling our output channel. As this method runs asynchronously,
@@ -102,7 +104,7 @@ class OutputChannelTask(Node):
wc.write(rval)
# END handle single apply
except Exception, e:
- print >> sys.stderr, "task error:", str(e) # TODO: REMOVE DEBUG, or make it use logging
+ print >> sys.stderr, "task %s error:" % self.id, type(e), str(e) # TODO: REMOVE DEBUG, or make it use logging
# be sure our task is not scheduled again
self.set_done()
@@ -146,6 +148,7 @@ class OutputChannelTask(Node):
# thread having its copy on the stack
# + 1 for the instance we provide to refcount
if self.is_done() and getrefcount(self._out_wc) < 4:
+ print "Closing channel of %r" % self.id
self.close()
# END handle channel closure
#{ Configuration
diff --git a/lib/git/async/util.py b/lib/git/async/util.py
index 00d0dbab..b7750b0b 100644
--- a/lib/git/async/util.py
+++ b/lib/git/async/util.py
@@ -206,7 +206,6 @@ class AsyncQueue(deque):
return old
finally:
self.mutex.release()
-
# if we won't receive anymore items, inform the getters
if not state:
self.not_empty.notify_all()
@@ -222,6 +221,13 @@ class AsyncQueue(deque):
def put(self, item, block=True, timeout=None):
self.mutex.acquire()
+ # NOTE: we explicitly do NOT check for our writable state
+ # Its just used as a notification signal, and we need to be able
+ # to continue writing to prevent threads ( easily ) from failing
+ # to write their computed results, which we want in fact
+ # NO: we want them to fail and stop processing, as the one who caused
+ # the channel to close had a reason and wants the threads to
+ # stop on the task as soon as possible
if not self._writable:
self.mutex.release()
raise ReadOnly