Diffstat (limited to 'examples/webcrawler.py')
 examples/webcrawler.py | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)
diff --git a/examples/webcrawler.py b/examples/webcrawler.py
index 2dfbd45..53c6e8f 100644
--- a/examples/webcrawler.py
+++ b/examples/webcrawler.py
@@ -2,32 +2,27 @@
 """\
 @file webcrawler.py
 
-This is a simple web "crawler" that fetches a bunch of urls using a coroutine pool. It fetches as
- many urls at time as coroutines in the pool.
+This is a simple web "crawler" that fetches a bunch of urls using a pool to
+control the number of outbound connections. It has as many simultaneously open
+connections as coroutines in the pool.
+
+The prints in the body of the fetch function are there to demonstrate that the
+requests are truly made in parallel.
 """
 
 urls = ["http://www.google.com/intl/en_ALL/images/logo.gif",
-        "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif",
-        "http://eventlet.net"]
+        "https://wiki.secondlife.com/w/images/secondlife.jpg",
+        "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif"]
 
-import time
-from eventlet.green import urllib2
-from eventlet import coros
+import eventlet
+from eventlet.green import urllib2
 
 def fetch(url):
-    # we could do something interesting with the result, but this is
-    # example code, so we'll just report that we did it
-    print "%s fetching %s" % (time.asctime(), url)
-    req = urllib2.urlopen(url)
-    print "%s fetched %s (%s)" % (time.asctime(), url, len(req.read()))
-
-pool = coros.CoroutinePool(max_size=4)
-waiters = []
-for url in urls:
-    waiters.append(pool.execute(fetch, url))
-
-# wait for all the coroutines to come back before exiting the process
-for waiter in waiters:
-    waiter.wait()
-
-
+    print "opening", url
+    body = urllib2.urlopen(url).read()
+    print "done with", url
+    return url, body
+
+pool = eventlet.GreenPool(200)
+for url, body in pool.imap(fetch, urls):
+    print "got body from", url, "of length", len(body)
\ No newline at end of file

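Note for anyone trying this on a current interpreter: the file in this commit is Python 2 code (bare print statements, urllib2). A rough Python 3 equivalent of the post-commit version, assuming eventlet's green urllib.request wrapper is available, would look something like the sketch below; it is not part of the commit itself.

    # Sketch of the post-commit pattern on Python 3; assumes eventlet is
    # installed and that eventlet.green.urllib.request is importable.
    import eventlet
    from eventlet.green.urllib.request import urlopen

    urls = ["http://www.google.com/intl/en_ALL/images/logo.gif",
            "https://wiki.secondlife.com/w/images/secondlife.jpg",
            "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif"]

    def fetch(url):
        # The prints demonstrate that the requests overlap rather than
        # run one after another.
        print("opening", url)
        body = urlopen(url).read()
        print("done with", url)
        return url, body

    # GreenPool(200) caps the number of simultaneously open connections
    # at 200 green threads; imap yields (url, body) results in the order
    # the urls were submitted.
    pool = eventlet.GreenPool(200)
    for url, body in pool.imap(fetch, urls):
        print("got body from", url, "of length", len(body))

The substance of the commit is the same in either dialect: GreenPool.imap replaces the manual execute/wait bookkeeping of CoroutinePool, so the pool size alone bounds concurrency and results come back as an ordinary iterable.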