Diffstat (limited to 'examples/webcrawler.py')
 examples/webcrawler.py | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)
diff --git a/examples/webcrawler.py b/examples/webcrawler.py
index 2dfbd45..53c6e8f 100644
--- a/examples/webcrawler.py
+++ b/examples/webcrawler.py
@@ -2,32 +2,27 @@
"""\
@file webcrawler.py
-This is a simple web "crawler" that fetches a bunch of urls using a coroutine pool. It fetches as
- many urls at time as coroutines in the pool.
+This is a simple web "crawler" that fetches a bunch of urls using a pool to
+control the number of outbound connections. It has as many simultaneously open
+connections as coroutines in the pool.
+
+The prints in the body of the fetch function are there to demonstrate that the
+requests are truly made in parallel.
"""
urls = ["http://www.google.com/intl/en_ALL/images/logo.gif",
- "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif",
- "http://eventlet.net"]
+ "https://wiki.secondlife.com/w/images/secondlife.jpg",
+ "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif"]
-import time
-from eventlet.green import urllib2
-from eventlet import coros
+import eventlet
+from eventlet.green import urllib2
def fetch(url):
-    # we could do something interesting with the result, but this is
-    # example code, so we'll just report that we did it
-    print "%s fetching %s" % (time.asctime(), url)
-    req = urllib2.urlopen(url)
-    print "%s fetched %s (%s)" % (time.asctime(), url, len(req.read()))
-
-pool = coros.CoroutinePool(max_size=4)
-waiters = []
-for url in urls:
-    waiters.append(pool.execute(fetch, url))
-
-# wait for all the coroutines to come back before exiting the process
-for waiter in waiters:
-    waiter.wait()
-
-
+ print "opening", url
+ body = urllib2.urlopen(url).read()
+ print "done with", url
+ return url, body
+
+pool = eventlet.GreenPool(200)
+for url, body in pool.imap(fetch, urls):
+ print "got body from", url, "of length", len(body) \ No newline at end of file