1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
#!/usr/bin/python
import sys, os, socket, psycopg2, urllib, re, bz2, cStringIO, ConfigParser
# Put the parent of this script's directory on the import path before
# importing apache_stats (presumably that is where the module lives --
# confirm against the repository layout).
sys.path.append(os.path.dirname(__file__)+"/..")
import apache_stats
# Base directory for the statistics data: it is handed to
# LocalStats.integrate_stats() and also holds the per-host bookkeeping
# files under "integrated/<host>".
statsdir = '/data/www/pypi/stats'
def integrate(config, data, dbconn):
    """Add download counts to the ``release_files`` table.

    Arguments:
      config -- not used by this function; kept so existing callers keep
                working.
      data   -- mapping of ``(filename, browser, package)`` tuples to the
                number of downloads to add for ``filename``.
      dbconn -- open DB-API connection.  All updates are committed together
                once every row has been processed.

    Any database error propagates to the caller before the commit, so a
    failed batch is never partially committed by this function.
    """
    # A cursor has to be created explicitly; nothing at module level
    # provides one.
    cursor = dbconn.cursor()
    try:
        for (filename, _browser, _package), count in data.items():
            cursor.execute('update release_files set downloads=downloads+%s where filename=%s',
                           (count, filename))
    finally:
        cursor.close()
    dbconn.commit()
def _fetch_url(url):
    """Return the full response body for *url*, always closing the handle."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def integrate_remote(config, host, dbconn, dbupdate=True):
    """Pull not-yet-integrated daily statistics from one mirror.

    Arguments:
      config   -- passed through unchanged to integrate().
      host     -- mirror label; the statistics are read from
                  ``http://<host>.pypi.python.org/local-stats/days/``.
      dbconn   -- open database connection, passed through to integrate().
      dbupdate -- when false, only the local statistics are updated and the
                  database is left alone.

    The set of days already processed for ``host`` is kept in
    ``<statsdir>/integrated/<host>``, one ``YYYY-MM-DD`` entry per line.
    Download failures are reported on stdout and skipped (best effort):
    a failed index fetch abandons the host, a failed day file is left for
    a later run.
    """
    url = 'http://%s.pypi.python.org/local-stats/days/' % host
    try:
        index = _fetch_url(url)
    except Exception as e:
        print('ERROR %s for %s' % (e, url))
        return
    files = set(re.findall('href=.(20..-..-..).bz2', index))

    integrated_path = statsdir+'/integrated/'+host
    try:
        with open(integrated_path) as f:
            integrated = set(line.strip() for line in f)
    except IOError:
        # No readable bookkeeping file: treat every listed day as new.
        integrated = set()

    stats = apache_stats.LocalStats()
    # Sorted so days are handled in a deterministic, chronological order.
    for m in sorted(files - integrated):
        url = 'http://%s.pypi.python.org/local-stats/days/%s.bz2' % (host, m)
        try:
            data = _fetch_url(url)
        except Exception as e:
            print('ERROR %s for %s' % (e, url))
            continue
        data = cStringIO.StringIO(bz2.decompress(data))
        year, month, day = m.split('-')
        # index integration
        delta = stats.integrate_stats(statsdir, year, month, day, data)
        if dbupdate:
            # database integration
            integrate(config, delta, dbconn=dbconn)
        integrated.add(m)
        # Checkpoint after every day so an interrupted run does not count
        # the same day twice on the next run.
        with open(integrated_path, 'w') as f:
            f.write('\n'.join(sorted(integrated)))
def main():
    """Integrate the statistics of every mirror into the database.

    ``sys.argv[1]`` is the path of a configuration file with a
    ``[database]`` section providing ``name`` and ``host`` and, optionally,
    ``user`` and ``password`` (both default to the empty string).

    The label of the last mirror is found by resolving
    ``last.pypi.python.org`` and looking for a name of the form
    ``X.pypi.python.org``; mirrors ``a`` through that label are then
    processed in order.

    Raises ValueError when no such name is among the resolved names.
    """
    # Setup database connection
    c = ConfigParser.ConfigParser({'user':'', 'password':''})
    c.read(sys.argv[1])
    dbname = c.get('database', 'name')
    dbuser = c.get('database', 'user')
    dbpass = c.get('database', 'password')
    dbhost = c.get('database', 'host')
    dbconn = psycopg2.connect(database=dbname, user=dbuser, password=dbpass, host=dbhost)
    try:
        # gethostbyname_ex returns (hostname, aliaslist, ipaddrlist);
        # consider the primary name together with all of its aliases.
        resolved = socket.gethostbyname_ex('last.pypi.python.org')
        # look for name X.pypi.python.org
        for candidate in [resolved[0]] + resolved[1]:
            if candidate[1:] == '.pypi.python.org':
                break
        else:
            raise ValueError("Could not properly resolve last mirror name")
        last = candidate[0]

        # A bounded range guarantees termination; the skip below can no
        # longer stall the loop, and a label sorting before "a" simply
        # yields no work.
        for code in range(ord('a'), ord(last) + 1):
            host = chr(code)
            if host == "d":
                # This is just a CNAME back to a.pypi.python.org
                continue
            integrate_remote(sys.argv[1], host, dbconn=dbconn)
    finally:
        # Release the connection even if a mirror fails part-way through.
        dbconn.close()
# Run only when executed as a script, so that importing this module does not
# open database or network connections as a side effect.
if __name__ == '__main__':
    main()
|