summaryrefslogtreecommitdiff
path: root/tools/mirrorlib.py
blob: 94f5b37496b0f7532ca181be5a2f166a725644f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
'''Library to support tools that access PyPI mirrors. The following
functional areas are covered:
- mirror selection (find_mirror)
- mirror verification
- key rollover
'''

################## Mirror Selection ##############################
import socket, time, datetime, errno, select

def _mirror_list(first):
    '''Generator producing all mirror names.

    first is the letter prefix of the mirror to start with ('a' .. 'z',
    or a two-letter prefix 'aa' .. 'zz').  For that host and every
    following one (a, b, ..., z, aa, ab, ..., zz) each address reported
    by socket.getaddrinfo is yielded as a (hostname, family, sockaddr)
    tuple.  The walk ends after the host whose name equals the primary
    host name that last.pypi.python.org resolves to.

    Raises ValueError if the walk runs past 'zz'.  Resolver errors
    raised by the socket module propagate to the caller.'''
    ord_a = ord('a')
    last = socket.gethostbyname_ex('last.pypi.python.org')
    if len(first) == 2:
        # inverse of the two-letter encoding below: aa -> 26 ... zz -> 701
        cur_index = (ord(first[0])-ord_a+1)*26 + ord(first[1])-ord_a
    else:
        cur_index = ord(first)-ord_a
    cur = first+'.pypi.python.org'
    while True:
        # one result per address, so multi-homed mirrors are listed once
        # for each of their addresses
        for family, _, _, _, sockaddr in socket.getaddrinfo(cur, 0, 0, socket.SOCK_STREAM):
            yield cur, family, sockaddr
        if last[0] == cur:
            break
        cur_index += 1
        if cur_index < 26:
            # a..z
            cur = chr(ord_a+cur_index)
        elif cur_index > 701:
            # call form of raise works on both Python 2 and Python 3
            raise ValueError('too many mirrors')
        else:
            # aa, ab, ... zz
            high, low = divmod(cur_index, 26)
            cur = chr(ord_a-1+high)+chr(ord_a+low)
        cur += '.pypi.python.org'

class _Mirror:
    # status values:
    # 0: wants to send
    # 1: wants to recv
    # 2: completed, ok
    # 3: completed, failed
    def __init__(self, name, family, ip):
        self.name = name
        self.family = family
        self.ip = ip
        self.socket = socket.socket(family, socket.SOCK_STREAM)
        self.socket.setblocking(0)
        self.started = time.time()
        try:
            self.socket.connect((name, 80))
        except socket.error, e:
            if e.errno != errno.EINPROGRESS:
                raise
        # now need to select for writing
        self.status = 0

    def write(self):
        url = 'last-modified'
        if self.name == 'a.pypi.python.org':
            # the master server doesn't provide last-modified,
            # as that would be pointless. Instead, /daytime can be
            # used as an indication of currency and responsiveness.
            url = 'daytime'
        self.socket.send('GET /%s HTTP/1.0\r\n'
                         'Host: %s\r\n'
                         '\r\n' % (url, self.name))
        self.status = 1
    
    def read(self):
        data = self.socket.recv(1200)
        self.response_time = time.time()-self.started
        # response should be much shorter
        assert len(data) < 1200
        self.socket.close()
        data = data.splitlines()
        if data[0].split()[1] == '200':
            # ok
            data = data[-1]
            try:
                self.last_modified = datetime.datetime.strptime(data, "%Y%m%dT%H:%M:%S")
                self.status = 2 # complete
            except ValueError:
                self.status = 3 # failed
        else:
            self.status = 3

    def failed(self):
        self.socket.close()
        self.status = failed()

    def results(self):
        return self.name, self.family, self.ip, self.response_time, self.last_modified

def _select(mirrors):
    # perform select call on mirrors dictionary
    rlist = []
    wlist = []
    xlist = []
    for m in mirrors.values():
        if m.status == 0:
            wlist.append(m.socket)
            xlist.append(m.socket)
        elif m.status == 1:
            rlist.append(m.socket)
            xlist.append(m.socket)
    rlist, wlist, xlist = select.select(rlist, wlist, xlist, 0)
    completed = []
    for s in wlist:
        mirrors[s].write()
    for s in rlist:
        m = mirrors[s]
        del mirrors[s]
        m.read()
        if m.status == 2:
            completed.append(m)
    for s in xlist:
        mirrors[s].failed()
        del mirrors[s]
    return completed

def _close(mirrors):
    for m in mirrors:
        m.close()

def _newest(mirrors):
    if not mirrors:
        raise ValueError, "no mirrors found"
    mirrors.sort(key=lambda m:m.last_modified)
    return mirrors[-1].results()

def find_mirror(start_with='a',
                good_response_time = 1,
                good_age = 30*60,
                slow_mirrors_wait = 5):
    '''find_mirror(start_with, good_response_time, good_age, slow_mirrors_wait)
       -> name, family, IP, response_time, last_modified

    Find a PyPI mirror matching given criteria.
    start_with indicates the first mirror that should be considered (defaults to 'a').
    good_response_time is the maximum response time (in seconds) which lets this
    algorithm look no further; likewise, good_age is the maximum age (in seconds)
    acceptable to the caller. The first mirror meeting both criteria is returned
    immediately.
    Once this procedure has gone on for longer than slow_mirrors_wait (default 5s)
    and at least one mirror has responded, the newest responding mirror is returned
    without waiting for the remaining ones.
    If no matching mirror can be found, the newest one that did respond is returned.
    If no mirror can be found at all, ValueError is raised.'''
    started = time.time()
    # mirrors that responded but missed one of the two criteria
    good_mirrors = []
    pending_mirrors = {} # socket:mirror
    good_last_modified = datetime.datetime.utcnow()-datetime.timedelta(seconds=good_age)

    def poll():
        # Service the pending probes once. Return the first completed
        # mirror meeting both criteria, or None; other completed mirrors
        # seen before a match are kept as fallback candidates.
        for m in _select(pending_mirrors):
            if m.response_time < good_response_time and m.last_modified > good_last_modified:
                return m
            good_mirrors.append(m)
        return None

    # start a probe for every mirror address, servicing the probes
    # already in flight as we go
    for host, family, ip in _mirror_list(start_with):
        try:
            probe = _Mirror(host, family, ip)
        except socket.error:
            # this address cannot be reached at all (e.g. no route for
            # its address family); try the next one
            continue
        pending_mirrors[probe.socket] = probe
        match = poll()
        if match is not None:
            _close(pending_mirrors)
            return match.results()

    # all probes started; keep polling until one matches, all are done,
    # or the deadline has passed with at least one usable answer
    while pending_mirrors:
        if time.time() > started+slow_mirrors_wait and good_mirrors:
            # if we have looked for 5s for a mirror, and we already have one
            # return the newest one
            _close(pending_mirrors)
            return _newest(good_mirrors)
        match = poll()
        if match is not None:
            _close(pending_mirrors)
            return match.results()
    return _newest(good_mirrors)