import time
import urllib.error

import rdflib
import rdflib.query

MAX_RETRY = 10
BACKOFF_FACTOR = 1.5


def query_with_retry(graph: rdflib.Graph, query: str, **kwargs) -> rdflib.query.Result: # type: ignore[return]
"""Query graph an retry on failure, returns preloaded result
The tests run against outside network targets which results
in flaky tests. Therefor retries are needed to increase stability.
There are two main categories why these might fail:
* Resource shortage on the server running the tests (e.g. out of ports)
* Issues outside the server (network, target server, etc)
As fast feedback is important the retry should be done quickly.
Therefor the first retry is done after 100ms. But if the issue is
outside the server running the tests it we need to be good
citizenship of the internet and not hit servers of others at
a constant rate. (Also it might get us banned)
Therefor this function implements a backoff mechanism.
When adjusting the parameters please keep in mind that several
tests might run on the same machine at the same time
on our CI, and we really don't want to hit any rate limiting.
The maximum time the function waits is:
>>> sum((BACKOFF_FACTOR ** backoff) / 10 for backoff in range(MAX_RETRY))
11.3330078125
"""
    backoff = 0
    for i in range(MAX_RETRY):
        try:
            result = graph.query(query, **kwargs)
            result.bindings  # access bindings to ensure no lazy loading
            return result
        except urllib.error.URLError as e:
            if i == MAX_RETRY - 1:
                raise e

            backoff_s = (BACKOFF_FACTOR**backoff) / 10
            print(
                f"Network error {e} during query, waiting for {backoff_s:.2f}s and retrying"
            )
            time.sleep(backoff_s)
            backoff += 1
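

# A minimal usage sketch, not part of the original module: it assumes a
# hypothetical SPARQL endpoint URL purely for illustration and shows how
# query_with_retry wraps an ordinary graph.query() call.
if __name__ == "__main__":
    from rdflib.plugins.stores.sparqlstore import SPARQLStore

    # Placeholder endpoint; substitute a real SPARQL endpoint to run this.
    store = SPARQLStore("https://example.org/sparql")
    graph = rdflib.Graph(store)

    # The query is retried with backoff on urllib.error.URLError, so a
    # transient network failure does not immediately fail the caller.
    result = query_with_retry(graph, "SELECT * WHERE { ?s ?p ?o } LIMIT 3")
    for row in result:
        print(row)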