blob: 3c14d9e415dee63b1b537127d06b86e2b62f597b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# -*- coding: utf-8 -*-
"""
sphinx.web.antispam
~~~~~~~~~~~~~~~~~~~
Small module that performs anti-spam tests based on the bad content
regex list provided by MoinMoin.
:copyright: 2007-2008 by Armin Ronacher.
:license: BSD.
"""
import re
import urllib
import time
from os import path
# URL the bad content list is downloaded from (raw page export).
DOWNLOAD_URL = 'http://moinmaster.wikiwikiweb.de/BadContent?action=raw'

# Age (in seconds) after which the local copy is refreshed: 7 days.
UPDATE_INTERVAL = 60 * 60 * 24 * 7


class AntiSpam(object):
    """
    Spam checker backed by a flat file of "bad content" regular expressions.

    On construction the local file is refreshed from ``DOWNLOAD_URL`` if it
    is missing or older than ``UPDATE_INTERVAL``.  Downloading is best
    effort: if it fails, the existing local file is used, and if that
    cannot be read either the checker starts with no rules.

    After construction, ``rules`` holds the compiled patterns.
    """

    def __init__(self, bad_content_file):
        """
        Load (and if necessary refresh) the rules.

        :param bad_content_file: path of the local rule file; it is
                                 overwritten whenever a download succeeds.
        :raises IOError/OSError: if a freshly downloaded list cannot be
                                 written to `bad_content_file`.
        """
        self.bad_content_file = bad_content_file

        lines = None
        if self._cache_is_stale():
            lines = self._download()
            if lines is not None:
                self._write_cache(lines)
        if lines is None:
            lines = self._read_cache()

        self.rules = self._compile(lines)

    def _cache_is_stale(self):
        """Return True if the local file is missing or too old."""
        # Ask for the mtime directly instead of exists() + getmtime() so a
        # file that disappears in between cannot raise.
        try:
            last_change = path.getmtime(self.bad_content_file)
        except OSError:
            last_change = 0
        return last_change + UPDATE_INTERVAL < time.time()

    def _download(self):
        """Fetch the rule list; return its lines, or None on any failure."""
        try:
            # NOTE: urllib.urlopen is the Python 2 API.  try/finally is used
            # rather than `with` because that handle is not a context manager.
            response = urllib.urlopen(DOWNLOAD_URL)
            try:
                data = response.read()
            finally:
                response.close()
        except Exception:
            # Deliberately best effort: the caller falls back to the local
            # file.  KeyboardInterrupt/SystemExit are no longer swallowed.
            return None
        # Strip first so that indented "#" comment lines are dropped too.
        stripped = [line.strip() for line in data.splitlines()]
        return [line for line in stripped if not line.startswith('#')]

    def _write_cache(self, lines):
        """Replace the local file with `lines`, one rule per line."""
        with open(self.bad_content_file, 'w') as f:
            f.write('\n'.join(lines))

    def _read_cache(self):
        """Return the stripped lines of the local file, or [] if unreadable."""
        try:
            with open(self.bad_content_file) as f:
                return [line.strip() for line in f]
        except Exception:
            return []

    def _compile(self, lines):
        """Compile every non-empty line, skipping invalid patterns."""
        rules = []
        for rule in lines:
            if not rule:
                continue
            try:
                rules.append(re.compile(rule))
            except re.error:
                # One malformed entry in the list must not prevent the
                # remaining rules from being used.
                continue
        return rules

    def is_spam(self, fields):
        """
        Check strings against the rules.

        :param fields: iterable of strings to test.
        :return: True if any rule matches (``search``) any field,
                 otherwise False.
        """
        # Materialize once: `fields` is scanned again for every rule, which
        # would exhaust a generator after the first rule.
        fields = tuple(fields)
        return any(regex.search(field) is not None
                   for regex in self.rules
                   for field in fields)
|