1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
import html.parser
from django.utils.encoding import smart_str
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
# Keyword arguments forwarded to ``html.parser.HTMLParser.__init__``.
# 'convert_charrefs' is always passed explicitly because, per the original
# note here, leaving it out makes the constructor raise a warning.
HTML_PARSER_ARGS = dict(convert_charrefs=False)
class DefaultHtmlParser(ParserBase, html.parser.HTMLParser):
    """Collect ``<style>``, ``<script>`` and ``<link>`` elements from HTML.

    The document is parsed eagerly in ``__init__``.  Every element found is
    stored as a dict with the keys ``tag`` (lower-cased tag name), ``attrs``
    (the ``(name, value)`` pairs reported by ``HTMLParser``), ``attrs_dict``
    (the same pairs as a dict) and ``text`` (accumulated element text, or
    ``None`` for ``<link>``).
    """

    def __init__(self, content):
        """Parse *content* and record its CSS and JS elements.

        Raises:
            ParserError: if feeding *content* to the underlying parser fails.
        """
        html.parser.HTMLParser.__init__(self, **HTML_PARSER_ARGS)
        self.content = content
        self._css_elems = []
        self._js_elems = []
        # Name of the <style>/<script> element currently open, if any.
        self._current_tag = None
        try:
            self.feed(self.content)
            self.close()
        except Exception as err:
            raise ParserError(
                "Error while initializing HtmlParser: %s (line: %s)"
                % (err, repr(self._error_line(err)))
            ) from err

    def _error_line(self, err):
        """Return the source line *err* most likely refers to, or ``""``."""
        # Most exceptions carry no ``lineno``; fall back to the parser's own
        # position so building the error message can never itself fail.
        lineno = getattr(err, "lineno", None)
        if not isinstance(lineno, int):
            lineno = self.getpos()[0]
        try:
            lines = self.content.splitlines()
        except AttributeError:
            # content was not string-like (e.g. None); nothing to quote.
            return ""
        index = lineno - 1  # parser line numbers are 1-based
        if 0 <= index < len(lines):
            return lines[index]
        return ""

    def handle_starttag(self, tag, attrs):
        """Record ``<style>``, ``<script>`` and ``<link>`` start tags."""
        tag = tag.lower()
        if tag in ("style", "script"):
            elems = self._css_elems if tag == "style" else self._js_elems
            elems.append(
                {"tag": tag, "attrs": attrs, "attrs_dict": dict(attrs), "text": ""}
            )
            # Remember the open element so handle_data() knows where the
            # following text belongs.
            self._current_tag = tag
        elif tag == "link":
            self._css_elems.append(
                {"tag": tag, "attrs": attrs, "attrs_dict": dict(attrs), "text": None}
            )

    def handle_endtag(self, tag):
        """Stop collecting text once the open element is closed."""
        if self._current_tag and self._current_tag == tag.lower():
            self._current_tag = None

    def handle_data(self, data):
        """Append *data* to the text of the currently open element.

        ``HTMLParser`` may deliver the content of a single element in several
        calls (for instance when ``</`` occurs inside a script string), so the
        chunks are concatenated instead of overwriting one another.
        """
        if self._current_tag == "style":
            elem = self._css_elems[-1]
        elif self._current_tag == "script":
            elem = self._js_elems[-1]
        else:
            return
        elem["text"] = (elem["text"] or "") + data

    def css_elems(self):
        """Return the recorded ``<style>`` and ``<link>`` elements."""
        return self._css_elems

    def js_elems(self):
        """Return the recorded ``<script>`` elements."""
        return self._js_elems

    def elem_name(self, elem):
        """Return the tag name of *elem*."""
        return elem["tag"]

    def elem_attribs(self, elem):
        """Return the attributes of *elem* as a dict."""
        return elem["attrs_dict"]

    def elem_content(self, elem):
        """Return the text of *elem* passed through ``smart_str``."""
        return smart_str(elem["text"])

    def elem_str(self, elem):
        """Render *elem* back into an HTML string.

        ``<link>`` elements are rendered as a lone start tag; other elements
        are rendered as start tag, text and end tag.
        """
        rendered = []
        for name, value in elem["attrs"]:
            if value is None:
                # Valueless attribute (e.g. ``async``): emit the bare name
                # rather than the misleading ``async="None"``.
                rendered.append(name)
            else:
                rendered.append('%s="%s"' % (name, value))
        attrs = " %s" % " ".join(rendered) if rendered else ""
        if elem["tag"] == "link":
            return "<%s%s>" % (elem["tag"], attrs)
        return "<%s%s>%s</%s>" % (elem["tag"], attrs, elem["text"], elem["tag"])
|