diff options
| author | scoder <stefan_ml@behnel.de> | 2016-07-24 09:48:05 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-07-24 09:48:05 +0200 |
| commit | 0a43d0b52ea35e33564b4e08b3e50f4f890f4758 (patch) | |
| tree | 593b1378cb927c3a324825ef5715f24cbd13258a /src/lxml/html | |
| parent | b3ffdd9904baab64d2288969752c26d39cd18db3 (diff) | |
| parent | c35549338445a579e81cd658027baefa72723c49 (diff) | |
| download | python-lxml-0a43d0b52ea35e33564b4e08b3e50f4f890f4758.tar.gz | |
Merge pull request #180 from chripede/patch-2
Add inline_style option
Diffstat (limited to 'src/lxml/html')
| -rw-r--r-- | src/lxml/html/clean.py | 7 |
| -rw-r--r-- | src/lxml/html/tests/test_clean.txt | 23 |
2 files changed, 27 insertions, 3 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index 99fe42b1..47a32749 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -112,7 +112,10 @@ class Cleaner(object):
         Removes any comments.
 
     ``style``:
-        Removes any style tags or attributes.
+        Removes any style tags.
+
+    ``inline_style``
+        Removes any style attributes.
 
     ``links``:
         Removes any ``<link>`` tags
@@ -191,6 +194,7 @@ class Cleaner(object):
     javascript = True
     comments = True
     style = False
+    inline_style = False
     links = True
     meta = True
     page_structure = True
@@ -314,6 +318,7 @@ class Cleaner(object):
             kill_tags.add(etree.ProcessingInstruction)
         if self.style:
             kill_tags.add('style')
+        if self.inline_style:
             etree.strip_attributes(doc, 'style')
         if self.links:
             kill_tags.add('link')
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index 4431c980..c78ab4f1 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -122,7 +122,7 @@
   </body>
 </html>
 
->>> print(Cleaner(style=True, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
+>>> print(Cleaner(style=True, inline_style=True, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
 <html>
   <head>
   </head>
@@ -142,6 +142,26 @@
   </body>
 </html>
 
+>>> print(Cleaner(style=True, inline_style=False, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
+<html>
+  <head>
+  </head>
+  <body>
+    <a href="">a link</a>
+    <a href="">a control char link</a>
+    <a href="">data</a>
+    <a href="#">another link</a>
+    <p>a paragraph</p>
+    <div style="display: none">secret EVIL!</div>
+    of EVIL!
+    Password:
+    <a href="evil-site" rel="nofollow">spam spam SPAM!</a>
+    <a href="http://example.com" rel="author nofollow">Author</a>
+    <a href="http://example.com" rel="nofollow">Text</a>
+    <img src="evil!">
+  </body>
+</html>
+
 >>> print(Cleaner(links=False, page_structure=False, javascript=True, host_whitelist=['example.com'], whitelist_tags=None).clean_html(doc))
 <html>
   <head>
@@ -165,4 +185,3 @@
     <img src="evil!">
   </body>
 </html>
-
