summary | refs | log | tree | commit | diff
path: root/src/lxml/html
diff options
context:
space:
mode:
author: scoder <stefan_ml@behnel.de> 2016-07-24 09:48:05 +0200
committer: GitHub <noreply@github.com> 2016-07-24 09:48:05 +0200
commit: 0a43d0b52ea35e33564b4e08b3e50f4f890f4758 (patch)
tree: 593b1378cb927c3a324825ef5715f24cbd13258a /src/lxml/html
parent: b3ffdd9904baab64d2288969752c26d39cd18db3 (diff)
parent: c35549338445a579e81cd658027baefa72723c49 (diff)
download: python-lxml-0a43d0b52ea35e33564b4e08b3e50f4f890f4758.tar.gz
Merge pull request #180 from chripede/patch-2
Add inline_style option
Diffstat (limited to 'src/lxml/html')
-rw-r--r-- src/lxml/html/clean.py 7
-rw-r--r-- src/lxml/html/tests/test_clean.txt 23
2 files changed, 27 insertions, 3 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index 99fe42b1..47a32749 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -112,7 +112,10 @@ class Cleaner(object):
Removes any comments.
``style``:
- Removes any style tags or attributes.
+ Removes any style tags.
+
+ ``inline_style``:
+ Removes any style attributes.
``links``:
Removes any ``<link>`` tags
@@ -191,6 +194,7 @@ class Cleaner(object):
javascript = True
comments = True
style = False
+ inline_style = False
links = True
meta = True
page_structure = True
@@ -314,6 +318,7 @@ class Cleaner(object):
kill_tags.add(etree.ProcessingInstruction)
if self.style:
kill_tags.add('style')
+ if self.inline_style:
etree.strip_attributes(doc, 'style')
if self.links:
kill_tags.add('link')
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index 4431c980..c78ab4f1 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -122,7 +122,7 @@
</body>
</html>
->>> print(Cleaner(style=True, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
+>>> print(Cleaner(style=True, inline_style=True, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
<html>
<head>
</head>
@@ -142,6 +142,26 @@
</body>
</html>
+>>> print(Cleaner(style=True, inline_style=False, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
+<html>
+ <head>
+ </head>
+ <body>
+ <a href="">a link</a>
+ <a href="">a control char link</a>
+ <a href="">data</a>
+ <a href="#">another link</a>
+ <p>a paragraph</p>
+ <div style="display: none">secret EVIL!</div>
+ of EVIL!
+ Password:
+ <a href="evil-site" rel="nofollow">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author nofollow">Author</a>
+ <a href="http://example.com" rel="nofollow">Text</a>
+ <img src="evil!">
+ </body>
+</html>
+
>>> print(Cleaner(links=False, page_structure=False, javascript=True, host_whitelist=['example.com'], whitelist_tags=None).clean_html(doc))
<html>
<head>
@@ -165,4 +185,3 @@
<img src="evil!">
</body>
</html>
-