From bb557dcefacccd583b174b87b5f268797449ed2f Mon Sep 17 00:00:00 2001 From: ianb Date: Wed, 26 Mar 2008 17:54:46 +0100 Subject: [svn r3460] Fix empty tags (e.g., <br>
) in diffs. --HG-- branch : trunk --- src/lxml/html/diff.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/lxml/html/diff.py') diff --git a/src/lxml/html/diff.py b/src/lxml/html/diff.py index 448faffb..3acf375e 100644 --- a/src/lxml/html/diff.py +++ b/src/lxml/html/diff.py @@ -139,6 +139,8 @@ def markup_serialize_tokens(tokens, markup_func): ############################################################ def htmldiff(old_html, new_html): + ## FIXME: this should take parsed documents too, and use their body + ## or other content. """ Do a diff of the old and new document. The documents are HTML *fragments* (str/UTF8 or unicode), they are not complete documents (i.e., no <html> tag). @@ -310,8 +312,6 @@ def split_unbalanced(chunks): endtag = chunk[1] == '/' name = chunk.split()[0].strip('<>/') if name in empty_tags: - assert not endtag, ( - "Empty tag %r should have no end tag" % chunk) balanced.append(chunk) continue if endtag: @@ -669,7 +669,7 @@ def flatten_el(el, include_hrefs, skip_tag=False): yield ('img', el.attrib['src'], start_tag(el)) else: yield start_tag(el) - if el.tag in empty_tags and not el.text and not len(el): + if el.tag in empty_tags and not el.text and not len(el) and not el.tail: return start_words = split_words(el.text) for word in start_words: -- cgit v1.2.1