summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/html.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/scanners/html.rb')
-rw-r--r--lib/coderay/scanners/html.rb79
1 files changed, 61 insertions, 18 deletions
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 7fab3cc..7cc9c9c 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -53,10 +53,20 @@ module Scanners
@plain_string_content = nil
end
+ def scan_java_script encoder, code
+ if code && !code.empty?
+ @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
+ # encoder.begin_group :inline
+ @java_script_scanner.tokenize code, :tokens => encoder
+ # encoder.end_group :inline
+ end
+ end
+
def scan_tokens encoder, options
state = @state
plain_string_content = @plain_string_content
+ in_tag = in_attribute = nil
until eos?
@@ -68,19 +78,35 @@ module Scanners
case state
when :initial
- if match = scan(/<!--.*?-->/m)
+ case in_tag
+ when 'script'
+ if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
+ code = self[2] || self[4]
+ closing = self[3]
+ encoder.text_token self[1], :comment
+ else
+ code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
+ closing = false
+ end
+ scan_java_script encoder, code
+ encoder.text_token closing, :comment if closing
+ end
+ next if eos?
+ if match = scan(/<!--(?:.*?-->|.*)/m)
encoder.text_token match, :comment
- elsif match = scan(/<!DOCTYPE.*?>/m)
+ elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
encoder.text_token match, :doctype
- elsif match = scan(/<\?xml.*?\?>/m)
+ elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
encoder.text_token match, :preprocessor
- elsif match = scan(/<\?.*?\?>|<%.*?%>/m)
+ elsif match = scan(/<\?(?:.*?\?>|.*)|<%(?:.*?%>|.*)/m)
encoder.text_token match, :comment
- elsif match = scan(/<\/[-\w.:]*>/m)
+ elsif match = scan(/<\/[-\w.:]*>?/m)
encoder.text_token match, :tag
- elsif match = scan(/<[-\w.:]+>?/m)
+ in_tag = nil
+ elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
encoder.text_token match, :tag
- state = :attribute unless match[-1] == ?>
+ in_tag = self[1]
+ state = :attribute unless self[2]
elsif match = scan(/[^<>&]+/)
encoder.text_token match, :plain
elsif match = scan(/#{ENTITY}/ox)
@@ -92,10 +118,14 @@ module Scanners
end
when :attribute
- if match = scan(/#{TAG_END}/)
+ if match = scan(/#{TAG_END}/o)
encoder.text_token match, :tag
+ in_attribute = nil
state = :initial
elsif match = scan(/#{ATTR_NAME}/o)
+ if match.downcase == 'onclick'
+ in_attribute = 'script'
+ end
encoder.text_token match, :attribute_name
state = :attribute_equal
else
@@ -106,11 +136,9 @@ module Scanners
if match = scan(/=/)
encoder.text_token match, :operator
state = :attribute_value
- elsif match = scan(/#{ATTR_NAME}/o)
- encoder.text_token match, :attribute_name
- elsif match = scan(/#{TAG_END}/o)
- encoder.text_token match, :tag
- state = :initial
+ elsif scan(/#{ATTR_NAME}/o) || scan(/#{TAG_END}/o)
+ state = :attribute
+ next
else
encoder.text_token getch, :error
state = :attribute
@@ -121,11 +149,26 @@ module Scanners
encoder.text_token match, :attribute_value
state = :attribute
elsif match = scan(/["']/)
- encoder.begin_group :string
- state = :attribute_value_string
- plain_string_content = PLAIN_STRING_CONTENT[match]
- encoder.text_token match, :delimiter
- elsif scan(/#{TAG_END}/o)
+ if in_attribute == 'script'
+ encoder.begin_group :inline
+ encoder.text_token match, :inline_delimiter
+ if scan(/javascript:\s*/)
+ encoder.text_token matched, :comment
+ end
+ code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
+ scan_java_script encoder, code
+ match = scan(/["']/)
+ encoder.text_token match, :inline_delimiter if match
+ encoder.end_group :inline
+ state = :attribute
+ in_attribute = nil
+ else
+ encoder.begin_group :string
+ state = :attribute_value_string
+ plain_string_content = PLAIN_STRING_CONTENT[match]
+ encoder.text_token match, :delimiter
+ end
+ elsif match = scan(/#{TAG_END}/o)
encoder.text_token match, :tag
state = :initial
else