diff options
author | murphy <murphy@rubychan.de> | 2011-03-07 23:07:35 +0000 |
---|---|---|
committer | murphy <murphy@rubychan.de> | 2011-03-07 23:07:35 +0000 |
commit | 33f4da87ee0fb3d225500e0985dd92e359afeb5f (patch) | |
tree | 9e0f34595f7a7c4e8e0c4a0300e83047dd49fae8 /lib/coderay/scanners/html.rb | |
parent | 583e7a57d56f1c212b36b1d26718382f8e0748bc (diff) | |
download | coderay-33f4da87ee0fb3d225500e0985dd92e359afeb5f.tar.gz |
#46: Integrating JavaScript into HTML scanner.
Diffstat (limited to 'lib/coderay/scanners/html.rb')
-rw-r--r-- | lib/coderay/scanners/html.rb | 79 |
1 files changed, 61 insertions, 18 deletions
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 7fab3cc..7cc9c9c 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -53,10 +53,20 @@ module Scanners @plain_string_content = nil end + def scan_java_script encoder, code + if code && !code.empty? + @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true + # encoder.begin_group :inline + @java_script_scanner.tokenize code, :tokens => encoder + # encoder.end_group :inline + end + end + def scan_tokens encoder, options state = @state plain_string_content = @plain_string_content + in_tag = in_attribute = nil until eos? @@ -68,19 +78,35 @@ module Scanners case state when :initial - if match = scan(/<!--.*?-->/m) + case in_tag + when 'script' + if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m) + code = self[2] || self[4] + closing = self[3] + encoder.text_token self[1], :comment + else + code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/) + closing = false + end + scan_java_script encoder, code + encoder.text_token closing, :comment if closing + end + next if eos? + if match = scan(/<!--(?:.*?-->|.*)/m) encoder.text_token match, :comment - elsif match = scan(/<!DOCTYPE.*?>/m) + elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m) encoder.text_token match, :doctype - elsif match = scan(/<\?xml.*?\?>/m) + elsif match = scan(/<\?xml(?:.*?\?>|.*)/m) encoder.text_token match, :preprocessor - elsif match = scan(/<\?.*?\?>|<%.*?%>/m) + elsif match = scan(/<\?(?:.*?\?>|.*)|<%(?:.*?%>|.*)/m) encoder.text_token match, :comment - elsif match = scan(/<\/[-\w.:]*>/m) + elsif match = scan(/<\/[-\w.:]*>?/m) encoder.text_token match, :tag - elsif match = scan(/<[-\w.:]+>?/m) + in_tag = nil + elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m) encoder.text_token match, :tag - state = :attribute unless match[-1] == ?> + in_tag = self[1] + state = :attribute unless self[2] elsif match = scan(/[^<>&]+/) encoder.text_token match, :plain elsif match = scan(/#{ENTITY}/ox) @@ -92,10 +118,14 @@ module Scanners end when :attribute - if match = scan(/#{TAG_END}/) + if match = scan(/#{TAG_END}/o) encoder.text_token match, :tag + in_attribute = nil state = :initial elsif match = scan(/#{ATTR_NAME}/o) + if match.downcase == 'onclick' + in_attribute = 'script' + end encoder.text_token match, :attribute_name state = :attribute_equal else @@ -106,11 +136,9 @@ module Scanners if match = scan(/=/) encoder.text_token match, :operator state = :attribute_value - elsif match = scan(/#{ATTR_NAME}/o) - encoder.text_token match, :attribute_name - elsif match = scan(/#{TAG_END}/o) - encoder.text_token match, :tag - state = :initial + elsif scan(/#{ATTR_NAME}/o) || scan(/#{TAG_END}/o) + state = :attribute + next else encoder.text_token getch, :error state = :attribute @@ -121,11 +149,26 @@ module Scanners encoder.text_token match, :attribute_value state = :attribute elsif match = scan(/["']/) - encoder.begin_group :string - state = :attribute_value_string - plain_string_content = PLAIN_STRING_CONTENT[match] - encoder.text_token match, :delimiter - elsif scan(/#{TAG_END}/o) + if in_attribute == 'script' + encoder.begin_group :inline + encoder.text_token match, :inline_delimiter + if scan(/javascript:\s*/) + encoder.text_token matched, :comment + end + code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/) + scan_java_script encoder, code + match = scan(/["']/) + encoder.text_token match, :inline_delimiter if match + encoder.end_group :inline + state = :attribute + in_attribute = nil + else + encoder.begin_group :string + state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] + encoder.text_token match, :delimiter + end + elsif match = scan(/#{TAG_END}/o) encoder.text_token match, :tag state = :initial else |