From d9d6dd5f4a73363ea5d353ecda142f77ed4eba5a Mon Sep 17 00:00:00 2001 From: Gavin Kistner Date: Thu, 6 Jun 2013 10:43:19 -0600 Subject: Support CDATA blocks in HTML/XML --- lib/coderay/scanners/html.rb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib/coderay/scanners/html.rb') diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 3ba3b79..fcf249a 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -99,7 +99,17 @@ module Scanners case state when :initial - if match = scan(/|.*)/m) + if match = scan(//m) + encoder.text_token match[0..-4], :content + encoder.text_token ']]>', :delimiter + else + encoder.text_token scan(/.*/m), :error + end + encoder.end_group :string + elsif match = scan(/|.*)/m) encoder.text_token match, :comment elsif match = scan(/|.*)|\]>/m) encoder.text_token match, :doctype -- cgit v1.2.1 From 14f3a4f28341237503a51bfa0764c922bb02bdfa Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 14:13:15 +0200 Subject: tweak HTML CDATA token kinds --- lib/coderay/scanners/html.rb | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'lib/coderay/scanners/html.rb') diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index fcf249a..530ce44 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,13 +1,13 @@ module CodeRay module Scanners - + # HTML Scanner # # Alias: +xhtml+ # # See also: Scanners::XML class HTML < Scanner - + register_for :html KINDS_NOT_LOC = [ @@ -100,15 +100,13 @@ module Scanners when :initial if match = scan(//m) - encoder.text_token match[0..-4], :content - encoder.text_token ']]>', :delimiter - else - encoder.text_token scan(/.*/m), :error + encoder.text_token match[0..-4], :plain + encoder.text_token ']]>', :inline_delimiter + elsif match = scan(/.+/) + encoder.text_token match, :error end - encoder.end_group :string elsif match = scan(/|.*)/m) encoder.text_token match, :comment elsif match = scan(/|.*)|\]>/m) -- cgit v1.2.1 From 5b0dc0f35090949d7512a85dadc8cf2871b91aac Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 14:15:01 +0200 Subject: scan_css in HTML scanner (tags) --- lib/coderay/scanners/html.rb | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'lib/coderay/scanners/html.rb') diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 3ba3b79..06728e4 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -75,9 +75,14 @@ module Scanners def scan_java_script encoder, code if code && !code.empty? @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true - # encoder.begin_group :inline @java_script_scanner.tokenize code, :tokens => encoder - # encoder.end_group :inline + end + end + + def scan_css encoder, code + if code && !code.empty? + @css_scanner ||= Scanners::CSS.new '', :keep_tokens => true + @css_scanner.tokenize code, :tokens => encoder end end @@ -110,7 +115,7 @@ module Scanners elsif match = scan(/<\/[-\w.:]*>?/m) in_tag = nil encoder.text_token match, :tag - elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m) + elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m) encoder.text_token match, :tag in_tag = self[1] if self[2] @@ -206,19 +211,23 @@ module Scanners when :in_special_tag case in_tag - when 'script' + when 'script', 'style' encoder.text_token match, :space if match = scan(/[ \t]*\n/) if scan(/(\s*)|(.*))/m) code = self[2] || self[4] closing = self[3] encoder.text_token self[1], :comment else - code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/) + code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/) closing = false end unless code.empty? encoder.begin_group :inline - scan_java_script encoder, code + if in_tag == 'script' + scan_java_script encoder, code + else + scan_css encoder, code + end encoder.end_group :inline end encoder.text_token closing, :comment if closing -- cgit v1.2.1 From 7ada74c8f0f77815393e887db83e1d2c36ce7235 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Sun, 23 Jun 2013 14:32:14 +0200 Subject: scan_css in HTML scanner (arguments), change token kind from :inline to :string --- lib/coderay/scanners/html.rb | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'lib/coderay/scanners/html.rb') diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 06728e4..f1dfba0 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -33,7 +33,8 @@ module Scanners ) IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil). - add(EVENT_ATTRIBUTES, :script) + add(EVENT_ATTRIBUTES, :script). + add(['style'], :style) ATTR_NAME = /[\w.:-]+/ # :nodoc: TAG_END = /\/?>/ # :nodoc: @@ -79,10 +80,10 @@ module Scanners end end - def scan_css encoder, code + def scan_css encoder, code, state = [:initial] if code && !code.empty? @css_scanner ||= Scanners::CSS.new '', :keep_tokens => true - @css_scanner.tokenize code, :tokens => encoder + @css_scanner.tokenize code, :tokens => encoder, :state => state end end @@ -166,17 +167,21 @@ module Scanners encoder.text_token match, :attribute_value state = :attribute elsif match = scan(/["']/) - if in_attribute == :script - encoder.begin_group :inline - encoder.text_token match, :inline_delimiter + if in_attribute == :script || in_attribute == :style + encoder.begin_group :string + encoder.text_token match, :delimiter if scan(/javascript:[ \t]*/) encoder.text_token matched, :comment end code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/) - scan_java_script encoder, code + if in_attribute == :script + scan_java_script encoder, code + else + scan_css encoder, code, [:block] + end match = scan(/["']/) - encoder.text_token match, :inline_delimiter if match - encoder.end_group :inline + encoder.text_token match, :delimiter if match + encoder.end_group :string state = :attribute in_attribute = nil else -- cgit v1.2.1