From 5ee15661dbc2da70927f588e310315233aff6eea Mon Sep 17 00:00:00 2001 From: murphy Date: Mon, 10 Apr 2006 03:06:50 +0000 Subject: Large update: Scanners for HTML, RHTML and Nitro-XHTML added. CSS style changes/enhancments (mainly the new background color for inline code, affects all Ruby code.) Demos and tests adjusted. Plugin: new PluginHost::default method. Scanner: - New setup method - ability to re-use a scanner - ability to keep the tokens - minor changes to token caching and string flattening Encoder: Error if token content is neither String nor Symbol. HTML encoder: - more warnings for unclosed tokens - output now UTF-8 Ruby Scanner: - bug: symbols before => now do not include =; {:foo=>bar} is valid Ruby code - try to close all open tokens - constants now all with specific namespace (for speed, I hope) Styles: new :entity/en class. Test suite now gives hinted HTML output. --- lib/coderay/scanners/_map.rb | 4 +- lib/coderay/scanners/html.rb | 57 ++++++++++------ lib/coderay/scanners/nitro_html.rb | 123 ++++++++++++++++++++++++++++++++++ lib/coderay/scanners/rhtml.rb | 63 +++++++++++++++++ lib/coderay/scanners/ruby.rb | 35 ++++++---- lib/coderay/scanners/ruby/patterns.rb | 2 +- 6 files changed, 248 insertions(+), 36 deletions(-) create mode 100644 lib/coderay/scanners/nitro_html.rb create mode 100644 lib/coderay/scanners/rhtml.rb (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index fc53d91..f6e4452 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -6,7 +6,9 @@ module CodeRay :pascal => :delphi, :irb => :ruby, :xml => :html, - :xhtml => :html + :xhtml => :nitro_html + + default :plain end end diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 62da13b..a1efa9e 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,8 +1,8 @@ -#require 'coderay/common_patterns' - module CodeRay module Scanners # HTML Scanner + # + # $Id$ class HTML < Scanner include Streamable @@ -27,10 +27,21 @@ module CodeRay module Scanners ; /ox + PLAIN_STRING_CONTENT = { + "'" => /[^&'>\n]+/, + '"' => /[^&">\n]+/, + } + private + def setup + @state = :initial + @plain_string_content = nil + end + def scan_tokens tokens, options - - state = :initial + + state = @state + plain_string_content = @plain_string_content until eos? @@ -55,17 +66,13 @@ module CodeRay module Scanners kind = :comment elsif scan(/<\/[-\w_.:]*>/m) kind = :tag - elsif match = scan(/<[-\w_.:]*/m) + elsif match = scan(/<[-\w_.:]*>?/m) kind = :tag - if match?(/>/) - match << getch - else - state = :attribute - end + state = :attribute unless match[-1] == ?> elsif scan(/[^<>&]+/) kind = :plain elsif scan(/#{ENTITY}/ox) - kind = :char + kind = :entity elsif scan(/>/) kind = :error else @@ -79,6 +86,8 @@ module CodeRay module Scanners elsif scan(/#{ATTR_NAME}/o) kind = :attribute_name state = :attribute_equal + else + getch end when :attribute_equal @@ -98,29 +107,32 @@ module CodeRay module Scanners if scan(/#{ATTR_VALUE_UNQUOTED}/o) kind = :attribute_value state = :attribute - elsif scan(/"/) + elsif match = scan(/["']/) tokens << [:open, :string] state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] kind = :delimiter elsif scan(/#{TAG_END}/o) kind = :tag state = :initial + else + getch end when :attribute_value_string - if scan(/[^"&\n]+/) + if scan(plain_string_content) kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] + elsif scan(/['"]/) + tokens << [matched, :delimiter] tokens << [:close, :string] state = :attribute next elsif scan(/#{ENTITY}/ox) - kind = :char - elsif match(/\n/) + kind = :entity + elsif match(/[\n>]/) tokens << [:close, :string] - state = :attribute - next + kind = error + state = :initial end else @@ -136,10 +148,15 @@ module CodeRay module Scanners [[match, kind], line], tokens end raise_inspect 'Empty token', tokens unless match - + tokens << [match, kind] end + if options[:keep_state] + @state = state + @plain_string_content = plain_string_content + end + tokens end diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb new file mode 100644 index 0000000..86d4992 --- /dev/null +++ b/lib/coderay/scanners/nitro_html.rb @@ -0,0 +1,123 @@ +module CodeRay module Scanners + + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class NitroHTML < Scanner + + include Streamable + register_for :nitro_html + + NITRO_RUBY_BLOCK = / + <\?r + (?> + [^\?]* + (?> \?(?!>) [^\?]* )* + ) + (?: \?> )? + | + + (?> + [^<]* + (?> <(?!\/ruby>) [^<]* )* + ) + (?: <\/ruby> )? + | + <% + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /mx + + NITRO_VALUE_BLOCK = / + \# + (?: + \{ + [^{}]* + (?> + \{ [^}]* \} + (?> [^{}]* ) + )* + \}? + | \| [^|]* \|? + | \( [^)]* \)? + | \[ [^\]]* \]? + | \\ [^\\]* \\? + ) + /x + + NITRO_ENTITY = / + % (?: \#\d+ | \w+ ) ; + / + + START_OF_RUBY = / + (?=[<\#%]) + < (?: \?r | % | ruby> ) + | \# [{(|] + | % (?: \#\d+ | \w+ ) ; + /x + + CLOSING_PAREN = Hash.new do |h, p| + h[p] = p + end.update( { + '(' => ')', + '[' => ']', + '{' => '}', + } ) + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) + start_tag = match[0,2] + delimiter = CLOSING_PAREN[start_tag[1,1]] + end_tag = match[-1,1] == delimiter ? delimiter : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) + start_tag = '' ? '?>' : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -(end_tag.size)-1] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif entity = scan(/#{NITRO_ENTITY}/o) + tokens << [entity, :entity] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end end diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb new file mode 100644 index 0000000..77a4366 --- /dev/null +++ b/lib/coderay/scanners/rhtml.rb @@ -0,0 +1,63 @@ +module CodeRay module Scanners + + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class RHTML < Scanner + + include Streamable + register_for :rhtml + + ERB_RUBY_BLOCK = / + <%(?!%)[=-]? + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /x + + START_OF_ERB = / + <%(?!%) + /x + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{ERB_RUBY_BLOCK}/o) + start_tag = match[/\A<%[-=]?/] + end_tag = match[/%?>?\z/] + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 810e1fd..9a33bef 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -36,12 +36,14 @@ module CodeRay module Scanners depth = nil states = [] + c = self.class + until eos? type = :error match = nil kind = nil - if state.instance_of? StringState + if state.instance_of? c::StringState # {{{ match = scan_until(state.pattern) || scan_until(/\z/) tokens << [match, :content] unless match.empty? @@ -74,7 +76,7 @@ module CodeRay module Scanners tokens = saved_tokens regexp = tokens for text, type in regexp - if text.is_a? String + if text.is_a? ::String case type when :content text.scan(/([^#]+)|(#.*)/) do |plain, comment| @@ -141,7 +143,7 @@ module CodeRay module Scanners state.paren_depth += 1 tokens << [match, :nesting_delimiter] - when REGEXP_SYMBOLS + when /#{REGEXP_SYMBOLS}/ox tokens << [match, :function] else @@ -190,15 +192,15 @@ module CodeRay module Scanners if last_token_dot type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end else - type = IDENT_KIND[match] + type = c::IDENT_KIND[match] if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) type = :constant elsif type == :reserved - state = DEF_NEW_STATE[match] + state = c::DEF_NEW_STATE[match] end end ## experimental! - fancy_allowed = regexp_allowed = :set if REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) + fancy_allowed = regexp_allowed = :set if c::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) # OPERATORS # elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or @@ -226,7 +228,7 @@ module CodeRay module Scanners elsif match = scan(/ ['"] /mx) tokens << [:open, :string] type = :delimiter - state = StringState.new :string, match == '"', match # important for streaming + state = c::StringState.new :string, match == '"', match # important for streaming elsif match = scan(/#{INSTANCE_VARIABLE}/o) type = :instance_variable @@ -235,7 +237,7 @@ module CodeRay module Scanners tokens << [:open, :regexp] type = :delimiter interpreted = true - state = StringState.new :regexp, interpreted, match + state = c::StringState.new :regexp, interpreted, match if parse_regexp tokens = [] saved_tokens = tokens @@ -251,7 +253,7 @@ module CodeRay module Scanners tokens << [':', :symbol] match = delim.chr type = :delimiter - state = StringState.new :symbol, delim == ?", match + state = c::StringState.new :symbol, delim == ?", match else type = :symbol end @@ -264,11 +266,11 @@ module CodeRay module Scanners indented = self[1] == '-' quote = self[3] delim = self[quote ? 4 : 2] - type = QUOTE_TO_TYPE[quote] + type = c::QUOTE_TO_TYPE[quote] tokens << [:open, type] tokens << [match, :delimiter] match = :close - heredoc = StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) + heredoc = c::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) heredocs ||= [] # create heredocs if empty heredocs << heredoc @@ -277,7 +279,7 @@ module CodeRay module Scanners raise_inspect 'Unknown fancy string: %%%p' % k, tokens end tokens << [:open, type] - state = StringState.new type, interpreted, self[2] + state = c::StringState.new type, interpreted, self[2] type = :delimiter elsif fancy_allowed and match = scan(/#{CHARACTER}/o) @@ -293,7 +295,7 @@ module CodeRay module Scanners else tokens << [:open, :shell] type = :delimiter - state = StringState.new :shell, true, match + state = c::StringState.new :shell, true, match end elsif match = scan(/#{GLOBAL_VARIABLE}/o) @@ -326,7 +328,7 @@ module CodeRay module Scanners tokens << [':', :symbol] match = delim.chr type = :delimiter - state = StringState.new :symbol, delim == ?", match + state = c::StringState.new :symbol, delim == ?", match state.next_state = :undef_comma_expected else type = :symbol @@ -377,6 +379,11 @@ module CodeRay module Scanners end end + states << state if state.is_a? c::StringState + until states.empty? + tokens << [:close, states.pop.type] + end + tokens end end diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index c007d8c..7bf9103 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -46,7 +46,7 @@ module CodeRay module Scanners | <=?>? | >=? # comparison, rocket operator | ===? # simple equality and case equality /ox - METHOD_NAME_EX = / #{IDENT} [?!=]? | #{METHOD_NAME_OPERATOR} /ox + METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox INSTANCE_VARIABLE = / @ #{IDENT} /ox CLASS_VARIABLE = / @@ #{IDENT} /ox OBJECT_VARIABLE = / @@? #{IDENT} /ox -- cgit v1.2.1