From 196765788f6f03fb0754e71c7038669377797374 Mon Sep 17 00:00:00 2001 From: murphy Date: Sun, 9 Jul 2006 23:18:40 +0000 Subject: Fixed another bug in the Ruby scanner, this time it was unfinished heredocs with empty delimiter. Fixed documentation uploading. --- lib/coderay/scanners/ruby.rb | 729 ++++++++++++++++++++++--------------------- 1 file changed, 365 insertions(+), 364 deletions(-) (limited to 'lib/coderay/scanners/ruby.rb') diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 2a415eb..7ba3029 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -1,397 +1,398 @@ module CodeRay module Scanners - # This scanner is really complex, since Ruby _is_ a complex language! - # - # It tries to highlight 100% of all common code, - # and 90% of strange codes. - # - # It is optimized for HTML highlighting, and is not very useful for - # parsing or pretty printing. - # - # For now, I think it's better than the scanners in VIM or Syntax, or - # any highlighter I was able to find, except Caleb's RubyLexer. - # - # I hope it's also better than the rdoc/irb lexer. - class Ruby < Scanner + # This scanner is really complex, since Ruby _is_ a complex language! + # + # It tries to highlight 100% of all common code, + # and 90% of strange codes. + # + # It is optimized for HTML highlighting, and is not very useful for + # parsing or pretty printing. + # + # For now, I think it's better than the scanners in VIM or Syntax, or + # any highlighter I was able to find, except Caleb's RubyLexer. + # + # I hope it's also better than the rdoc/irb lexer. + class Ruby < Scanner - include Streamable + include Streamable - register_for :ruby + register_for :ruby - helper :patterns - - DEFAULT_OPTIONS = { - :parse_regexps => true, - } + helper :patterns + + DEFAULT_OPTIONS = { + :parse_regexps => true, + } - private - def scan_tokens tokens, options - parse_regexp = false # options[:parse_regexps] - first_bake = saved_tokens = nil - last_token_dot = false - fancy_allowed = regexp_allowed = true - heredocs = nil - last_state = nil - state = :initial - depth = nil - states = [] + private + def scan_tokens tokens, options + parse_regexp = false # options[:parse_regexps] + first_bake = saved_tokens = nil + last_token_dot = false + fancy_allowed = regexp_allowed = true + heredocs = nil + last_state = nil + state = :initial + depth = nil + states = [] - patterns = Patterns # avoid constant lookup + patterns = Patterns # avoid constant lookup - until eos? - type = :error - match = nil - kind = nil + until eos? + type = :error + match = nil + kind = nil - if state.instance_of? patterns::StringState + if state.instance_of? patterns::StringState # {{{ - match = scan_until(state.pattern) || scan_until(/\z/) - tokens << [match, :content] unless match.empty? - break if eos? - - if state.heredoc and self[1] - match = getch + scan_until(/$/) - tokens << [match, :delimiter] - tokens << [:close, state.type] - state = state.next_state - next - end - - case match = getch - - when state.delim - if state.paren - state.paren_depth -= 1 - if state.paren_depth > 0 - tokens << [match, :nesting_delimiter] - next - end - end - tokens << [match, :delimiter] - if state.type == :regexp and not eos? - modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) - tokens << [modifiers, :modifier] unless modifiers.empty? - if parse_regexp - extended = modifiers.index ?x - tokens = saved_tokens - regexp = tokens - for text, type in regexp - if text.is_a? ::String - case type - when :content - text.scan(/([^#]+)|(#.*)/) do |plain, comment| - if plain - tokens << [plain, :content] - else - tokens << [comment, :comment] - end - end - when :character - if text[/\\(?:[swdSWDAzZbB]|\d+)/] - tokens << [text, :modifier] - else - tokens << [text, type] - end - else - tokens << [text, type] - end - else - tokens << [text, type] - end - end - first_bake = saved_tokens = nil - end - end - tokens << [:close, state.type] - fancy_allowed = regexp_allowed = false - state = state.next_state - - when '\\' - if state.interpreted - if esc = scan(/ #{patterns::ESCAPE} /ox) - tokens << [match + esc, :char] - else - tokens << [match, :error] - end - else - case m = getch - when state.delim, '\\' - tokens << [match + m, :char] + match = scan_until(state.pattern) || scan_until(/\z/) + tokens << [match, :content] unless match.empty? + break if eos? + + if state.heredoc and self[1] # end of heredoc + match = getch.to_s + match << scan_until(/$/) unless eos? + tokens << [match, :delimiter] + tokens << [:close, state.type] + state = state.next_state + next + end + + case match = getch + + when state.delim + if state.paren + state.paren_depth -= 1 + if state.paren_depth > 0 + tokens << [match, :nesting_delimiter] + next + end + end + tokens << [match, :delimiter] + if state.type == :regexp and not eos? + modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) + tokens << [modifiers, :modifier] unless modifiers.empty? + if parse_regexp + extended = modifiers.index ?x + tokens = saved_tokens + regexp = tokens + for text, type in regexp + if text.is_a? ::String + case type + when :content + text.scan(/([^#]+)|(#.*)/) do |plain, comment| + if plain + tokens << [plain, :content] + else + tokens << [comment, :comment] + end + end + when :character + if text[/\\(?:[swdSWDAzZbB]|\d+)/] + tokens << [text, :modifier] + else + tokens << [text, type] + end + else + tokens << [text, type] + end + else + tokens << [text, type] + end + end + first_bake = saved_tokens = nil + end + end + tokens << [:close, state.type] + fancy_allowed = regexp_allowed = false + state = state.next_state + + when '\\' + if state.interpreted + if esc = scan(/ #{patterns::ESCAPE} /ox) + tokens << [match + esc, :char] + else + tokens << [match, :error] + end + else + case m = getch + when state.delim, '\\' + tokens << [match + m, :char] when nil tokens << [match, :error] - else - tokens << [match + m, :content] - end - end - - when '#' - case peek(1)[0] - when ?{ - states.push [state, depth, heredocs] - fancy_allowed = regexp_allowed = true - state = :initial - depth = 1 - tokens << [:open, :inline] - tokens << [match + getch, :delimiter] - when ?$, ?@ - tokens << [match, :escape] - last_state = state # scan one token as normal code, then return here - state = :initial - else - raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens - end - - when state.paren - state.paren_depth += 1 - tokens << [match, :nesting_delimiter] + else + tokens << [match + m, :content] + end + end + + when '#' + case peek(1)[0] + when ?{ + states.push [state, depth, heredocs] + fancy_allowed = regexp_allowed = true + state = :initial + depth = 1 + tokens << [:open, :inline] + tokens << [match + getch, :delimiter] + when ?$, ?@ + tokens << [match, :escape] + last_state = state # scan one token as normal code, then return here + state = :initial + else + raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens + end + + when state.paren + state.paren_depth += 1 + tokens << [match, :nesting_delimiter] - when /#{patterns::REGEXP_SYMBOLS}/ox - tokens << [match, :function] - - else - raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens - - end - next + when /#{patterns::REGEXP_SYMBOLS}/ox + tokens << [match, :function] + + else + raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens + + end + next # }}} - else -# {{{ - if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or - ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) - fancy_allowed = true - case m = match[0] - when ?\s, ?\t, ?\f - match << scan(/\s*/) unless eos? or heredocs - type = :space - when ?\n, ?\\ - type = :space - if m == ?\n - regexp_allowed = true - state = :initial if state == :undef_comma_expected - end - if heredocs - unscan # heredoc scanning needs \n at start - state = heredocs.shift - tokens << [:open, state.type] - heredocs = nil if heredocs.empty? - next - else - match << scan(/\s*/) unless eos? - end - when ?#, ?=, ?_ - type = :comment - regexp_allowed = true - else - raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens - end - tokens << [match, type] - next + else +# {{{ + if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or + ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) + fancy_allowed = true + case m = match[0] + when ?\s, ?\t, ?\f + match << scan(/\s*/) unless eos? or heredocs + type = :space + when ?\n, ?\\ + type = :space + if m == ?\n + regexp_allowed = true + state = :initial if state == :undef_comma_expected + end + if heredocs + unscan # heredoc scanning needs \n at start + state = heredocs.shift + tokens << [:open, state.type] + heredocs = nil if heredocs.empty? + next + else + match << scan(/\s*/) unless eos? + end + when ?#, ?=, ?_ + type = :comment + regexp_allowed = true + else + raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens + end + tokens << [match, type] + next - elsif state == :initial - - # IDENTS # - if match = scan(/#{patterns::METHOD_NAME}/o) - if last_token_dot - type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end - else - type = patterns::IDENT_KIND[match] - if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) - type = :constant - elsif type == :reserved - state = patterns::DEF_NEW_STATE[match] - end - end - ## experimental! - fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) - - # OPERATORS # - elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or - (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) - if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ - regexp_allowed = fancy_allowed = :set - end - last_token_dot = :set if match == '.' or match == '::' - type = :operator - unless states.empty? - case match - when '{' - depth += 1 - when '}' - depth -= 1 - if depth == 0 - state, depth, heredocs = states.pop - tokens << [match, :delimiter] - type = :inline - match = :close - end - end - end - - elsif match = scan(/ ['"] /mx) - tokens << [:open, :string] - type = :delimiter - state = patterns::StringState.new :string, match == '"', match # important for streaming - - elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) - type = :instance_variable + elsif state == :initial + + # IDENTS # + if match = scan(/#{patterns::METHOD_NAME}/o) + if last_token_dot + type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end + else + type = patterns::IDENT_KIND[match] + if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) + type = :constant + elsif type == :reserved + state = patterns::DEF_NEW_STATE[match] + end + end + ## experimental! + fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) + + # OPERATORS # + elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or + (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) + if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ + regexp_allowed = fancy_allowed = :set + end + last_token_dot = :set if match == '.' or match == '::' + type = :operator + unless states.empty? + case match + when '{' + depth += 1 + when '}' + depth -= 1 + if depth == 0 + state, depth, heredocs = states.pop + tokens << [match, :delimiter] + type = :inline + match = :close + end + end + end + + elsif match = scan(/ ['"] /mx) + tokens << [:open, :string] + type = :delimiter + state = patterns::StringState.new :string, match == '"', match # important for streaming + + elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) + type = :instance_variable - elsif regexp_allowed and match = scan(/\//) - tokens << [:open, :regexp] - type = :delimiter - interpreted = true - state = patterns::StringState.new :regexp, interpreted, match - if parse_regexp - tokens = [] - saved_tokens = tokens - end - - elsif match = scan(/#{patterns::NUMERIC}/o) - type = if self[1] then :float else :integer end + elsif regexp_allowed and match = scan(/\//) + tokens << [:open, :regexp] + type = :delimiter + interpreted = true + state = patterns::StringState.new :regexp, interpreted, match + if parse_regexp + tokens = [] + saved_tokens = tokens + end + + elsif match = scan(/#{patterns::NUMERIC}/o) + type = if self[1] then :float else :integer end - elsif match = scan(/#{patterns::SYMBOL}/o) - case delim = match[1] - when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] - match = delim.chr - type = :delimiter - state = patterns::StringState.new :symbol, delim == ?", match - else - type = :symbol - end - - elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) - regexp_allowed = fancy_allowed = :set - type = :operator - - elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) - indented = self[1] == '-' - quote = self[3] - delim = self[quote ? 4 : 2] - type = patterns::QUOTE_TO_TYPE[quote] - tokens << [:open, type] - tokens << [match, :delimiter] - match = :close - heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) - heredocs ||= [] # create heredocs if empty - heredocs << heredoc - - elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) - type, interpreted = *patterns::FancyStringType.fetch(self[1]) do - raise_inspect 'Unknown fancy string: %%%p' % k, tokens - end - tokens << [:open, type] - state = patterns::StringState.new type, interpreted, self[2] - type = :delimiter + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + type = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + else + type = :symbol + end + + elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) + regexp_allowed = fancy_allowed = :set + type = :operator + + elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) + indented = self[1] == '-' + quote = self[3] + delim = self[quote ? 4 : 2] + type = patterns::QUOTE_TO_TYPE[quote] + tokens << [:open, type] + tokens << [match, :delimiter] + match = :close + heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) + heredocs ||= [] # create heredocs if empty + heredocs << heredoc + + elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) + type, interpreted = *patterns::FancyStringType.fetch(self[1]) do + raise_inspect 'Unknown fancy string: %%%p' % k, tokens + end + tokens << [:open, type] + state = patterns::StringState.new type, interpreted, self[2] + type = :delimiter - elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) - type = :integer + elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) + type = :integer - elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) - regexp_allowed = fancy_allowed = :set - type = :operator + elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) + regexp_allowed = fancy_allowed = :set + type = :operator - elsif match = scan(/`/) - if last_token_dot - type = :operator - else - tokens << [:open, :shell] - type = :delimiter - state = patterns::StringState.new :shell, true, match - end - - elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) - type = :global_variable - - elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) - type = :class_variable - - else - match = getch - - end - - elsif state == :def_expected - state = :initial - if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) - type = :method - else - next - end + elsif match = scan(/`/) + if last_token_dot + type = :operator + else + tokens << [:open, :shell] + type = :delimiter + state = patterns::StringState.new :shell, true, match + end + + elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) + type = :global_variable + + elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) + type = :class_variable + + else + match = getch + + end + + elsif state == :def_expected + state = :initial + if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) + type = :method + else + next + end - elsif state == :undef_expected - state = :undef_comma_expected - if match = scan(/#{patterns::METHOD_NAME_EX}/o) - type = :method - elsif match = scan(/#{patterns::SYMBOL}/o) - case delim = match[1] - when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] - match = delim.chr - type = :delimiter - state = patterns::StringState.new :symbol, delim == ?", match - state.next_state = :undef_comma_expected - else - type = :symbol - end - else - state = :initial - next - end - - elsif state == :undef_comma_expected - if match = scan(/,/) - type = :operator - state = :undef_expected - else - state = :initial - next - end + elsif state == :undef_expected + state = :undef_comma_expected + if match = scan(/#{patterns::METHOD_NAME_EX}/o) + type = :method + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + type = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + state.next_state = :undef_comma_expected + else + type = :symbol + end + else + state = :initial + next + end + + elsif state == :undef_comma_expected + if match = scan(/,/) + type = :operator + state = :undef_expected + else + state = :initial + next + end - elsif state == :module_expected - if match = scan(/<