summaryrefslogtreecommitdiff
path: root/lib/coderay/scanners/ruby.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/scanners/ruby.rb')
-rw-r--r--lib/coderay/scanners/ruby.rb333
1 files changed, 333 insertions, 0 deletions
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
new file mode 100644
index 0000000..433726b
--- /dev/null
+++ b/lib/coderay/scanners/ruby.rb
@@ -0,0 +1,333 @@
+module CodeRay module Scanners
+
+ # This scanner is really complex, since Ruby _is_ a complex language!
+ #
+ # It tries to highlight 100% of all common code,
+ # and 90% of strange codes.
+ #
+ # It is optimized for HTML highlighting, and is not very useful for
+ # parsing or pretty printing.
+ #
+ # For now, I think it's better than the scanners in VIM or Syntax, or
+ # any highlighter I was able to find, except Caleb's RubyLexer.
+ #
+ # I hope it's also better than the rdoc/irb lexer.
+ class Ruby < Scanner
+
+ include Streamable
+
+ register_for :ruby
+
+ require 'coderay/scanners/helpers/ruby_helper'
+
+ DEFAULT_OPTIONS = {
+ :parse_regexps => true,
+ }
+
+ private
+ def scan_tokens tokens, options
+ parse_regexp = false # options[:parse_regexps]
+ first_bake = saved_tokens = nil
+ last_token_dot = false
+ fancy_allowed = regexp_allowed = true
+ heredocs = nil
+ last_state = nil
+ state = :initial
+ depth = nil
+ states = []
+
+ until eos?
+ type = :error
+ match = nil
+ kind = nil
+
+ if state.instance_of? StringState
+# {{{
+
+ match = scan_until(state.pattern) || scan_until(/\z/)
+ tokens << [match, :content] unless match.empty?
+ break if eos?
+
+ if state.heredoc and self[1]
+ match = getch + scan_until(/$/)
+ tokens << [match, :delimiter]
+ tokens << [:close, state.type]
+ state = :initial
+ next
+ end
+
+ case match = getch
+
+ when state.delim
+ if state.paren
+ state.paren_depth -= 1
+ if state.paren_depth > 0
+ tokens << [match, :nesting_delimiter]
+ next
+ end
+ end
+ tokens << [match, :delimiter]
+ if state.type == :regexp and not eos?
+ modifiers = scan(/#{REGEXP_MODIFIERS}/ox)
+ tokens << [modifiers, :modifier] unless modifiers.empty?
+ if parse_regexp
+ extended = modifiers.index ?x
+ tokens, regexp = saved_tokens, tokens
+ for text, type in regexp
+ if text.is_a? String
+ case type
+ when :content
+ text.scan(/([^#]+)|(#.*)/) do |plain, comment|
+ if plain
+ tokens << [plain, :content]
+ else
+ tokens << [comment, :comment]
+ end
+ end
+ when :character
+ if text[/\\(?:[swdSWDAzZbB]|\d+)/]
+ tokens << [text, :modifier]
+ else
+ tokens << [text, type]
+ end
+ else
+ tokens << [text, type]
+ end
+ else
+ tokens << [text, type]
+ end
+ end
+ first_bake = saved_tokens = nil
+ end
+ end
+ tokens << [:close, state.type]
+ fancy_allowed = regexp_allowed = false
+ state = :initial
+
+ when '\\'
+ if state.interpreted
+ if esc = scan(/ #{ESCAPE} /ox)
+ tokens << [match + esc, :char]
+ else
+ tokens << [match, :error]
+ end
+ else
+ case m = getch
+ when state.delim, '\\'
+ tokens << [match + m, :char]
+ else
+ tokens << [match + m, :content]
+ end
+ end
+
+ when '#'
+ case peek(1)[0]
+ when ?{
+ states.push [state, depth, heredocs]
+ fancy_allowed = regexp_allowed = true
+ state, depth = :initial, 1
+ tokens << [match + getch, :escape]
+ when ?$, ?@
+ tokens << [match, :escape]
+ last_state = state # scan one token as normal code, then return here
+ state = :initial
+ else
+ raise "else-case # reached; #%p not handled" % peek(1), tokens
+ end
+
+ when state.paren
+ state.paren_depth += 1
+ tokens << [match, :nesting_delimiter]
+
+ when REGEXP_SYMBOLS
+ tokens << [match, :function]
+
+ else
+ raise "else-case \" reached; %p not handled, state = %p" % [match, state], tokens
+
+ end
+ next
+# }}}
+ else
+# {{{
+ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
+ ( bol? and match = scan(/ #{DATA} | #{RDOC} /ox) )
+ fancy_allowed = true
+ case m = match[0]
+ when ?\s, ?\t, ?\f
+ match << scan(/\s*/) unless eos? or heredocs
+ type = :space
+ when ?\n, ?\\
+ type = :space
+ regexp_allowed = m == ?\n
+ if heredocs
+ unscan # heredoc scanning needs \n at start
+ state = heredocs.shift
+ tokens << [:open, state.type]
+ heredocs = nil if heredocs.empty?
+ next
+ else
+ match << scan(/\s*/) unless eos?
+ end
+ when ?#, ?=, ?_
+ type = :comment
+ regexp_allowed = true
+ else
+ raise "else-case _ reached, because case %p was not handled" % [matched[0].chr], tokens
+ end
+ tokens << [match, type]
+ next
+
+ elsif state == :initial
+ if match = scan(/ \.\.?\.? | [-+*=>;,|&!\(\)\[\]~^]+ | [\{\}] | :: /x)
+ if match !~ / [.\)\]\}] \z/x or match =~ /\.\.\.?/
+ regexp_allowed = fancy_allowed = :set
+ end
+ last_token_dot = :set if match == '.' or match == '::'
+ type = :operator
+ unless states.empty?
+ case match
+ when '{'
+ depth += 1
+ when '}'
+ depth -= 1
+ if depth == 0
+ state, depth, heredocs = *states.pop
+ type = :escape
+ end
+ end
+ end
+
+ elsif match = scan(/#{METHOD_NAME}/o)
+ if last_token_dot
+ type = if match[/^[A-Z]/] then :constant else :ident end
+ else
+ type = IDENT_KIND[match]
+ if type == :ident and match[/^[A-Z]/]
+ type = :constant
+ elsif type == :reserved
+ state = DEF_NEW_STATE[match]
+ end
+ end
+ fancy_allowed = regexp_allowed = REGEXP_ALLOWED[match]
+
+ elsif match = scan(/ ['"] /mx)
+ tokens << [:open, :string]
+ type = :delimiter
+ state = StringState.new :string, match != '\'', match.dup # important for streaming
+
+ elsif match = scan(/#{INSTANCE_VARIABLE}/o)
+ type = :instance_variable
+
+ elsif regexp_allowed and match = scan(/ \/ /mx)
+ tokens << [:open, :regexp]
+ type = :delimiter
+ interpreted = true
+ state = StringState.new :regexp, interpreted, match.dup
+ if parse_regexp
+ tokens, saved_tokens = [], tokens
+ end
+
+ elsif match = scan(/#{NUMERIC}/o)
+ type = if match[/#{FLOAT}/o] then :float else :integer end
+
+ elsif fancy_allowed and match = scan(/#{SYMBOL}/o)
+ case match[1]
+ when ?', ?"
+ tokens << [:open, :symbol]
+ state = StringState.new :symbol, match[1] == ?", match[1,1]
+ end
+ type = :symbol
+
+ elsif fancy_allowed and match = scan(/#{HEREDOC_OPEN}/o)
+ indented, quote = self[1] == '-', self[3]
+ delim = self[quote ? 4 : 2]
+ type = QUOTE_TO_TYPE[quote]
+ tokens << [:open, type]
+ tokens << [match, :delimiter]
+ match = :close
+ heredoc = StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
+ heredocs ||= [] # create heredocs if empty
+ heredocs << heredoc
+
+ elsif fancy_allowed and match = scan(/#{FANCY_START}/o)
+ type, interpreted = *FancyStringType.fetch(self[1]) do
+ raise 'Unknown fancy string: %%%p' % k, tokens
+ end
+ tokens << [:open, type]
+ state = StringState.new type, interpreted, self[2]
+ type = :delimiter
+
+ elsif fancy_allowed and match = scan(/#{CHARACTER}/o)
+ type = :integer
+
+ elsif match = scan(/ [\/%<?:] /x)
+ regexp_allowed = fancy_allowed = :set
+ type = :operator
+
+ elsif match = scan(/`/)
+ if last_token_dot
+ type = :operator
+ else
+ tokens << [:open, :shell]
+ type = :delimiter
+ state = StringState.new :shell, true, '`'
+ end
+
+ elsif match = scan(/#{GLOBAL_VARIABLE}/o)
+ type = :global_variable
+
+ elsif match = scan(/#{CLASS_VARIABLE}/o)
+ type = :class_variable
+
+ else
+ match = getch
+
+ end
+
+ elsif state == :def_expected
+ if match = scan(/ (?: #{VARIABLE} (?: ::#{IDENT} )* \. )? #{METHOD_NAME_EX} /ox)
+ type = :method
+ else
+ match = getch
+ end
+ state = :initial
+
+ elsif state == :module_expected
+ if match = scan(/<</)
+ type = :operator
+ else
+ if match = scan(/ (?:#{IDENT}::)* #{IDENT} /ox)
+ type = :class
+ else
+ match = getch
+ end
+ end
+ state = :initial
+
+ end
+
+ regexp_allowed = regexp_allowed == :set
+ fancy_allowed = fancy_allowed == :set
+ last_token_dot = last_token_dot == :set
+
+ if $DEBUG
+ raise_inspect 'error token %p in line %d' % [tokens.last, line], tokens if not type or type == :error
+ end
+
+ tokens << [match, type]
+
+ if last_state
+ state = last_state
+ last_state = nil
+ end
+# }}}
+ end
+ end
+
+ tokens
+ end
+ end
+
+end end
+# vim:fdm=marker