diff options
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- | lib/coderay/scanners/ruby.rb | 13 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 36 |
2 files changed, 29 insertions, 20 deletions
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index b8cba97..cb9637b 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -130,14 +130,14 @@ module Scanners if match = scan(/[ \t\f]+/) kind = :space match << scan(/\s*/) unless eos? || heredocs - value_expected = true if match.index(?\n) # FIXME not quite true + value_expected = true if match.index(?\n) tokens << [match, kind] next elsif match = scan(/\\?\n/) kind = :space if match == "\n" - value_expected = true # FIXME not quite true + value_expected = true state = :initial if state == :undef_comma_expected end if heredocs @@ -159,7 +159,6 @@ module Scanners elsif match = scan(/\#.*/) or ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) kind = :comment - value_expected = true tokens << [match, kind] next @@ -176,9 +175,9 @@ module Scanners kind = :constant elsif kind == :reserved state = patterns::DEF_NEW_STATE[match] + value_expected = :set if patterns::VALUE_EXPECTING_KEYWORDS[match] end end - ## experimental! value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o) elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o) @@ -186,7 +185,6 @@ module Scanners value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o) # OPERATORS # - # TODO: match (), [], {} as one single operator elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x) if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ value_expected = :set @@ -289,6 +287,7 @@ module Scanners kind = :error match = (scan(/./mu) rescue nil) || getch if !unicode && match.size > 1 + # warn 'Switchig to unicode mode: %p' % ['ä'[/#{patterns::METHOD_NAME}/uo]] unicode = true unscan next @@ -298,6 +297,10 @@ module Scanners elsif state == :def_expected state = :initial + if scan(/self\./) + tokens << ['self', :pre_constant] + tokens << ['.', :operator] + end if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo : /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) kind = :method diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index cf5f8c1..51ceba3 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 module CodeRay module Scanners @@ -31,7 +32,7 @@ module Scanners add(RESERVED_WORDS, :reserved). add(PREDEFINED_CONSTANTS, :pre_constant) - IDENT = /[^\W\d]\w*/ + IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/ METHOD_NAME = / #{IDENT} [?!]? /ox METHOD_NAME_OPERATOR = / @@ -59,7 +60,7 @@ module Scanners QUOTE_TO_TYPE.default = :string REGEXP_MODIFIERS = /[mixounse]*/ - REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ + REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/ DECIMAL = /\d+(?:_\d+)*/ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ @@ -141,14 +142,19 @@ module Scanners | #{CHARACTER} ) /x + VALUE_EXPECTING_KEYWORDS = WordList.new.add(%w[ + and end in or unless begin + defined? ensure redo super until + break do next rescue then + when case else for retry + while elsif if not return + yield + ]) RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x - # FIXME: \s and = are only a workaround, they are still allowed - # as delimiters. - FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx FancyStringType = { @@ -174,15 +180,15 @@ module Scanners CLOSING_PAREN.each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with << OPENING_PAREN = CLOSING_PAREN.invert - STRING_PATTERN = Hash.new { |h, k| + STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k delim_pattern = Regexp.escape(delim) if closing_paren = CLOSING_PAREN[delim] delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix delim_pattern << Regexp.escape(closing_paren) end - - + delim_pattern << '\\\\' unless delim == '\\' + special_escapes = case interpreted when :regexp_symbols @@ -190,16 +196,16 @@ module Scanners when :words '| \s' end - + h[k] = if interpreted and not delim == '#' - / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx + / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx else - / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx + / (?= [#{delim_pattern}] #{special_escapes} ) /mx end - } + end - HEREDOC_PATTERN = Hash.new { |h, k| + HEREDOC_PATTERN = Hash.new do |h, k| delim, interpreted, indented = *k delim_pattern = Regexp.escape(delim.dup) delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x @@ -209,12 +215,12 @@ module Scanners else / (?= #{delim_pattern}() | \\ ) /mx end - } + end def initialize kind, interpreted, delim, heredoc = false if heredoc pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] - delim = nil + delim = nil else pattern = STRING_PATTERN[ [delim, interpreted] ] if paren = CLOSING_PAREN[delim] |