summary | refs | log | tree | commit | diff
path: root/lib/coderay/scanners
diff options
context:
space:
mode:
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- lib/coderay/scanners/ruby.rb 13
-rw-r--r-- lib/coderay/scanners/ruby/patterns.rb 36
2 files changed, 29 insertions, 20 deletions
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index b8cba97..cb9637b 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -130,14 +130,14 @@ module Scanners
if match = scan(/[ \t\f]+/)
kind = :space
match << scan(/\s*/) unless eos? || heredocs
- value_expected = true if match.index(?\n) # FIXME not quite true
+ value_expected = true if match.index(?\n)
tokens << [match, kind]
next
elsif match = scan(/\\?\n/)
kind = :space
if match == "\n"
- value_expected = true # FIXME not quite true
+ value_expected = true
state = :initial if state == :undef_comma_expected
end
if heredocs
@@ -159,7 +159,6 @@ module Scanners
elsif match = scan(/\#.*/) or
( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
kind = :comment
- value_expected = true
tokens << [match, kind]
next
@@ -176,9 +175,9 @@ module Scanners
kind = :constant
elsif kind == :reserved
state = patterns::DEF_NEW_STATE[match]
+ value_expected = :set if patterns::VALUE_EXPECTING_KEYWORDS[match]
end
end
- ## experimental!
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
@@ -186,7 +185,6 @@ module Scanners
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
# OPERATORS #
- # TODO: match (), [], {} as one single operator
elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
value_expected = :set
@@ -289,6 +287,7 @@ module Scanners
kind = :error
match = (scan(/./mu) rescue nil) || getch
if !unicode && match.size > 1
+ # warn 'Switching to unicode mode: %p' % ['ä'[/#{patterns::METHOD_NAME}/uo]]
unicode = true
unscan
next
@@ -298,6 +297,10 @@ module Scanners
elsif state == :def_expected
state = :initial
+ if scan(/self\./)
+ tokens << ['self', :pre_constant]
+ tokens << ['.', :operator]
+ end
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
kind = :method
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index cf5f8c1..51ceba3 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -1,3 +1,4 @@
+# encoding: utf-8
module CodeRay
module Scanners
@@ -31,7 +32,7 @@ module Scanners
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :pre_constant)
- IDENT = /[^\W\d]\w*/
+ IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/
METHOD_NAME = / #{IDENT} [?!]? /ox
METHOD_NAME_OPERATOR = /
@@ -59,7 +60,7 @@ module Scanners
QUOTE_TO_TYPE.default = :string
REGEXP_MODIFIERS = /[mixounse]*/
- REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
+ REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
DECIMAL = /\d+(?:_\d+)*/
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
@@ -141,14 +142,19 @@ module Scanners
| #{CHARACTER}
)
/x
+ VALUE_EXPECTING_KEYWORDS = WordList.new.add(%w[
+ and end in or unless begin
+ defined? ensure redo super until
+ break do next rescue then
+ when case else for retry
+ while elsif if not return
+ yield
+ ])
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
- # FIXME: \s and = are only a workaround, they are still allowed
- # as delimiters.
- FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
FancyStringType = {
@@ -174,15 +180,15 @@ module Scanners
CLOSING_PAREN.each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with <<
OPENING_PAREN = CLOSING_PAREN.invert
- STRING_PATTERN = Hash.new { |h, k|
+ STRING_PATTERN = Hash.new do |h, k|
delim, interpreted = *k
delim_pattern = Regexp.escape(delim)
if closing_paren = CLOSING_PAREN[delim]
delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix
delim_pattern << Regexp.escape(closing_paren)
end
-
-
+ delim_pattern << '\\\\' unless delim == '\\'
+
special_escapes =
case interpreted
when :regexp_symbols
@@ -190,16 +196,16 @@ module Scanners
when :words
'| \s'
end
-
+
h[k] =
if interpreted and not delim == '#'
- / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
+ / (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
else
- / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
+ / (?= [#{delim_pattern}] #{special_escapes} ) /mx
end
- }
+ end
- HEREDOC_PATTERN = Hash.new { |h, k|
+ HEREDOC_PATTERN = Hash.new do |h, k|
delim, interpreted, indented = *k
delim_pattern = Regexp.escape(delim.dup)
delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
@@ -209,12 +215,12 @@ module Scanners
else
/ (?= #{delim_pattern}() | \\ ) /mx
end
- }
+ end
def initialize kind, interpreted, delim, heredoc = false
if heredoc
pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
- delim = nil
+ delim = nil
else
pattern = STRING_PATTERN[ [delim, interpreted] ]
if paren = CLOSING_PAREN[delim]