summary | refs | log | tree | commit | diff
path: root/lib/coderay
diff options
context:
space:
mode:
author: murphy <murphy@rubychan.de> 2006-07-09 23:18:40 +0000
committer: murphy <murphy@rubychan.de> 2006-07-09 23:18:40 +0000
commit: 196765788f6f03fb0754e71c7038669377797374 (patch)
tree: 8c1fe9905c1a398dbe27dfd8682bc4f4aceffbce /lib/coderay
parent: 7f5279a503bf72fb6260d417ff3adb550eaab3fd (diff)
download: coderay-196765788f6f03fb0754e71c7038669377797374.tar.gz
Fixed another bug in the Ruby scanner, this time it was unfinished heredocs with empty delimiter.
Fixed documentation uploading.
Diffstat (limited to 'lib/coderay')
-rw-r--r-- lib/coderay/scanners/ruby.rb 729
-rw-r--r-- lib/coderay/scanners/ruby/patterns.rb 420
2 files changed, 575 insertions, 574 deletions
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index 2a415eb..7ba3029 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -1,397 +1,398 @@
module CodeRay
module Scanners
- # This scanner is really complex, since Ruby _is_ a complex language!
- #
- # It tries to highlight 100% of all common code,
- # and 90% of strange codes.
- #
- # It is optimized for HTML highlighting, and is not very useful for
- # parsing or pretty printing.
- #
- # For now, I think it's better than the scanners in VIM or Syntax, or
- # any highlighter I was able to find, except Caleb's RubyLexer.
- #
- # I hope it's also better than the rdoc/irb lexer.
- class Ruby < Scanner
+ # This scanner is really complex, since Ruby _is_ a complex language!
+ #
+ # It tries to highlight 100% of all common code,
+ # and 90% of strange codes.
+ #
+ # It is optimized for HTML highlighting, and is not very useful for
+ # parsing or pretty printing.
+ #
+ # For now, I think it's better than the scanners in VIM or Syntax, or
+ # any highlighter I was able to find, except Caleb's RubyLexer.
+ #
+ # I hope it's also better than the rdoc/irb lexer.
+ class Ruby < Scanner
- include Streamable
+ include Streamable
- register_for :ruby
+ register_for :ruby
- helper :patterns
-
- DEFAULT_OPTIONS = {
- :parse_regexps => true,
- }
+ helper :patterns
+
+ DEFAULT_OPTIONS = {
+ :parse_regexps => true,
+ }
- private
- def scan_tokens tokens, options
- parse_regexp = false # options[:parse_regexps]
- first_bake = saved_tokens = nil
- last_token_dot = false
- fancy_allowed = regexp_allowed = true
- heredocs = nil
- last_state = nil
- state = :initial
- depth = nil
- states = []
+ private
+ def scan_tokens tokens, options
+ parse_regexp = false # options[:parse_regexps]
+ first_bake = saved_tokens = nil
+ last_token_dot = false
+ fancy_allowed = regexp_allowed = true
+ heredocs = nil
+ last_state = nil
+ state = :initial
+ depth = nil
+ states = []
- patterns = Patterns # avoid constant lookup
+ patterns = Patterns # avoid constant lookup
- until eos?
- type = :error
- match = nil
- kind = nil
+ until eos?
+ type = :error
+ match = nil
+ kind = nil
- if state.instance_of? patterns::StringState
+ if state.instance_of? patterns::StringState
# {{{
- match = scan_until(state.pattern) || scan_until(/\z/)
- tokens << [match, :content] unless match.empty?
- break if eos?
-
- if state.heredoc and self[1]
- match = getch + scan_until(/$/)
- tokens << [match, :delimiter]
- tokens << [:close, state.type]
- state = state.next_state
- next
- end
-
- case match = getch
-
- when state.delim
- if state.paren
- state.paren_depth -= 1
- if state.paren_depth > 0
- tokens << [match, :nesting_delimiter]
- next
- end
- end
- tokens << [match, :delimiter]
- if state.type == :regexp and not eos?
- modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
- tokens << [modifiers, :modifier] unless modifiers.empty?
- if parse_regexp
- extended = modifiers.index ?x
- tokens = saved_tokens
- regexp = tokens
- for text, type in regexp
- if text.is_a? ::String
- case type
- when :content
- text.scan(/([^#]+)|(#.*)/) do |plain, comment|
- if plain
- tokens << [plain, :content]
- else
- tokens << [comment, :comment]
- end
- end
- when :character
- if text[/\\(?:[swdSWDAzZbB]|\d+)/]
- tokens << [text, :modifier]
- else
- tokens << [text, type]
- end
- else
- tokens << [text, type]
- end
- else
- tokens << [text, type]
- end
- end
- first_bake = saved_tokens = nil
- end
- end
- tokens << [:close, state.type]
- fancy_allowed = regexp_allowed = false
- state = state.next_state
-
- when '\\'
- if state.interpreted
- if esc = scan(/ #{patterns::ESCAPE} /ox)
- tokens << [match + esc, :char]
- else
- tokens << [match, :error]
- end
- else
- case m = getch
- when state.delim, '\\'
- tokens << [match + m, :char]
+ match = scan_until(state.pattern) || scan_until(/\z/)
+ tokens << [match, :content] unless match.empty?
+ break if eos?
+
+ if state.heredoc and self[1] # end of heredoc
+ match = getch.to_s
+ match << scan_until(/$/) unless eos?
+ tokens << [match, :delimiter]
+ tokens << [:close, state.type]
+ state = state.next_state
+ next
+ end
+
+ case match = getch
+
+ when state.delim
+ if state.paren
+ state.paren_depth -= 1
+ if state.paren_depth > 0
+ tokens << [match, :nesting_delimiter]
+ next
+ end
+ end
+ tokens << [match, :delimiter]
+ if state.type == :regexp and not eos?
+ modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
+ tokens << [modifiers, :modifier] unless modifiers.empty?
+ if parse_regexp
+ extended = modifiers.index ?x
+ tokens = saved_tokens
+ regexp = tokens
+ for text, type in regexp
+ if text.is_a? ::String
+ case type
+ when :content
+ text.scan(/([^#]+)|(#.*)/) do |plain, comment|
+ if plain
+ tokens << [plain, :content]
+ else
+ tokens << [comment, :comment]
+ end
+ end
+ when :character
+ if text[/\\(?:[swdSWDAzZbB]|\d+)/]
+ tokens << [text, :modifier]
+ else
+ tokens << [text, type]
+ end
+ else
+ tokens << [text, type]
+ end
+ else
+ tokens << [text, type]
+ end
+ end
+ first_bake = saved_tokens = nil
+ end
+ end
+ tokens << [:close, state.type]
+ fancy_allowed = regexp_allowed = false
+ state = state.next_state
+
+ when '\\'
+ if state.interpreted
+ if esc = scan(/ #{patterns::ESCAPE} /ox)
+ tokens << [match + esc, :char]
+ else
+ tokens << [match, :error]
+ end
+ else
+ case m = getch
+ when state.delim, '\\'
+ tokens << [match + m, :char]
when nil
tokens << [match, :error]
- else
- tokens << [match + m, :content]
- end
- end
-
- when '#'
- case peek(1)[0]
- when ?{
- states.push [state, depth, heredocs]
- fancy_allowed = regexp_allowed = true
- state = :initial
- depth = 1
- tokens << [:open, :inline]
- tokens << [match + getch, :delimiter]
- when ?$, ?@
- tokens << [match, :escape]
- last_state = state # scan one token as normal code, then return here
- state = :initial
- else
- raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
- end
-
- when state.paren
- state.paren_depth += 1
- tokens << [match, :nesting_delimiter]
+ else
+ tokens << [match + m, :content]
+ end
+ end
+
+ when '#'
+ case peek(1)[0]
+ when ?{
+ states.push [state, depth, heredocs]
+ fancy_allowed = regexp_allowed = true
+ state = :initial
+ depth = 1
+ tokens << [:open, :inline]
+ tokens << [match + getch, :delimiter]
+ when ?$, ?@
+ tokens << [match, :escape]
+ last_state = state # scan one token as normal code, then return here
+ state = :initial
+ else
+ raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
+ end
+
+ when state.paren
+ state.paren_depth += 1
+ tokens << [match, :nesting_delimiter]
- when /#{patterns::REGEXP_SYMBOLS}/ox
- tokens << [match, :function]
-
- else
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
-
- end
- next
+ when /#{patterns::REGEXP_SYMBOLS}/ox
+ tokens << [match, :function]
+
+ else
+ raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
+
+ end
+ next
# }}}
- else
-# {{{
- if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
- ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
- fancy_allowed = true
- case m = match[0]
- when ?\s, ?\t, ?\f
- match << scan(/\s*/) unless eos? or heredocs
- type = :space
- when ?\n, ?\\
- type = :space
- if m == ?\n
- regexp_allowed = true
- state = :initial if state == :undef_comma_expected
- end
- if heredocs
- unscan # heredoc scanning needs \n at start
- state = heredocs.shift
- tokens << [:open, state.type]
- heredocs = nil if heredocs.empty?
- next
- else
- match << scan(/\s*/) unless eos?
- end
- when ?#, ?=, ?_
- type = :comment
- regexp_allowed = true
- else
- raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
- end
- tokens << [match, type]
- next
+ else
+# {{{
+ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
+ ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
+ fancy_allowed = true
+ case m = match[0]
+ when ?\s, ?\t, ?\f
+ match << scan(/\s*/) unless eos? or heredocs
+ type = :space
+ when ?\n, ?\\
+ type = :space
+ if m == ?\n
+ regexp_allowed = true
+ state = :initial if state == :undef_comma_expected
+ end
+ if heredocs
+ unscan # heredoc scanning needs \n at start
+ state = heredocs.shift
+ tokens << [:open, state.type]
+ heredocs = nil if heredocs.empty?
+ next
+ else
+ match << scan(/\s*/) unless eos?
+ end
+ when ?#, ?=, ?_
+ type = :comment
+ regexp_allowed = true
+ else
+ raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
+ end
+ tokens << [match, type]
+ next
- elsif state == :initial
-
- # IDENTS #
- if match = scan(/#{patterns::METHOD_NAME}/o)
- if last_token_dot
- type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
- else
- type = patterns::IDENT_KIND[match]
- if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
- type = :constant
- elsif type == :reserved
- state = patterns::DEF_NEW_STATE[match]
- end
- end
- ## experimental!
- fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
-
- # OPERATORS #
- elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
- (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
- if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
- regexp_allowed = fancy_allowed = :set
- end
- last_token_dot = :set if match == '.' or match == '::'
- type = :operator
- unless states.empty?
- case match
- when '{'
- depth += 1
- when '}'
- depth -= 1
- if depth == 0
- state, depth, heredocs = states.pop
- tokens << [match, :delimiter]
- type = :inline
- match = :close
- end
- end
- end
-
- elsif match = scan(/ ['"] /mx)
- tokens << [:open, :string]
- type = :delimiter
- state = patterns::StringState.new :string, match == '"', match # important for streaming
-
- elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
- type = :instance_variable
+ elsif state == :initial
+
+ # IDENTS #
+ if match = scan(/#{patterns::METHOD_NAME}/o)
+ if last_token_dot
+ type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
+ else
+ type = patterns::IDENT_KIND[match]
+ if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
+ type = :constant
+ elsif type == :reserved
+ state = patterns::DEF_NEW_STATE[match]
+ end
+ end
+ ## experimental!
+ fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
+
+ # OPERATORS #
+ elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
+ (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
+ if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
+ regexp_allowed = fancy_allowed = :set
+ end
+ last_token_dot = :set if match == '.' or match == '::'
+ type = :operator
+ unless states.empty?
+ case match
+ when '{'
+ depth += 1
+ when '}'
+ depth -= 1
+ if depth == 0
+ state, depth, heredocs = states.pop
+ tokens << [match, :delimiter]
+ type = :inline
+ match = :close
+ end
+ end
+ end
+
+ elsif match = scan(/ ['"] /mx)
+ tokens << [:open, :string]
+ type = :delimiter
+ state = patterns::StringState.new :string, match == '"', match # important for streaming
+
+ elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
+ type = :instance_variable
- elsif regexp_allowed and match = scan(/\//)
- tokens << [:open, :regexp]
- type = :delimiter
- interpreted = true
- state = patterns::StringState.new :regexp, interpreted, match
- if parse_regexp
- tokens = []
- saved_tokens = tokens
- end
-
- elsif match = scan(/#{patterns::NUMERIC}/o)
- type = if self[1] then :float else :integer end
+ elsif regexp_allowed and match = scan(/\//)
+ tokens << [:open, :regexp]
+ type = :delimiter
+ interpreted = true
+ state = patterns::StringState.new :regexp, interpreted, match
+ if parse_regexp
+ tokens = []
+ saved_tokens = tokens
+ end
+
+ elsif match = scan(/#{patterns::NUMERIC}/o)
+ type = if self[1] then :float else :integer end
- elsif match = scan(/#{patterns::SYMBOL}/o)
- case delim = match[1]
- when ?', ?"
- tokens << [:open, :symbol]
- tokens << [':', :symbol]
- match = delim.chr
- type = :delimiter
- state = patterns::StringState.new :symbol, delim == ?", match
- else
- type = :symbol
- end
-
- elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
- regexp_allowed = fancy_allowed = :set
- type = :operator
-
- elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
- indented = self[1] == '-'
- quote = self[3]
- delim = self[quote ? 4 : 2]
- type = patterns::QUOTE_TO_TYPE[quote]
- tokens << [:open, type]
- tokens << [match, :delimiter]
- match = :close
- heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
- heredocs ||= [] # create heredocs if empty
- heredocs << heredoc
-
- elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
- type, interpreted = *patterns::FancyStringType.fetch(self[1]) do
- raise_inspect 'Unknown fancy string: %%%p' % k, tokens
- end
- tokens << [:open, type]
- state = patterns::StringState.new type, interpreted, self[2]
- type = :delimiter
+ elsif match = scan(/#{patterns::SYMBOL}/o)
+ case delim = match[1]
+ when ?', ?"
+ tokens << [:open, :symbol]
+ tokens << [':', :symbol]
+ match = delim.chr
+ type = :delimiter
+ state = patterns::StringState.new :symbol, delim == ?", match
+ else
+ type = :symbol
+ end
+
+ elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
+ regexp_allowed = fancy_allowed = :set
+ type = :operator
+
+ elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
+ indented = self[1] == '-'
+ quote = self[3]
+ delim = self[quote ? 4 : 2]
+ type = patterns::QUOTE_TO_TYPE[quote]
+ tokens << [:open, type]
+ tokens << [match, :delimiter]
+ match = :close
+ heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
+ heredocs ||= [] # create heredocs if empty
+ heredocs << heredoc
+
+ elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
+ type, interpreted = *patterns::FancyStringType.fetch(self[1]) do
+ raise_inspect 'Unknown fancy string: %%%p' % k, tokens
+ end
+ tokens << [:open, type]
+ state = patterns::StringState.new type, interpreted, self[2]
+ type = :delimiter
- elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
- type = :integer
+ elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
+ type = :integer
- elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
- regexp_allowed = fancy_allowed = :set
- type = :operator
+ elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
+ regexp_allowed = fancy_allowed = :set
+ type = :operator
- elsif match = scan(/`/)
- if last_token_dot
- type = :operator
- else
- tokens << [:open, :shell]
- type = :delimiter
- state = patterns::StringState.new :shell, true, match
- end
-
- elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
- type = :global_variable
-
- elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
- type = :class_variable
-
- else
- match = getch
-
- end
-
- elsif state == :def_expected
- state = :initial
- if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
- type = :method
- else
- next
- end
+ elsif match = scan(/`/)
+ if last_token_dot
+ type = :operator
+ else
+ tokens << [:open, :shell]
+ type = :delimiter
+ state = patterns::StringState.new :shell, true, match
+ end
+
+ elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
+ type = :global_variable
+
+ elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
+ type = :class_variable
+
+ else
+ match = getch
+
+ end
+
+ elsif state == :def_expected
+ state = :initial
+ if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
+ type = :method
+ else
+ next
+ end
- elsif state == :undef_expected
- state = :undef_comma_expected
- if match = scan(/#{patterns::METHOD_NAME_EX}/o)
- type = :method
- elsif match = scan(/#{patterns::SYMBOL}/o)
- case delim = match[1]
- when ?', ?"
- tokens << [:open, :symbol]
- tokens << [':', :symbol]
- match = delim.chr
- type = :delimiter
- state = patterns::StringState.new :symbol, delim == ?", match
- state.next_state = :undef_comma_expected
- else
- type = :symbol
- end
- else
- state = :initial
- next
- end
-
- elsif state == :undef_comma_expected
- if match = scan(/,/)
- type = :operator
- state = :undef_expected
- else
- state = :initial
- next
- end
+ elsif state == :undef_expected
+ state = :undef_comma_expected
+ if match = scan(/#{patterns::METHOD_NAME_EX}/o)
+ type = :method
+ elsif match = scan(/#{patterns::SYMBOL}/o)
+ case delim = match[1]
+ when ?', ?"
+ tokens << [:open, :symbol]
+ tokens << [':', :symbol]
+ match = delim.chr
+ type = :delimiter
+ state = patterns::StringState.new :symbol, delim == ?", match
+ state.next_state = :undef_comma_expected
+ else
+ type = :symbol
+ end
+ else
+ state = :initial
+ next
+ end
+
+ elsif state == :undef_comma_expected
+ if match = scan(/,/)
+ type = :operator
+ state = :undef_expected
+ else
+ state = :initial
+ next
+ end
- elsif state == :module_expected
- if match = scan(/<</)
- type = :operator
- else
- state = :initial
- if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
- type = :class
- else
- next
- end
- end
+ elsif state == :module_expected
+ if match = scan(/<</)
+ type = :operator
+ else
+ state = :initial
+ if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
+ type = :class
+ else
+ next
+ end
+ end
- end
+ end
# }}}
- regexp_allowed = regexp_allowed == :set
- fancy_allowed = fancy_allowed == :set
- last_token_dot = last_token_dot == :set
+ regexp_allowed = regexp_allowed == :set
+ fancy_allowed = fancy_allowed == :set
+ last_token_dot = last_token_dot == :set
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
+ if $DEBUG and (not kind or kind == :error)
+ raise_inspect 'Error token %p in line %d' %
+ [[match, kind], line], tokens
+ end
+ raise_inspect 'Empty token', tokens unless match
- tokens << [match, type]
-
- if last_state
- state = last_state
- last_state = nil
- end
- end
- end
+ tokens << [match, type]
+
+ if last_state
+ state = last_state
+ last_state = nil
+ end
+ end
+ end
- states << state if state.is_a? patterns::StringState
- until states.empty?
- tokens << [:close, states.pop.type]
- end
+ states << state if state.is_a? patterns::StringState
+ until states.empty?
+ tokens << [:close, states.pop.type]
+ end
- tokens
- end
- end
+ tokens
+ end
+ end
end
end
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index 63d12f0..c38739d 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -1,216 +1,216 @@
module CodeRay
module Scanners
- module Ruby::Patterns # :nodoc:
-
- RESERVED_WORDS = %w[
- and def end in or unless begin
- defined? ensure module redo super until
- BEGIN break do next rescue then
- when END case else for retry
- while alias class elsif if not return
- undef yield
- ]
-
- DEF_KEYWORDS = %w[ def ]
- UNDEF_KEYWORDS = %w[ undef ]
- MODULE_KEYWORDS = %w[class module]
- DEF_NEW_STATE = WordList.new(:initial).
- add(DEF_KEYWORDS, :def_expected).
- add(UNDEF_KEYWORDS, :undef_expected).
- add(MODULE_KEYWORDS, :module_expected)
-
- IDENTS_ALLOWING_REGEXP = %w[
- and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
- ]
- REGEXP_ALLOWED = WordList.new(false).
- add(IDENTS_ALLOWING_REGEXP, :set)
-
- PREDEFINED_CONSTANTS = %w[
- nil true false self
- DATA ARGV ARGF __FILE__ __LINE__
- ]
-
- IDENT_KIND = WordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(PREDEFINED_CONSTANTS, :pre_constant)
-
- IDENT = /[a-z_][\w_]*/i
-
- METHOD_NAME = / #{IDENT} [?!]? /ox
- METHOD_NAME_OPERATOR = /
- \*\*? # multiplication and power
- | [-+]@? # plus, minus
- | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
- | \[\]=? # array getter and setter
- | << | >> # append or shift left, shift right
- | <=?>? | >=? # comparison, rocket operator
- | ===? # simple equality and case equality
- /ox
- METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
- INSTANCE_VARIABLE = / @ #{IDENT} /ox
- CLASS_VARIABLE = / @@ #{IDENT} /ox
- OBJECT_VARIABLE = / @@? #{IDENT} /ox
- GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
- PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
- VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
-
- QUOTE_TO_TYPE = {
- '`' => :shell,
- '/'=> :regexp,
- }
- QUOTE_TO_TYPE.default = :string
-
- REGEXP_MODIFIERS = /[mixounse]*/
- REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
-
- DECIMAL = /\d+(?:_\d+)*/
- OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
- HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
- BINARY = /0b[01]+(?:_[01]+)*/
-
- EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
- FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
- FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
- NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
-
- SYMBOL = /
- :
- (?:
- #{METHOD_NAME_EX}
- | #{PREFIX_VARIABLE}
- | ['"]
- )
- /ox
-
- # TODO investigste \M, \c and \C escape sequences
- # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
- # assert_equal(225, ?\M-a)
- # assert_equal(129, ?\M-\C-a)
- ESCAPE = /
- [abefnrstv]
- | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
- | [0-7]{1,3}
- | x[0-9A-Fa-f]{1,2}
- | .
- /mx
-
- CHARACTER = /
- \?
- (?:
- [^\s\\]
- | \\ #{ESCAPE}
- )
- /mx
-
- # NOTE: This is not completely correct, but
- # nobody needs heredoc delimiters ending with \n.
- HEREDOC_OPEN = /
- << (-)? # $1 = float
- (?:
- ( [A-Za-z_0-9]+ ) # $2 = delim
- |
- ( ["'`] ) # $3 = quote, type
- ( [^\n]*? ) \3 # $4 = delim
- )
- /mx
-
- RUBYDOC = /
- =begin (?!\S)
- .*?
- (?: \Z | ^=end (?!\S) [^\n]* )
- /mx
-
- DATA = /
- __END__$
- .*?
- (?: \Z | (?=^\#CODE) )
- /mx
-
- RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
-
- RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
-
- # FIXME: \s and = are only a workaround, they are still allowed
- # as delimiters.
- FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
- FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
-
- FancyStringType = {
- 'q' => [:string, false],
- 'Q' => [:string, true],
- 'r' => [:regexp, true],
- 's' => [:symbol, false],
- 'x' => [:shell, true]
- }
- FancyStringType['w'] = FancyStringType['q']
- FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
-
- class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
- :paren, :paren_depth, :pattern, :next_state
-
- CLOSING_PAREN = Hash[ *%w[
- ( )
- [ ]
- < >
- { }
- ] ]
-
- CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
- OPENING_PAREN = CLOSING_PAREN.invert
-
- STRING_PATTERN = Hash.new { |h, k|
- delim, interpreted = *k
- delim_pattern = Regexp.escape(delim.dup)
- if closing_paren = CLOSING_PAREN[delim]
- delim_pattern << Regexp.escape(closing_paren)
- end
-
-
- special_escapes =
- case interpreted
- when :regexp_symbols
- '| ' + REGEXP_SYMBOLS.source
- when :words
- '| \s'
- end
-
- h[k] =
- if interpreted and not delim == '#'
- / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
- else
- / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
- end
- }
-
- HEREDOC_PATTERN = Hash.new { |h, k|
- delim, interpreted, indented = *k
- delim_pattern = Regexp.escape(delim.dup)
- delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
- h[k] =
- if interpreted
- / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
- else
- / (?= #{delim_pattern}() | \\ ) /mx
- end
- }
-
- def initialize kind, interpreted, delim, heredoc = false
- if heredoc
- pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
- delim = nil
- else
- pattern = STRING_PATTERN[ [delim, interpreted] ]
- if paren = CLOSING_PAREN[delim]
- delim, paren = paren, delim
- paren_depth = 1
- end
- end
- super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
- end
- end unless defined? StringState
-
- end
+ module Ruby::Patterns # :nodoc:
+
+ RESERVED_WORDS = %w[
+ and def end in or unless begin
+ defined? ensure module redo super until
+ BEGIN break do next rescue then
+ when END case else for retry
+ while alias class elsif if not return
+ undef yield
+ ]
+
+ DEF_KEYWORDS = %w[ def ]
+ UNDEF_KEYWORDS = %w[ undef ]
+ MODULE_KEYWORDS = %w[class module]
+ DEF_NEW_STATE = WordList.new(:initial).
+ add(DEF_KEYWORDS, :def_expected).
+ add(UNDEF_KEYWORDS, :undef_expected).
+ add(MODULE_KEYWORDS, :module_expected)
+
+ IDENTS_ALLOWING_REGEXP = %w[
+ and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
+ ]
+ REGEXP_ALLOWED = WordList.new(false).
+ add(IDENTS_ALLOWING_REGEXP, :set)
+
+ PREDEFINED_CONSTANTS = %w[
+ nil true false self
+ DATA ARGV ARGF __FILE__ __LINE__
+ ]
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ IDENT = /[a-z_][\w_]*/i
+
+ METHOD_NAME = / #{IDENT} [?!]? /ox
+ METHOD_NAME_OPERATOR = /
+ \*\*? # multiplication and power
+ | [-+]@? # plus, minus
+ | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
+ | \[\]=? # array getter and setter
+ | << | >> # append or shift left, shift right
+ | <=?>? | >=? # comparison, rocket operator
+ | ===? # simple equality and case equality
+ /ox
+ METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
+ INSTANCE_VARIABLE = / @ #{IDENT} /ox
+ CLASS_VARIABLE = / @@ #{IDENT} /ox
+ OBJECT_VARIABLE = / @@? #{IDENT} /ox
+ GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
+ PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
+ VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
+
+ QUOTE_TO_TYPE = {
+ '`' => :shell,
+ '/'=> :regexp,
+ }
+ QUOTE_TO_TYPE.default = :string
+
+ REGEXP_MODIFIERS = /[mixounse]*/
+ REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
+
+ DECIMAL = /\d+(?:_\d+)*/
+ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+ BINARY = /0b[01]+(?:_[01]+)*/
+
+ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+ FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
+ FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
+ NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
+
+ SYMBOL = /
+ :
+ (?:
+ #{METHOD_NAME_EX}
+ | #{PREFIX_VARIABLE}
+ | ['"]
+ )
+ /ox
+
+ # TODO investigste \M, \c and \C escape sequences
+ # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
+ # assert_equal(225, ?\M-a)
+ # assert_equal(129, ?\M-\C-a)
+ ESCAPE = /
+ [abefnrstv]
+ | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
+ | [0-7]{1,3}
+ | x[0-9A-Fa-f]{1,2}
+ | .
+ /mx
+
+ CHARACTER = /
+ \?
+ (?:
+ [^\s\\]
+ | \\ #{ESCAPE}
+ )
+ /mx
+
+ # NOTE: This is not completely correct, but
+ # nobody needs heredoc delimiters ending with \n.
+ HEREDOC_OPEN = /
+ << (-)? # $1 = float
+ (?:
+ ( [A-Za-z_0-9]+ ) # $2 = delim
+ |
+ ( ["'`] ) # $3 = quote, type
+ ( [^\n]*? ) \3 # $4 = delim
+ )
+ /mx
+
+ RUBYDOC = /
+ =begin (?!\S)
+ .*?
+ (?: \Z | ^=end (?!\S) [^\n]* )
+ /mx
+
+ DATA = /
+ __END__$
+ .*?
+ (?: \Z | (?=^\#CODE) )
+ /mx
+
+ RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
+
+ RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
+
+ # FIXME: \s and = are only a workaround, they are still allowed
+ # as delimiters.
+ FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
+ FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
+
+ FancyStringType = {
+ 'q' => [:string, false],
+ 'Q' => [:string, true],
+ 'r' => [:regexp, true],
+ 's' => [:symbol, false],
+ 'x' => [:shell, true]
+ }
+ FancyStringType['w'] = FancyStringType['q']
+ FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
+
+ class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
+ :paren, :paren_depth, :pattern, :next_state
+
+ CLOSING_PAREN = Hash[ *%w[
+ ( )
+ [ ]
+ < >
+ { }
+ ] ]
+
+ CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
+ OPENING_PAREN = CLOSING_PAREN.invert
+
+ STRING_PATTERN = Hash.new { |h, k|
+ delim, interpreted = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ if closing_paren = CLOSING_PAREN[delim]
+ delim_pattern << Regexp.escape(closing_paren)
+ end
+
+
+ special_escapes =
+ case interpreted
+ when :regexp_symbols
+ '| ' + REGEXP_SYMBOLS.source
+ when :words
+ '| \s'
+ end
+
+ h[k] =
+ if interpreted and not delim == '#'
+ / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
+ else
+ / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
+ end
+ }
+
+ HEREDOC_PATTERN = Hash.new { |h, k|
+ delim, interpreted, indented = *k
+ delim_pattern = Regexp.escape(delim.dup)
+ delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
+ h[k] =
+ if interpreted
+ / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
+ else
+ / (?= #{delim_pattern}() | \\ ) /mx
+ end
+ }
+
+ def initialize kind, interpreted, delim, heredoc = false
+ if heredoc
+ pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
+ delim = nil
+ else
+ pattern = STRING_PATTERN[ [delim, interpreted] ]
+ if paren = CLOSING_PAREN[delim]
+ delim, paren = paren, delim
+ paren_depth = 1
+ end
+ end
+ super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
+ end
+ end unless defined? StringState
+
+ end
end
end