From 0b60a93641a5bfafb2b07f81a8e9c7da03ef3c86 Mon Sep 17 00:00:00 2001 From: murphy Date: Sat, 5 Nov 2005 02:28:36 +0000 Subject: New helper module scheme, step 3 --- lib/coderay/scanners/ruby/helper.rb | 213 +++++++++++++++++++++++++++++++ lib/coderay/scanners/ruby/ruby_helper.rb | 213 ------------------------------- 2 files changed, 213 insertions(+), 213 deletions(-) create mode 100644 lib/coderay/scanners/ruby/helper.rb delete mode 100644 lib/coderay/scanners/ruby/ruby_helper.rb (limited to 'lib/coderay/scanners/ruby') diff --git a/lib/coderay/scanners/ruby/helper.rb b/lib/coderay/scanners/ruby/helper.rb new file mode 100644 index 0000000..ad649da --- /dev/null +++ b/lib/coderay/scanners/ruby/helper.rb @@ -0,0 +1,213 @@ +module CodeRay module Scanners + + class Ruby + + RESERVED_WORDS = %w[ + and def end in or unless begin + defined? ensure module redo super until + BEGIN break do next rescue then + when END case else for retry + while alias class elsif if not return + undef yield + ] + + DEF_KEYWORDS = %w[ def ] + UNDEF_KEYWORDS = %w[ undef ] + MODULE_KEYWORDS = %w[class module] + DEF_NEW_STATE = WordList.new(:initial). + add(DEF_KEYWORDS, :def_expected). + add(UNDEF_KEYWORDS, :undef_expected). + add(MODULE_KEYWORDS, :module_expected) + + IDENTS_ALLOWING_REGEXP = %w[ + and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split + ] + REGEXP_ALLOWED = WordList.new(false). + add(IDENTS_ALLOWING_REGEXP, :set) + + PREDEFINED_CONSTANTS = %w[ + nil true false self + DATA ARGV ARGF __FILE__ __LINE__ + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :pre_constant) + + IDENT = /[a-z_][\w_]*/i + + METHOD_NAME = / #{IDENT} [?!]? /ox + METHOD_NAME_OPERATOR = / + \*\*? # multiplication and power + | [-+]@? # plus, minus + | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde + | \[\]=? # array getter and setter + | << | >> # append or shift left, shift right + | <=?>? | >=? # comparison, rocket operator + | ===? # simple equality and case equality + /ox + METHOD_NAME_EX = / #{IDENT} [?!=]? | #{METHOD_NAME_OPERATOR} /ox + INSTANCE_VARIABLE = / @ #{IDENT} /ox + CLASS_VARIABLE = / @@ #{IDENT} /ox + OBJECT_VARIABLE = / @@? #{IDENT} /ox + GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox + PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox + VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox + + QUOTE_TO_TYPE = { + '`' => :shell, + '/'=> :regexp, + } + QUOTE_TO_TYPE.default = :string + + REGEXP_MODIFIERS = /[mixounse]*/ + REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ + + DECIMAL = /\d+(?:_\d+)*/ + OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ + HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ + BINARY = /0b[01]+(?:_[01]+)*/ + + EXPONENT = / [eE] [+-]? #{DECIMAL} /ox + FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox + FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox + NUMERIC = / (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} /ox + + SYMBOL = / + : + (?: + #{METHOD_NAME_EX} + | #{PREFIX_VARIABLE} + | ['"] + ) + /ox + + # TODO investigste \M, \c and \C escape sequences + # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) ) + # assert_equal(225, ?\M-a) + # assert_equal(129, ?\M-\C-a) + ESCAPE = / + [abefnrstv] + | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M- + | [0-7]{1,3} + | x[0-9A-Fa-f]{1,2} + | . + /mx + + CHARACTER = / + \? + (?: + [^\s\\] + | \\ #{ESCAPE} + ) + /mx + + # NOTE: This is not completely correct, but + # nobody needs heredoc delimiters ending with \n. + HEREDOC_OPEN = / + << (-)? # $1 = float + (?: + ( [A-Za-z_0-9]+ ) # $2 = delim + | + ( ["'`] ) # $3 = quote, type + ( [^\n]*? ) \3 # $4 = delim + ) + /mx + + RUBYDOC = / + =begin (?!\S) + .*? + (?: \Z | ^=end (?!\S) [^\n]* ) + /mx + + DATA = / + __END__$ + .*? + (?: \Z | (?=^\#CODE) ) + /mx + + RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo + + RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x + + FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox + + FancyStringType = { + 'q' => [:string, false], + 'Q' => [:string, true], + 'r' => [:regexp, true], + 's' => [:symbol, false], + 'x' => [:shell, true], + 'w' => [:string, :word], + 'W' => [:string, :word], + } + FancyStringType['w'] = FancyStringType['q'] + FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] + + class StringState < Struct.new :type, :interpreted, :delim, :heredoc, + :paren, :paren_depth, :pattern, :next_state + + CLOSING_PAREN = Hash[ *%w[ + ( ) + [ ] + < > + { } + ] ] + + CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << + OPENING_PAREN = CLOSING_PAREN.invert + + STRING_PATTERN = Hash.new { |h, k| + delim, interpreted = *k + delim_pattern = Regexp.escape(delim.dup) + if starter = OPENING_PAREN[delim] + delim_pattern << Regexp.escape(starter) + end + + + special_escapes = + case interpreted + when :regexp_symbols + '| ' + REGEXP_SYMBOLS.source + when :words + '| \s' + end + + h[k] = + if interpreted and not delim == '#' + / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx + else + / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx + end + } + + HEREDOC_PATTERN = Hash.new { |h, k| + delim, interpreted, indented = *k + delim_pattern = Regexp.escape(delim.dup) + delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x + h[k] = + if interpreted + / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc + else + / (?= #{delim_pattern}() | \\ ) /mx + end + } + + def initialize kind, interpreted, delim, heredoc = false + if paren = CLOSING_PAREN[delim] + delim, paren = paren, delim + paren_depth = 1 + end + if heredoc + pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] + delim = nil + else + pattern = STRING_PATTERN[ [delim, interpreted] ] + end + super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial + end + end unless defined? StringState + + end + +end end diff --git a/lib/coderay/scanners/ruby/ruby_helper.rb b/lib/coderay/scanners/ruby/ruby_helper.rb deleted file mode 100644 index ad649da..0000000 --- a/lib/coderay/scanners/ruby/ruby_helper.rb +++ /dev/null @@ -1,213 +0,0 @@ -module CodeRay module Scanners - - class Ruby - - RESERVED_WORDS = %w[ - and def end in or unless begin - defined? ensure module redo super until - BEGIN break do next rescue then - when END case else for retry - while alias class elsif if not return - undef yield - ] - - DEF_KEYWORDS = %w[ def ] - UNDEF_KEYWORDS = %w[ undef ] - MODULE_KEYWORDS = %w[class module] - DEF_NEW_STATE = WordList.new(:initial). - add(DEF_KEYWORDS, :def_expected). - add(UNDEF_KEYWORDS, :undef_expected). - add(MODULE_KEYWORDS, :module_expected) - - IDENTS_ALLOWING_REGEXP = %w[ - and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split - ] - REGEXP_ALLOWED = WordList.new(false). - add(IDENTS_ALLOWING_REGEXP, :set) - - PREDEFINED_CONSTANTS = %w[ - nil true false self - DATA ARGV ARGF __FILE__ __LINE__ - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_CONSTANTS, :pre_constant) - - IDENT = /[a-z_][\w_]*/i - - METHOD_NAME = / #{IDENT} [?!]? /ox - METHOD_NAME_OPERATOR = / - \*\*? # multiplication and power - | [-+]@? # plus, minus - | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde - | \[\]=? # array getter and setter - | << | >> # append or shift left, shift right - | <=?>? | >=? # comparison, rocket operator - | ===? # simple equality and case equality - /ox - METHOD_NAME_EX = / #{IDENT} [?!=]? | #{METHOD_NAME_OPERATOR} /ox - INSTANCE_VARIABLE = / @ #{IDENT} /ox - CLASS_VARIABLE = / @@ #{IDENT} /ox - OBJECT_VARIABLE = / @@? #{IDENT} /ox - GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox - PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox - VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox - - QUOTE_TO_TYPE = { - '`' => :shell, - '/'=> :regexp, - } - QUOTE_TO_TYPE.default = :string - - REGEXP_MODIFIERS = /[mixounse]*/ - REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ - - DECIMAL = /\d+(?:_\d+)*/ - OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ - HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ - BINARY = /0b[01]+(?:_[01]+)*/ - - EXPONENT = / [eE] [+-]? #{DECIMAL} /ox - FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox - FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox - NUMERIC = / (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} /ox - - SYMBOL = / - : - (?: - #{METHOD_NAME_EX} - | #{PREFIX_VARIABLE} - | ['"] - ) - /ox - - # TODO investigste \M, \c and \C escape sequences - # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) ) - # assert_equal(225, ?\M-a) - # assert_equal(129, ?\M-\C-a) - ESCAPE = / - [abefnrstv] - | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M- - | [0-7]{1,3} - | x[0-9A-Fa-f]{1,2} - | . - /mx - - CHARACTER = / - \? - (?: - [^\s\\] - | \\ #{ESCAPE} - ) - /mx - - # NOTE: This is not completely correct, but - # nobody needs heredoc delimiters ending with \n. - HEREDOC_OPEN = / - << (-)? # $1 = float - (?: - ( [A-Za-z_0-9]+ ) # $2 = delim - | - ( ["'`] ) # $3 = quote, type - ( [^\n]*? ) \3 # $4 = delim - ) - /mx - - RUBYDOC = / - =begin (?!\S) - .*? - (?: \Z | ^=end (?!\S) [^\n]* ) - /mx - - DATA = / - __END__$ - .*? - (?: \Z | (?=^\#CODE) ) - /mx - - RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo - - RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x - - FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox - - FancyStringType = { - 'q' => [:string, false], - 'Q' => [:string, true], - 'r' => [:regexp, true], - 's' => [:symbol, false], - 'x' => [:shell, true], - 'w' => [:string, :word], - 'W' => [:string, :word], - } - FancyStringType['w'] = FancyStringType['q'] - FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] - - class StringState < Struct.new :type, :interpreted, :delim, :heredoc, - :paren, :paren_depth, :pattern, :next_state - - CLOSING_PAREN = Hash[ *%w[ - ( ) - [ ] - < > - { } - ] ] - - CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << - OPENING_PAREN = CLOSING_PAREN.invert - - STRING_PATTERN = Hash.new { |h, k| - delim, interpreted = *k - delim_pattern = Regexp.escape(delim.dup) - if starter = OPENING_PAREN[delim] - delim_pattern << Regexp.escape(starter) - end - - - special_escapes = - case interpreted - when :regexp_symbols - '| ' + REGEXP_SYMBOLS.source - when :words - '| \s' - end - - h[k] = - if interpreted and not delim == '#' - / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx - else - / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx - end - } - - HEREDOC_PATTERN = Hash.new { |h, k| - delim, interpreted, indented = *k - delim_pattern = Regexp.escape(delim.dup) - delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x - h[k] = - if interpreted - / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc - else - / (?= #{delim_pattern}() | \\ ) /mx - end - } - - def initialize kind, interpreted, delim, heredoc = false - if paren = CLOSING_PAREN[delim] - delim, paren = paren, delim - paren_depth = 1 - end - if heredoc - pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] - delim = nil - else - pattern = STRING_PATTERN[ [delim, interpreted] ] - end - super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial - end - end unless defined? StringState - - end - -end end -- cgit v1.2.1