diff options
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- | lib/coderay/scanners/plaintext.rb | 30 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 432 | ||||
-rw-r--r-- | lib/coderay/scanners/xml.rb | 36 |
3 files changed, 249 insertions, 249 deletions
diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb index 9007646..432745f 100644 --- a/lib/coderay/scanners/plaintext.rb +++ b/lib/coderay/scanners/plaintext.rb @@ -1,15 +1,15 @@ -module CodeRay
-module Scanners
-
- class Plaintext < Scanner
-
- register_for :plaintext, :plain
-
- def scan_tokens tokens, options
- tokens << [scan_until(/\z/), :plain]
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + class Plaintext < Scanner + + register_for :plaintext, :plain + + def scan_tokens tokens, options + tokens << [scan_until(/\z/), :plain] + end + + end + +end +end diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index b1e0d1b..c601011 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -1,216 +1,216 @@ -module CodeRay
-module Scanners
-
- module Ruby::Patterns # :nodoc:
-
- RESERVED_WORDS = %w[
- and def end in or unless begin
- defined? ensure module redo super until
- BEGIN break do next rescue then
- when END case else for retry
- while alias class elsif if not return
- undef yield
- ]
-
- DEF_KEYWORDS = %w[ def ]
- UNDEF_KEYWORDS = %w[ undef ]
- MODULE_KEYWORDS = %w[class module]
- DEF_NEW_STATE = WordList.new(:initial).
- add(DEF_KEYWORDS, :def_expected).
- add(UNDEF_KEYWORDS, :undef_expected).
- add(MODULE_KEYWORDS, :module_expected)
-
- IDENTS_ALLOWING_REGEXP = %w[
- and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
- ]
- REGEXP_ALLOWED = WordList.new(false).
- add(IDENTS_ALLOWING_REGEXP, :set)
-
- PREDEFINED_CONSTANTS = %w[
- nil true false self
- DATA ARGV ARGF __FILE__ __LINE__
- ]
-
- IDENT_KIND = WordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(PREDEFINED_CONSTANTS, :pre_constant)
-
- IDENT = /[a-z_][\w_]*/i
-
- METHOD_NAME = / #{IDENT} [?!]? /ox
- METHOD_NAME_OPERATOR = /
- \*\*? # multiplication and power
- | [-+]@? # plus, minus
- | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
- | \[\]=? # array getter and setter
- | << | >> # append or shift left, shift right
- | <=?>? | >=? # comparison, rocket operator
- | ===? # simple equality and case equality
- /ox
- METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
- INSTANCE_VARIABLE = / @ #{IDENT} /ox
- CLASS_VARIABLE = / @@ #{IDENT} /ox
- OBJECT_VARIABLE = / @@? #{IDENT} /ox
- GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
- PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
- VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
-
- QUOTE_TO_TYPE = {
- '`' => :shell,
- '/'=> :regexp,
- }
- QUOTE_TO_TYPE.default = :string
-
- REGEXP_MODIFIERS = /[mixounse]*/
- REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
-
- DECIMAL = /\d+(?:_\d+)*/
- OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
- HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
- BINARY = /0b[01]+(?:_[01]+)*/
-
- EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
- FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox
- FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox
- NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox
-
- SYMBOL = /
- :
- (?:
- #{METHOD_NAME_EX}
- | #{PREFIX_VARIABLE}
- | ['"]
- )
- /ox
-
- # TODO investigste \M, \c and \C escape sequences
- # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
- # assert_equal(225, ?\M-a)
- # assert_equal(129, ?\M-\C-a)
- ESCAPE = /
- [abefnrstv]
- | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
- | [0-7]{1,3}
- | x[0-9A-Fa-f]{1,2}
- | .
- /mx
-
- CHARACTER = /
- \?
- (?:
- [^\s\\]
- | \\ #{ESCAPE}
- )
- /mx
-
- # NOTE: This is not completely correct, but
- # nobody needs heredoc delimiters ending with \n.
- HEREDOC_OPEN = /
- << (-)? # $1 = float
- (?:
- ( [A-Za-z_0-9]+ ) # $2 = delim
- |
- ( ["'`] ) # $3 = quote, type
- ( [^\n]*? ) \3 # $4 = delim
- )
- /mx
-
- RUBYDOC = /
- =begin (?!\S)
- .*?
- (?: \Z | ^=end (?!\S) [^\n]* )
- /mx
-
- DATA = /
- __END__$
- .*?
- (?: \Z | (?=^\#CODE) )
- /mx
-
- RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
-
- RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
-
- # FIXME: \s and = are only a workaround, they are still allowed
- # as delimiters.
- FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
- FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
-
- FancyStringType = {
- 'q' => [:string, false],
- 'Q' => [:string, true],
- 'r' => [:regexp, true],
- 's' => [:symbol, false],
- 'x' => [:shell, true]
- }
- FancyStringType['w'] = FancyStringType['q']
- FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
-
- class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
- :paren, :paren_depth, :pattern, :next_state
-
- CLOSING_PAREN = Hash[ *%w[
- ( )
- [ ]
- < >
- { }
- ] ]
-
- CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
- OPENING_PAREN = CLOSING_PAREN.invert
-
- STRING_PATTERN = Hash.new { |h, k|
- delim, interpreted = *k
- delim_pattern = Regexp.escape(delim.dup)
- if closing_paren = CLOSING_PAREN[delim]
- delim_pattern << Regexp.escape(closing_paren)
- end
-
-
- special_escapes =
- case interpreted
- when :regexp_symbols
- '| ' + REGEXP_SYMBOLS.source
- when :words
- '| \s'
- end
-
- h[k] =
- if interpreted and not delim == '#'
- / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
- else
- / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
- end
- }
-
- HEREDOC_PATTERN = Hash.new { |h, k|
- delim, interpreted, indented = *k
- delim_pattern = Regexp.escape(delim.dup)
- delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
- h[k] =
- if interpreted
- / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc
- else
- / (?= #{delim_pattern}() | \\ ) /mx
- end
- }
-
- def initialize kind, interpreted, delim, heredoc = false
- if heredoc
- pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
- delim = nil
- else
- pattern = STRING_PATTERN[ [delim, interpreted] ]
- if paren = CLOSING_PAREN[delim]
- delim, paren = paren, delim
- paren_depth = 1
- end
- end
- super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial
- end
- end unless defined? StringState
-
- end
-
-end
-end
+module CodeRay +module Scanners + + module Ruby::Patterns # :nodoc: + + RESERVED_WORDS = %w[ + and def end in or unless begin + defined? ensure module redo super until + BEGIN break do next rescue then + when END case else for retry + while alias class elsif if not return + undef yield + ] + + DEF_KEYWORDS = %w[ def ] + UNDEF_KEYWORDS = %w[ undef ] + MODULE_KEYWORDS = %w[class module] + DEF_NEW_STATE = WordList.new(:initial). + add(DEF_KEYWORDS, :def_expected). + add(UNDEF_KEYWORDS, :undef_expected). + add(MODULE_KEYWORDS, :module_expected) + + IDENTS_ALLOWING_REGEXP = %w[ + and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split + ] + REGEXP_ALLOWED = WordList.new(false). + add(IDENTS_ALLOWING_REGEXP, :set) + + PREDEFINED_CONSTANTS = %w[ + nil true false self + DATA ARGV ARGF __FILE__ __LINE__ + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_CONSTANTS, :pre_constant) + + IDENT = /[a-z_][\w_]*/i + + METHOD_NAME = / #{IDENT} [?!]? /ox + METHOD_NAME_OPERATOR = / + \*\*? # multiplication and power + | [-+]@? # plus, minus + | [\/%&|^`~] # division, modulo or format strings, &and, |or, ^xor, `system`, tilde + | \[\]=? # array getter and setter + | << | >> # append or shift left, shift right + | <=?>? | >=? # comparison, rocket operator + | ===? # simple equality and case equality + /ox + METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox + INSTANCE_VARIABLE = / @ #{IDENT} /ox + CLASS_VARIABLE = / @@ #{IDENT} /ox + OBJECT_VARIABLE = / @@? #{IDENT} /ox + GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9]\d* | 0\w* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox + PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox + VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox + + QUOTE_TO_TYPE = { + '`' => :shell, + '/'=> :regexp, + } + QUOTE_TO_TYPE.default = :string + + REGEXP_MODIFIERS = /[mixounse]*/ + REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ + + DECIMAL = /\d+(?:_\d+)*/ + OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ + HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ + BINARY = /0b[01]+(?:_[01]+)*/ + + EXPONENT = / [eE] [+-]? #{DECIMAL} /ox + FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox + FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox + NUMERIC = / [-+]? (?: (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} ) /ox + + SYMBOL = / + : + (?: + #{METHOD_NAME_EX} + | #{PREFIX_VARIABLE} + | ['"] + ) + /ox + + # TODO investigste \M, \c and \C escape sequences + # (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) ) + # assert_equal(225, ?\M-a) + # assert_equal(129, ?\M-\C-a) + ESCAPE = / + [abefnrstv] + | M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M- + | [0-7]{1,3} + | x[0-9A-Fa-f]{1,2} + | . + /mx + + CHARACTER = / + \? + (?: + [^\s\\] + | \\ #{ESCAPE} + ) + /mx + + # NOTE: This is not completely correct, but + # nobody needs heredoc delimiters ending with \n. + HEREDOC_OPEN = / + << (-)? # $1 = float + (?: + ( [A-Za-z_0-9]+ ) # $2 = delim + | + ( ["'`] ) # $3 = quote, type + ( [^\n]*? ) \3 # $4 = delim + ) + /mx + + RUBYDOC = / + =begin (?!\S) + .*? + (?: \Z | ^=end (?!\S) [^\n]* ) + /mx + + DATA = / + __END__$ + .*? + (?: \Z | (?=^\#CODE) ) + /mx + + RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo + + RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x + + # FIXME: \s and = are only a workaround, they are still allowed + # as delimiters. + FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx + FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx + + FancyStringType = { + 'q' => [:string, false], + 'Q' => [:string, true], + 'r' => [:regexp, true], + 's' => [:symbol, false], + 'x' => [:shell, true] + } + FancyStringType['w'] = FancyStringType['q'] + FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] + + class StringState < Struct.new :type, :interpreted, :delim, :heredoc, + :paren, :paren_depth, :pattern, :next_state + + CLOSING_PAREN = Hash[ *%w[ + ( ) + [ ] + < > + { } + ] ] + + CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << + OPENING_PAREN = CLOSING_PAREN.invert + + STRING_PATTERN = Hash.new { |h, k| + delim, interpreted = *k + delim_pattern = Regexp.escape(delim.dup) + if closing_paren = CLOSING_PAREN[delim] + delim_pattern << Regexp.escape(closing_paren) + end + + + special_escapes = + case interpreted + when :regexp_symbols + '| ' + REGEXP_SYMBOLS.source + when :words + '| \s' + end + + h[k] = + if interpreted and not delim == '#' + / (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx + else + / (?= [#{delim_pattern}\\] #{special_escapes} ) /mx + end + } + + HEREDOC_PATTERN = Hash.new { |h, k| + delim, interpreted, indented = *k + delim_pattern = Regexp.escape(delim.dup) + delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x + h[k] = + if interpreted + / (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx # $1 set == end of heredoc + else + / (?= #{delim_pattern}() | \\ ) /mx + end + } + + def initialize kind, interpreted, delim, heredoc = false + if heredoc + pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ] + delim = nil + else + pattern = STRING_PATTERN[ [delim, interpreted] ] + if paren = CLOSING_PAREN[delim] + delim, paren = paren, delim + paren_depth = 1 + end + end + super kind, interpreted, delim, heredoc, paren, paren_depth, pattern, :initial + end + end unless defined? StringState + + end + +end +end diff --git a/lib/coderay/scanners/xml.rb b/lib/coderay/scanners/xml.rb index 5ce8ce9..ff923fb 100644 --- a/lib/coderay/scanners/xml.rb +++ b/lib/coderay/scanners/xml.rb @@ -1,18 +1,18 @@ -module CodeRay
-module Scanners
-
- load :html
-
- # XML Scanner
- #
- # $Id$
- #
- # Currently this is the same scanner as Scanners::HTML.
- class XML < HTML
-
- register_for :xml
-
- end
-
-end
-end
+module CodeRay +module Scanners + + load :html + + # XML Scanner + # + # $Id$ + # + # Currently this is the same scanner as Scanners::HTML. + class XML < HTML + + register_for :xml + + end + +end +end |