diff options
Diffstat (limited to 'lib/coderay/scanners')
-rw-r--r-- | lib/coderay/scanners/_map.rb | 18 | ||||
-rw-r--r-- | lib/coderay/scanners/c.rb | 300 | ||||
-rw-r--r-- | lib/coderay/scanners/delphi.rb | 226 | ||||
-rw-r--r-- | lib/coderay/scanners/html.rb | 322 | ||||
-rw-r--r-- | lib/coderay/scanners/nitro_html.rb | 238 | ||||
-rw-r--r-- | lib/coderay/scanners/plaintext.rb | 14 | ||||
-rw-r--r-- | lib/coderay/scanners/rhtml.rb | 118 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby.rb | 32 | ||||
-rw-r--r-- | lib/coderay/scanners/ruby/patterns.rb | 14 | ||||
-rw-r--r-- | lib/coderay/scanners/xml.rb | 20 |
10 files changed, 651 insertions, 651 deletions
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 1482ee9..6268a6c 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -1,14 +1,14 @@ module CodeRay
module Scanners
-
- map :cpp => :c,
- :plain => :plaintext,
- :pascal => :delphi,
- :irb => :ruby,
- :xml => :html,
- :xhtml => :nitro_html
- default :plain
-
+ map :cpp => :c,
+ :plain => :plaintext,
+ :pascal => :delphi,
+ :irb => :ruby,
+ :xml => :html,
+ :xhtml => :nitro_html
+
+ default :plain
+
end
end
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index effaaa1..66b8de1 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,155 @@ module CodeRay
module Scanners
-
- class C < Scanner
-
- register_for :c
-
- RESERVED_WORDS = [
- 'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
- 'for', 'goto', 'if', 'return', 'switch', 'while',
- 'struct', 'union', 'enum', 'typedef',
- 'static', 'register', 'auto', 'extern',
- 'sizeof',
- 'volatile', 'const', # C89
- 'inline', 'restrict', # C99
- ]
-
- PREDEFINED_TYPES = [
- 'int', 'long', 'short', 'char', 'void',
- 'signed', 'unsigned', 'float', 'double',
- 'bool', 'complex', # C99
- ]
-
- PREDEFINED_CONSTANTS = [
- 'EOF', 'NULL',
- 'true', 'false', # C99
- ]
-
- IDENT_KIND = WordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(PREDEFINED_TYPES, :pre_type).
- add(PREDEFINED_CONSTANTS, :pre_constant)
-
- ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
-
- def scan_tokens tokens, options
-
- state = :initial
-
- until eos?
-
- kind = :error
- match = nil
-
- case state
-
- when :initial
-
- if scan(/ \s+ | \\\n /x)
- kind = :space
-
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
- kind = :comment
-
- elsif match = scan(/ \# \s* if \s* 0 /x)
- match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
- kind = :comment
-
- elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
- kind = :operator
-
- elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
- kind = IDENT_KIND[match]
- if kind == :ident and check(/:(?!:)/)
- match << scan(/:/)
- kind = :label
- end
-
- elsif match = scan(/L?"/)
- tokens << [:open, :string]
- if match[0] == ?L
- tokens << ['L', :modifier]
- match = '"'
- end
- state = :string
- kind = :delimiter
-
- elsif scan(/#\s*(\w*)/)
- kind = :preprocessor # FIXME multiline preprocs
- state = :include_expected if self[1] == 'include'
-
- elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
- kind = :char
-
- elsif scan(/0[xX][0-9A-Fa-f]+/)
- kind = :hex
-
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
- kind = :oct
-
- elsif scan(/(?:\d+)(?![.eEfF])/)
- kind = :integer
-
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
- kind = :float
-
- else
- getch
- end
-
- when :string
- if scan(/[^\\"]+/)
- kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
- tokens << [:close, :string]
- state = :initial
- next
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
- elsif scan(/ \\ | $ /x)
- kind = :error
- state = :initial
- else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
- end
-
- when :include_expected
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
- kind = :include
- state = :initial
-
- elsif match = scan(/\s+/)
- kind = :space
- state = :initial if match.index ?\n
-
- else
- getch
-
- end
-
- else
- raise_inspect 'Unknown state', tokens
-
- end
-
- match ||= matched
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
-
- end
-
- tokens
- end
-
- end
+
+ class C < Scanner
+
+ register_for :c
+
+ RESERVED_WORDS = [
+ 'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
+ 'for', 'goto', 'if', 'return', 'switch', 'while',
+ 'struct', 'union', 'enum', 'typedef',
+ 'static', 'register', 'auto', 'extern',
+ 'sizeof',
+ 'volatile', 'const', # C89
+ 'inline', 'restrict', # C99
+ ]
+
+ PREDEFINED_TYPES = [
+ 'int', 'long', 'short', 'char', 'void',
+ 'signed', 'unsigned', 'float', 'double',
+ 'bool', 'complex', # C99
+ ]
+
+ PREDEFINED_CONSTANTS = [
+ 'EOF', 'NULL',
+ 'true', 'false', # C99
+ ]
+
+ IDENT_KIND = WordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(PREDEFINED_TYPES, :pre_type).
+ add(PREDEFINED_CONSTANTS, :pre_constant)
+
+ ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+
+ def scan_tokens tokens, options
+
+ state = :initial
+
+ until eos?
+
+ kind = :error
+ match = nil
+
+ case state
+
+ when :initial
+
+ if scan(/ \s+ | \\\n /x)
+ kind = :space
+
+ elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+ kind = :comment
+
+ elsif match = scan(/ \# \s* if \s* 0 /x)
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
+ kind = :comment
+
+ elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
+ kind = :operator
+
+ elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+ kind = IDENT_KIND[match]
+ if kind == :ident and check(/:(?!:)/)
+ match << scan(/:/)
+ kind = :label
+ end
+
+ elsif match = scan(/L?"/)
+ tokens << [:open, :string]
+ if match[0] == ?L
+ tokens << ['L', :modifier]
+ match = '"'
+ end
+ state = :string
+ kind = :delimiter
+
+ elsif scan(/#\s*(\w*)/)
+ kind = :preprocessor # FIXME multiline preprocs
+ state = :include_expected if self[1] == 'include'
+
+ elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
+ kind = :char
+
+ elsif scan(/0[xX][0-9A-Fa-f]+/)
+ kind = :hex
+
+ elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+ kind = :oct
+
+ elsif scan(/(?:\d+)(?![.eEfF])/)
+ kind = :integer
+
+ elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+ kind = :float
+
+ else
+ getch
+ end
+
+ when :string
+ if scan(/[^\\"]+/)
+ kind = :content
+ elsif scan(/"/)
+ tokens << ['"', :delimiter]
+ tokens << [:close, :string]
+ state = :initial
+ next
+ elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+ kind = :char
+ elsif scan(/ \\ | $ /x)
+ kind = :error
+ state = :initial
+ else
+ raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+ end
+
+ when :include_expected
+ if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
+ kind = :include
+ state = :initial
+
+ elsif match = scan(/\s+/)
+ kind = :space
+ state = :initial if match.index ?\n
+
+ else
+ getch
+
+ end
+
+ else
+ raise_inspect 'Unknown state', tokens
+
+ end
+
+ match ||= matched
+ if $DEBUG and (not kind or kind == :error)
+ raise_inspect 'Error token %p in line %d' %
+ [[match, kind], line], tokens
+ end
+ raise_inspect 'Empty token', tokens unless match
+
+ tokens << [match, kind]
+
+ end
+
+ tokens
+ end
+
+ end
end
end
diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb index c92fab5..d9d9e1d 100644 --- a/lib/coderay/scanners/delphi.rb +++ b/lib/coderay/scanners/delphi.rb @@ -1,129 +1,129 @@ module CodeRay
module Scanners
-
- class Delphi < Scanner
+
+ class Delphi < Scanner
- register_for :delphi
-
- RESERVED_WORDS = [
- 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
- 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
- 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
- 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
- 'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
- 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
- 'procedure', 'program', 'property', 'raise', 'record', 'repeat',
- 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
- 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
- 'xor', 'on'
- ]
+ register_for :delphi
+
+ RESERVED_WORDS = [
+ 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
+ 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
+ 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
+ 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
+ 'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
+ 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
+ 'procedure', 'program', 'property', 'raise', 'record', 'repeat',
+ 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
+ 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
+ 'xor', 'on'
+ ]
- DIRECTIVES = [
- 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
- 'contains', 'deprecated', 'dispid', 'dynamic', 'export',
- 'external', 'far', 'forward', 'implements', 'local',
- 'near', 'nodefault', 'on', 'overload', 'override',
- 'package', 'pascal', 'platform', 'private', 'protected', 'public',
- 'published', 'read', 'readonly', 'register', 'reintroduce',
- 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
- 'virtual', 'write', 'writeonly'
- ]
+ DIRECTIVES = [
+ 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
+ 'contains', 'deprecated', 'dispid', 'dynamic', 'export',
+ 'external', 'far', 'forward', 'implements', 'local',
+ 'near', 'nodefault', 'on', 'overload', 'override',
+ 'package', 'pascal', 'platform', 'private', 'protected', 'public',
+ 'published', 'read', 'readonly', 'register', 'reintroduce',
+ 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
+ 'virtual', 'write', 'writeonly'
+ ]
- IDENT_KIND = CaseIgnoringWordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(DIRECTIVES, :directive)
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
+ add(RESERVED_WORDS, :reserved).
+ add(DIRECTIVES, :directive)
- def scan_tokens tokens, options
+ def scan_tokens tokens, options
- state = :initial
+ state = :initial
- until eos?
+ until eos?
- kind = :error
- match = nil
+ kind = :error
+ match = nil
- if state == :initial
-
- if scan(/ \s+ /x)
- kind = :space
-
- elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
- kind = :preprocessor
-
- elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
- kind = :comment
-
- elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
- kind = :operator
-
- elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
- kind = IDENT_KIND[match]
-
- elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
- tokens << [:open, :char]
- tokens << ["'", :delimiter]
- tokens << [self[1], :content]
- tokens << ["'", :delimiter]
- tokens << [:close, :char]
- next
-
- elsif match = scan(/ ' /x)
- tokens << [:open, :string]
- state = :string
- kind = :delimiter
-
- elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
- kind = :char
-
- elsif scan(/ \$ [0-9A-Fa-f]+ /x)
- kind = :hex
-
- elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
- kind = :integer
-
- elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
- kind = :float
+ if state == :initial
+
+ if scan(/ \s+ /x)
+ kind = :space
+
+ elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
+ kind = :preprocessor
+
+ elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
+ kind = :comment
+
+ elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
+ kind = :operator
+
+ elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+ kind = IDENT_KIND[match]
+
+ elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
+ tokens << [:open, :char]
+ tokens << ["'", :delimiter]
+ tokens << [self[1], :content]
+ tokens << ["'", :delimiter]
+ tokens << [:close, :char]
+ next
+
+ elsif match = scan(/ ' /x)
+ tokens << [:open, :string]
+ state = :string
+ kind = :delimiter
+
+ elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
+ kind = :char
+
+ elsif scan(/ \$ [0-9A-Fa-f]+ /x)
+ kind = :hex
+
+ elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
+ kind = :integer
+
+ elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
+ kind = :float
- else
- getch
- end
-
- elsif state == :string
- if scan(/[^\n']+/)
- kind = :content
- elsif scan(/''/)
- kind = :char
- elsif scan(/'/)
- tokens << ["'", :delimiter]
- tokens << [:close, :string]
- state = :initial
- next
- elsif scan(/\n/)
- state = :initial
- else
- raise "else case \' reached; %p not handled." % peek(1), tokens
- end
-
- else
- raise 'else-case reached', tokens
-
- end
-
- match ||= matched
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
+ else
+ getch
+ end
+
+ elsif state == :string
+ if scan(/[^\n']+/)
+ kind = :content
+ elsif scan(/''/)
+ kind = :char
+ elsif scan(/'/)
+ tokens << ["'", :delimiter]
+ tokens << [:close, :string]
+ state = :initial
+ next
+ elsif scan(/\n/)
+ state = :initial
+ else
+ raise "else case \' reached; %p not handled." % peek(1), tokens
+ end
+
+ else
+ raise 'else-case reached', tokens
+
+ end
+
+ match ||= matched
+ if $DEBUG and (not kind or kind == :error)
+ raise_inspect 'Error token %p in line %d' %
+ [[match, kind], line], tokens
+ end
+ raise_inspect 'Empty token', tokens unless match
- tokens << [match, kind]
-
- end
-
- tokens
- end
+ tokens << [match, kind]
+
+ end
+
+ tokens
+ end
- end
+ end
end
end
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index cbf8a55..7cdc07e 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,167 +1,167 @@ module CodeRay
module Scanners
- # HTML Scanner
- #
- # $Id$
- class HTML < Scanner
-
- include Streamable
- register_for :html
-
- ATTR_NAME = /[\w.:-]+/
- ATTR_VALUE_UNQUOTED = ATTR_NAME
- TAG_END = /\/?>/
- HEX = /[0-9a-fA-F]/
- ENTITY = /
- &
- (?:
- \w+
- |
- \#
- (?:
- \d+
- |
- x#{HEX}+
- )
- )
- ;
- /ox
-
- PLAIN_STRING_CONTENT = {
- "'" => /[^&'>\n]+/,
- '"' => /[^&">\n]+/,
- }
-
- private
- def setup
- @state = :initial
- @plain_string_content = nil
- end
-
- def scan_tokens tokens, options
-
- state = @state
- plain_string_content = @plain_string_content
-
- until eos?
-
- kind = :error
- match = nil
-
- if scan(/\s+/m)
- kind = :space
-
- else
-
- case state
-
- when :initial
- if scan(/<!--.*?-->/m)
- kind = :comment
- elsif scan(/<!DOCTYPE.*?>/m)
- kind = :preprocessor
- elsif scan(/<\?xml.*?\?>/m)
- kind = :preprocessor
- elsif scan(/<\?.*?\?>|<%.*?%>/m)
- kind = :comment
- elsif scan(/<\/[-\w_.:]*>/m)
- kind = :tag
- elsif match = scan(/<[-\w_.:]*>?/m)
- kind = :tag
- state = :attribute unless match[-1] == ?>
- elsif scan(/[^<>&]+/)
- kind = :plain
- elsif scan(/#{ENTITY}/ox)
- kind = :entity
- elsif scan(/[>&]/)
- kind = :error
- else
- raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
- end
-
- when :attribute
- if scan(/#{TAG_END}/)
- kind = :tag
- state = :initial
- elsif scan(/#{ATTR_NAME}/o)
- kind = :attribute_name
- state = :attribute_equal
- else
- getch
- end
-
- when :attribute_equal
- if scan(/=/)
- kind = :operator
- state = :attribute_value
- elsif scan(/#{ATTR_NAME}/o)
- kind = :attribute_name
- elsif scan(/#{TAG_END}/o)
- kind = :tag
- state = :initial
- elsif scan(/./)
- state = :attribute
- end
-
- when :attribute_value
- if scan(/#{ATTR_VALUE_UNQUOTED}/o)
- kind = :attribute_value
- state = :attribute
- elsif match = scan(/["']/)
- tokens << [:open, :string]
- state = :attribute_value_string
- plain_string_content = PLAIN_STRING_CONTENT[match]
- kind = :delimiter
- elsif scan(/#{TAG_END}/o)
- kind = :tag
- state = :initial
- else
- getch
- end
-
- when :attribute_value_string
- if scan(plain_string_content)
- kind = :content
- elsif scan(/['"]/)
- tokens << [matched, :delimiter]
- tokens << [:close, :string]
- state = :attribute
- next
- elsif scan(/#{ENTITY}/ox)
- kind = :entity
- elsif scan(/[\n>]/)
- tokens << [:close, :string]
- kind = :error
- state = :initial
- end
-
- else
- raise_inspect 'Unknown state: %p' % [state], tokens
-
- end
-
- end
-
- match ||= matched
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
- end
-
- if options[:keep_state]
- @state = state
- @plain_string_content = plain_string_content
- end
-
- tokens
- end
-
- end
+ # HTML Scanner
+ #
+ # $Id$
+ class HTML < Scanner
+
+ include Streamable
+ register_for :html
+
+ ATTR_NAME = /[\w.:-]+/
+ ATTR_VALUE_UNQUOTED = ATTR_NAME
+ TAG_END = /\/?>/
+ HEX = /[0-9a-fA-F]/
+ ENTITY = /
+ &
+ (?:
+ \w+
+ |
+ \#
+ (?:
+ \d+
+ |
+ x#{HEX}+
+ )
+ )
+ ;
+ /ox
+
+ PLAIN_STRING_CONTENT = {
+ "'" => /[^&'>\n]+/,
+ '"' => /[^&">\n]+/,
+ }
+
+ private
+ def setup
+ @state = :initial
+ @plain_string_content = nil
+ end
+
+ def scan_tokens tokens, options
+
+ state = @state
+ plain_string_content = @plain_string_content
+
+ until eos?
+
+ kind = :error
+ match = nil
+
+ if scan(/\s+/m)
+ kind = :space
+
+ else
+
+ case state
+
+ when :initial
+ if scan(/<!--.*?-->/m)
+ kind = :comment
+ elsif scan(/<!DOCTYPE.*?>/m)
+ kind = :preprocessor
+ elsif scan(/<\?xml.*?\?>/m)
+ kind = :preprocessor
+ elsif scan(/<\?.*?\?>|<%.*?%>/m)
+ kind = :comment
+ elsif scan(/<\/[-\w_.:]*>/m)
+ kind = :tag
+ elsif match = scan(/<[-\w_.:]*>?/m)
+ kind = :tag
+ state = :attribute unless match[-1] == ?>
+ elsif scan(/[^<>&]+/)
+ kind = :plain
+ elsif scan(/#{ENTITY}/ox)
+ kind = :entity
+ elsif scan(/[>&]/)
+ kind = :error
+ else
+ raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
+ end
+
+ when :attribute
+ if scan(/#{TAG_END}/)
+ kind = :tag
+ state = :initial
+ elsif scan(/#{ATTR_NAME}/o)
+ kind = :attribute_name
+ state = :attribute_equal
+ else
+ getch
+ end
+
+ when :attribute_equal
+ if scan(/=/)
+ kind = :operator
+ state = :attribute_value
+ elsif scan(/#{ATTR_NAME}/o)
+ kind = :attribute_name
+ elsif scan(/#{TAG_END}/o)
+ kind = :tag
+ state = :initial
+ elsif scan(/./)
+ state = :attribute
+ end
+
+ when :attribute_value
+ if scan(/#{ATTR_VALUE_UNQUOTED}/o)
+ kind = :attribute_value
+ state = :attribute
+ elsif match = scan(/["']/)
+ tokens << [:open, :string]
+ state = :attribute_value_string
+ plain_string_content = PLAIN_STRING_CONTENT[match]
+ kind = :delimiter
+ elsif scan(/#{TAG_END}/o)
+ kind = :tag
+ state = :initial
+ else
+ getch
+ end
+
+ when :attribute_value_string
+ if scan(plain_string_content)
+ kind = :content
+ elsif scan(/['"]/)
+ tokens << [matched, :delimiter]
+ tokens << [:close, :string]
+ state = :attribute
+ next
+ elsif scan(/#{ENTITY}/ox)
+ kind = :entity
+ elsif scan(/[\n>]/)
+ tokens << [:close, :string]
+ kind = :error
+ state = :initial
+ end
+
+ else
+ raise_inspect 'Unknown state: %p' % [state], tokens
+
+ end
+
+ end
+
+ match ||= matched
+ if $DEBUG and (not kind or kind == :error)
+ raise_inspect 'Error token %p in line %d' %
+ [[match, kind], line], tokens
+ end
+ raise_inspect 'Empty token', tokens unless match
+
+ tokens << [match, kind]
+ end
+
+ if options[:keep_state]
+ @state = state
+ @plain_string_content = plain_string_content
+ end
+
+ tokens
+ end
+
+ end
end
end
diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb index 119924b..5955195 100644 --- a/lib/coderay/scanners/nitro_html.rb +++ b/lib/coderay/scanners/nitro_html.rb @@ -1,125 +1,125 @@ module CodeRay
module Scanners
- load :html
- load :ruby
-
- # RHTML Scanner
- #
- # $Id$
- class NitroHTML < Scanner
-
- include Streamable
- register_for :nitro_html
-
- NITRO_RUBY_BLOCK = /
- <\?r
- (?>
- [^\?]*
- (?> \?(?!>) [^\?]* )*
- )
- (?: \?> )?
- |
- <ruby>
- (?>
- [^<]*
- (?> <(?!\/ruby>) [^<]* )*
- )
- (?: <\/ruby> )?
- |
- <%
- (?>
- [^%]*
- (?> %(?!>) [^%]* )*
- )
- (?: %> )?
- /mx
-
- NITRO_VALUE_BLOCK = /
- \#
- (?:
- \{
- [^{}]*
- (?>
- \{ [^}]* \}
- (?> [^{}]* )
- )*
- \}?
- | \| [^|]* \|?
- | \( [^)]* \)?
- | \[ [^\]]* \]?
- | \\ [^\\]* \\?
- )
- /x
-
- NITRO_ENTITY = /
- % (?: \#\d+ | \w+ ) ;
- /
-
- START_OF_RUBY = /
- (?=[<\#%])
- < (?: \?r | % | ruby> )
- | \# [{(|]
- | % (?: \#\d+ | \w+ ) ;
- /x
-
- CLOSING_PAREN = Hash.new do |h, p|
- h[p] = p
- end.update( {
- '(' => ')',
- '[' => ']',
- '{' => '}',
- } )
-
- private
-
- def setup
- @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
- @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
- end
-
- def scan_tokens tokens, options
-
- until eos?
-
- if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
- @html_scanner.tokenize match
-
- elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
- start_tag = match[0,2]
- delimiter = CLOSING_PAREN[start_tag[1,1]]
- end_tag = match[-1,1] == delimiter ? delimiter : ''
- tokens << [:open, :inline]
- tokens << [start_tag, :delimiter]
- code = match[start_tag.size .. -1 - end_tag.size]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
- elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
- start_tag = '<?r'
- end_tag = match[-2,2] == '?>' ? '?>' : ''
- tokens << [:open, :inline]
- tokens << [start_tag, :delimiter]
- code = match[start_tag.size .. -(end_tag.size)-1]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
- elsif entity = scan(/#{NITRO_ENTITY}/o)
- tokens << [entity, :entity]
-
- else
- raise_inspect 'else-case reached!', tokens
- end
-
- end
-
- tokens
-
- end
-
- end
+ load :html
+ load :ruby
+
+ # RHTML Scanner
+ #
+ # $Id$
+ class NitroHTML < Scanner
+
+ include Streamable
+ register_for :nitro_html
+
+ NITRO_RUBY_BLOCK = /
+ <\?r
+ (?>
+ [^\?]*
+ (?> \?(?!>) [^\?]* )*
+ )
+ (?: \?> )?
+ |
+ <ruby>
+ (?>
+ [^<]*
+ (?> <(?!\/ruby>) [^<]* )*
+ )
+ (?: <\/ruby> )?
+ |
+ <%
+ (?>
+ [^%]*
+ (?> %(?!>) [^%]* )*
+ )
+ (?: %> )?
+ /mx
+
+ NITRO_VALUE_BLOCK = /
+ \#
+ (?:
+ \{
+ [^{}]*
+ (?>
+ \{ [^}]* \}
+ (?> [^{}]* )
+ )*
+ \}?
+ | \| [^|]* \|?
+ | \( [^)]* \)?
+ | \[ [^\]]* \]?
+ | \\ [^\\]* \\?
+ )
+ /x
+
+ NITRO_ENTITY = /
+ % (?: \#\d+ | \w+ ) ;
+ /
+
+ START_OF_RUBY = /
+ (?=[<\#%])
+ < (?: \?r | % | ruby> )
+ | \# [{(|]
+ | % (?: \#\d+ | \w+ ) ;
+ /x
+
+ CLOSING_PAREN = Hash.new do |h, p|
+ h[p] = p
+ end.update( {
+ '(' => ')',
+ '[' => ']',
+ '{' => '}',
+ } )
+
+ private
+
+ def setup
+ @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+ end
+
+ def scan_tokens tokens, options
+
+ until eos?
+
+ if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
+ @html_scanner.tokenize match
+
+ elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
+ start_tag = match[0,2]
+ delimiter = CLOSING_PAREN[start_tag[1,1]]
+ end_tag = match[-1,1] == delimiter ? delimiter : ''
+ tokens << [:open, :inline]
+ tokens << [start_tag, :delimiter]
+ code = match[start_tag.size .. -1 - end_tag.size]
+ @ruby_scanner.tokenize code
+ tokens << [end_tag, :delimiter] unless end_tag.empty?
+ tokens << [:close, :inline]
+
+ elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
+ start_tag = '<?r'
+ end_tag = match[-2,2] == '?>' ? '?>' : ''
+ tokens << [:open, :inline]
+ tokens << [start_tag, :delimiter]
+ code = match[start_tag.size .. -(end_tag.size)-1]
+ @ruby_scanner.tokenize code
+ tokens << [end_tag, :delimiter] unless end_tag.empty?
+ tokens << [:close, :inline]
+
+ elsif entity = scan(/#{NITRO_ENTITY}/o)
+ tokens << [entity, :entity]
+
+ else
+ raise_inspect 'else-case reached!', tokens
+ end
+
+ end
+
+ tokens
+
+ end
+
+ end
end
end
diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb index 3824ee9..9007646 100644 --- a/lib/coderay/scanners/plaintext.rb +++ b/lib/coderay/scanners/plaintext.rb @@ -1,15 +1,15 @@ module CodeRay
module Scanners
- class Plaintext < Scanner
-
- register_for :plaintext, :plain
+ class Plaintext < Scanner
- def scan_tokens tokens, options
- tokens << [scan_until(/\z/), :plain]
- end
+ register_for :plaintext, :plain
- end
+ def scan_tokens tokens, options
+ tokens << [scan_until(/\z/), :plain]
+ end
+
+ end
end
end
diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb index 3b1817e..15a7566 100644 --- a/lib/coderay/scanners/rhtml.rb +++ b/lib/coderay/scanners/rhtml.rb @@ -1,65 +1,65 @@ module CodeRay
module Scanners
- load :html
- load :ruby
-
- # RHTML Scanner
- #
- # $Id$
- class RHTML < Scanner
-
- include Streamable
- register_for :rhtml
-
- ERB_RUBY_BLOCK = /
- <%(?!%)[=-]?
- (?>
- [^%]*
- (?> %(?!>) [^%]* )*
- )
- (?: %> )?
- /x
-
- START_OF_ERB = /
- <%(?!%)
- /x
-
- private
-
- def setup
- @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
- @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
- end
-
- def scan_tokens tokens, options
-
- until eos?
-
- if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
- @html_scanner.tokenize match
-
- elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
- start_tag = match[/\A<%[-=]?/]
- end_tag = match[/%?>?\z/]
- tokens << [:open, :inline]
- tokens << [start_tag, :delimiter]
- code = match[start_tag.size .. -1 - end_tag.size]
- @ruby_scanner.tokenize code
- tokens << [end_tag, :delimiter] unless end_tag.empty?
- tokens << [:close, :inline]
-
- else
- raise_inspect 'else-case reached!', tokens
- end
-
- end
-
- tokens
-
- end
-
- end
+ load :html
+ load :ruby
+
+ # RHTML Scanner
+ #
+ # $Id$
+ class RHTML < Scanner
+
+ include Streamable
+ register_for :rhtml
+
+ ERB_RUBY_BLOCK = /
+ <%(?!%)[=-]?
+ (?>
+ [^%]*
+ (?> %(?!>) [^%]* )*
+ )
+ (?: %> )?
+ /x
+
+ START_OF_ERB = /
+ <%(?!%)
+ /x
+
+ private
+
+ def setup
+ @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
+ @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+ end
+
+ def scan_tokens tokens, options
+
+ until eos?
+
+ if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
+ @html_scanner.tokenize match
+
+ elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
+ start_tag = match[/\A<%[-=]?/]
+ end_tag = match[/%?>?\z/]
+ tokens << [:open, :inline]
+ tokens << [start_tag, :delimiter]
+ code = match[start_tag.size .. -1 - end_tag.size]
+ @ruby_scanner.tokenize code
+ tokens << [end_tag, :delimiter] unless end_tag.empty?
+ tokens << [:close, :inline]
+
+ else
+ raise_inspect 'else-case reached!', tokens
+ end
+
+ end
+
+ tokens
+
+ end
+
+ end
end
end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 7ba3029..3ce5003 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -157,7 +157,7 @@ module Scanners next
# }}}
else
-# {{{
+# {{{
if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
fancy_allowed = true
@@ -190,7 +190,7 @@ module Scanners next
elsif state == :initial
-
+
# IDENTS #
if match = scan(/#{patterns::METHOD_NAME}/o)
if last_token_dot
@@ -205,7 +205,7 @@ module Scanners end
## experimental!
fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
-
+
# OPERATORS #
elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
(last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
@@ -228,12 +228,12 @@ module Scanners end
end
end
-
+
elsif match = scan(/ ['"] /mx)
tokens << [:open, :string]
type = :delimiter
state = patterns::StringState.new :string, match == '"', match # important for streaming
-
+
elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
type = :instance_variable
@@ -246,7 +246,7 @@ module Scanners tokens = []
saved_tokens = tokens
end
-
+
elsif match = scan(/#{patterns::NUMERIC}/o)
type = if self[1] then :float else :integer end
@@ -261,11 +261,11 @@ module Scanners else
type = :symbol
end
-
+
elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
regexp_allowed = fancy_allowed = :set
type = :operator
-
+
elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
indented = self[1] == '-'
quote = self[3]
@@ -277,7 +277,7 @@ module Scanners heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
heredocs ||= [] # create heredocs if empty
heredocs << heredoc
-
+
elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
type, interpreted = *patterns::FancyStringType.fetch(self[1]) do
raise_inspect 'Unknown fancy string: %%%p' % k, tokens
@@ -301,18 +301,18 @@ module Scanners type = :delimiter
state = patterns::StringState.new :shell, true, match
end
-
+
elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
type = :global_variable
-
+
elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
type = :class_variable
-
+
else
match = getch
-
+
end
-
+
elsif state == :def_expected
state = :initial
if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
@@ -341,7 +341,7 @@ module Scanners state = :initial
next
end
-
+
elsif state == :undef_comma_expected
if match = scan(/,/)
type = :operator
@@ -377,7 +377,7 @@ module Scanners raise_inspect 'Empty token', tokens unless match
tokens << [match, type]
-
+
if last_state
state = last_state
last_state = nil
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index c38739d..b1e0d1b 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -25,7 +25,7 @@ module Scanners ]
REGEXP_ALLOWED = WordList.new(false).
add(IDENTS_ALLOWING_REGEXP, :set)
-
+
PREDEFINED_CONSTANTS = %w[
nil true false self
DATA ARGV ARGF __FILE__ __LINE__
@@ -60,7 +60,7 @@ module Scanners '/'=> :regexp,
}
QUOTE_TO_TYPE.default = :string
-
+
REGEXP_MODIFIERS = /[mixounse]*/
REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/
@@ -145,17 +145,17 @@ module Scanners }
FancyStringType['w'] = FancyStringType['q']
FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
-
+
class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
:paren, :paren_depth, :pattern, :next_state
-
+
CLOSING_PAREN = Hash[ *%w[
( )
[ ]
< >
{ }
] ]
-
+
CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with <<
OPENING_PAREN = CLOSING_PAREN.invert
@@ -166,8 +166,8 @@ module Scanners delim_pattern << Regexp.escape(closing_paren)
end
-
- special_escapes =
+
+ special_escapes =
case interpreted
when :regexp_symbols
'| ' + REGEXP_SYMBOLS.source
diff --git a/lib/coderay/scanners/xml.rb b/lib/coderay/scanners/xml.rb index d1c6bad..5ce8ce9 100644 --- a/lib/coderay/scanners/xml.rb +++ b/lib/coderay/scanners/xml.rb @@ -1,18 +1,18 @@ module CodeRay
module Scanners
- load :html
-
- # XML Scanner
- #
- # $Id$
- #
- # Currently this is the same scanner as Scanners::HTML.
- class XML < HTML
+ load :html
- register_for :xml
+ # XML Scanner
+ #
+ # $Id$
+ #
+ # Currently this is the same scanner as Scanners::HTML.
+ class XML < HTML
- end
+ register_for :xml
+
+ end
end
end
|