From 9f4c7ab7553f9be7c9d14da0ba7462ad746c2f5d Mon Sep 17 00:00:00 2001 From: murphy Date: Mon, 10 Jul 2006 00:32:57 +0000 Subject: Big re-indenting - no more tabs! --- lib/coderay/scanners/_map.rb | 18 +- lib/coderay/scanners/c.rb | 300 +++++++++++++++---------------- lib/coderay/scanners/delphi.rb | 226 ++++++++++++------------ lib/coderay/scanners/html.rb | 322 +++++++++++++++++----------------- lib/coderay/scanners/nitro_html.rb | 238 ++++++++++++------------- lib/coderay/scanners/plaintext.rb | 14 +- lib/coderay/scanners/rhtml.rb | 118 ++++++------- lib/coderay/scanners/ruby.rb | 32 ++-- lib/coderay/scanners/ruby/patterns.rb | 14 +- lib/coderay/scanners/xml.rb | 20 +-- 10 files changed, 651 insertions(+), 651 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 1482ee9..6268a6c 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -1,14 +1,14 @@ module CodeRay module Scanners - - map :cpp => :c, - :plain => :plaintext, - :pascal => :delphi, - :irb => :ruby, - :xml => :html, - :xhtml => :nitro_html - default :plain - + map :cpp => :c, + :plain => :plaintext, + :pascal => :delphi, + :irb => :ruby, + :xml => :html, + :xhtml => :nitro_html + + default :plain + end end diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index effaaa1..66b8de1 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,155 @@ module CodeRay module Scanners - - class C < Scanner - - register_for :c - - RESERVED_WORDS = [ - 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', - 'for', 'goto', 'if', 'return', 'switch', 'while', - 'struct', 'union', 'enum', 'typedef', - 'static', 'register', 'auto', 'extern', - 'sizeof', - 'volatile', 'const', # C89 - 'inline', 'restrict', # C99 - ] - - PREDEFINED_TYPES = [ - 'int', 'long', 'short', 'char', 'void', - 'signed', 'unsigned', 'float', 'double', - 'bool', 'complex', # C99 - ] - - PREDEFINED_CONSTANTS = [ - 'EOF', 'NULL', - 'true', 'false', # C99 - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_TYPES, :pre_type). - add(PREDEFINED_CONSTANTS, :pre_constant) - - ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x - UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - case state - - when :initial - - if scan(/ \s+ | \\\n /x) - kind = :space - - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - kind = :comment - - elsif match = scan(/ \# \s* if \s* 0 /x) - match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? - kind = :comment - - elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - if kind == :ident and check(/:(?!:)/) - match << scan(/:/) - kind = :label - end - - elsif match = scan(/L?"/) - tokens << [:open, :string] - if match[0] == ?L - tokens << ['L', :modifier] - match = '"' - end - state = :string - kind = :delimiter - - elsif scan(/#\s*(\w*)/) - kind = :preprocessor # FIXME multiline preprocs - state = :include_expected if self[1] == 'include' - - elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) - kind = :char - - elsif scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - - elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) - kind = :oct - - elsif scan(/(?:\d+)(?![.eEfF])/) - kind = :integer - - elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) - kind = :float - - else - getch - end - - when :string - if scan(/[^\\"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/ \\ | $ /x) - kind = :error - state = :initial - else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens - end - - when :include_expected - if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) - kind = :include - state = :initial - - elsif match = scan(/\s+/) - kind = :space - state = :initial if match.index ?\n - - else - getch - - end - - else - raise_inspect 'Unknown state', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - - end - - tokens - end - - end + + class C < Scanner + + register_for :c + + RESERVED_WORDS = [ + 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', + 'for', 'goto', 'if', 'return', 'switch', 'while', + 'struct', 'union', 'enum', 'typedef', + 'static', 'register', 'auto', 'extern', + 'sizeof', + 'volatile', 'const', # C89 + 'inline', 'restrict', # C99 + ] + + PREDEFINED_TYPES = [ + 'int', 'long', 'short', 'char', 'void', + 'signed', 'unsigned', 'float', 'double', + 'bool', 'complex', # C99 + ] + + PREDEFINED_CONSTANTS = [ + 'EOF', 'NULL', + 'true', 'false', # C99 + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = :error + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + end + + when :string + if scan(/[^\\"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and (not kind or kind == :error) + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + tokens + end + + end end end diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb index c92fab5..d9d9e1d 100644 --- a/lib/coderay/scanners/delphi.rb +++ b/lib/coderay/scanners/delphi.rb @@ -1,129 +1,129 @@ module CodeRay module Scanners - - class Delphi < Scanner + + class Delphi < Scanner - register_for :delphi - - RESERVED_WORDS = [ - 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', - 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', - 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', - 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', - 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', - 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', - 'procedure', 'program', 'property', 'raise', 'record', 'repeat', - 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', - 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', - 'xor', 'on' - ] + register_for :delphi + + RESERVED_WORDS = [ + 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', + 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', + 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', + 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', + 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', + 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', + 'procedure', 'program', 'property', 'raise', 'record', 'repeat', + 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', + 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', + 'xor', 'on' + ] - DIRECTIVES = [ - 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', - 'contains', 'deprecated', 'dispid', 'dynamic', 'export', - 'external', 'far', 'forward', 'implements', 'local', - 'near', 'nodefault', 'on', 'overload', 'override', - 'package', 'pascal', 'platform', 'private', 'protected', 'public', - 'published', 'read', 'readonly', 'register', 'reintroduce', - 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', - 'virtual', 'write', 'writeonly' - ] + DIRECTIVES = [ + 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', + 'contains', 'deprecated', 'dispid', 'dynamic', 'export', + 'external', 'far', 'forward', 'implements', 'local', + 'near', 'nodefault', 'on', 'overload', 'override', + 'package', 'pascal', 'platform', 'private', 'protected', 'public', + 'published', 'read', 'readonly', 'register', 'reintroduce', + 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', + 'virtual', 'write', 'writeonly' + ] - IDENT_KIND = CaseIgnoringWordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(DIRECTIVES, :directive) + IDENT_KIND = CaseIgnoringWordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(DIRECTIVES, :directive) - def scan_tokens tokens, options + def scan_tokens tokens, options - state = :initial + state = :initial - until eos? + until eos? - kind = :error - match = nil + kind = :error + match = nil - if state == :initial - - if scan(/ \s+ /x) - kind = :space - - elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) - kind = :preprocessor - - elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) - kind = :comment - - elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - - elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) - tokens << [:open, :char] - tokens << ["'", :delimiter] - tokens << [self[1], :content] - tokens << ["'", :delimiter] - tokens << [:close, :char] - next - - elsif match = scan(/ ' /x) - tokens << [:open, :string] - state = :string - kind = :delimiter - - elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) - kind = :char - - elsif scan(/ \$ [0-9A-Fa-f]+ /x) - kind = :hex - - elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) - kind = :integer - - elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) - kind = :float + if state == :initial + + if scan(/ \s+ /x) + kind = :space + + elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) + kind = :preprocessor + + elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) + kind = :comment + + elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + + elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) + tokens << [:open, :char] + tokens << ["'", :delimiter] + tokens << [self[1], :content] + tokens << ["'", :delimiter] + tokens << [:close, :char] + next + + elsif match = scan(/ ' /x) + tokens << [:open, :string] + state = :string + kind = :delimiter + + elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) + kind = :char + + elsif scan(/ \$ [0-9A-Fa-f]+ /x) + kind = :hex + + elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) + kind = :integer + + elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) + kind = :float - else - getch - end - - elsif state == :string - if scan(/[^\n']+/) - kind = :content - elsif scan(/''/) - kind = :char - elsif scan(/'/) - tokens << ["'", :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/\n/) - state = :initial - else - raise "else case \' reached; %p not handled." % peek(1), tokens - end - - else - raise 'else-case reached', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match + else + getch + end + + elsif state == :string + if scan(/[^\n']+/) + kind = :content + elsif scan(/''/) + kind = :char + elsif scan(/'/) + tokens << ["'", :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/\n/) + state = :initial + else + raise "else case \' reached; %p not handled." % peek(1), tokens + end + + else + raise 'else-case reached', tokens + + end + + match ||= matched + if $DEBUG and (not kind or kind == :error) + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match - tokens << [match, kind] - - end - - tokens - end + tokens << [match, kind] + + end + + tokens + end - end + end end end diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index cbf8a55..7cdc07e 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,167 +1,167 @@ module CodeRay module Scanners - # HTML Scanner - # - # $Id$ - class HTML < Scanner - - include Streamable - register_for :html - - ATTR_NAME = /[\w.:-]+/ - ATTR_VALUE_UNQUOTED = ATTR_NAME - TAG_END = /\/?>/ - HEX = /[0-9a-fA-F]/ - ENTITY = / - & - (?: - \w+ - | - \# - (?: - \d+ - | - x#{HEX}+ - ) - ) - ; - /ox - - PLAIN_STRING_CONTENT = { - "'" => /[^&'>\n]+/, - '"' => /[^&">\n]+/, - } - - private - def setup - @state = :initial - @plain_string_content = nil - end - - def scan_tokens tokens, options - - state = @state - plain_string_content = @plain_string_content - - until eos? - - kind = :error - match = nil - - if scan(/\s+/m) - kind = :space - - else - - case state - - when :initial - if scan(//m) - kind = :comment - elsif scan(//m) - kind = :preprocessor - elsif scan(/<\?xml.*?\?>/m) - kind = :preprocessor - elsif scan(/<\?.*?\?>|<%.*?%>/m) - kind = :comment - elsif scan(/<\/[-\w_.:]*>/m) - kind = :tag - elsif match = scan(/<[-\w_.:]*>?/m) - kind = :tag - state = :attribute unless match[-1] == ?> - elsif scan(/[^<>&]+/) - kind = :plain - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[>&]/) - kind = :error - else - raise_inspect '[BUG] else-case reached with state %p' % [state], tokens - end - - when :attribute - if scan(/#{TAG_END}/) - kind = :tag - state = :initial - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - state = :attribute_equal - else - getch - end - - when :attribute_equal - if scan(/=/) - kind = :operator - state = :attribute_value - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - elsif scan(/./) - state = :attribute - end - - when :attribute_value - if scan(/#{ATTR_VALUE_UNQUOTED}/o) - kind = :attribute_value - state = :attribute - elsif match = scan(/["']/) - tokens << [:open, :string] - state = :attribute_value_string - plain_string_content = PLAIN_STRING_CONTENT[match] - kind = :delimiter - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - else - getch - end - - when :attribute_value_string - if scan(plain_string_content) - kind = :content - elsif scan(/['"]/) - tokens << [matched, :delimiter] - tokens << [:close, :string] - state = :attribute - next - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[\n>]/) - tokens << [:close, :string] - kind = :error - state = :initial - end - - else - raise_inspect 'Unknown state: %p' % [state], tokens - - end - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - end - - if options[:keep_state] - @state = state - @plain_string_content = plain_string_content - end - - tokens - end - - end + # HTML Scanner + # + # $Id$ + class HTML < Scanner + + include Streamable + register_for :html + + ATTR_NAME = /[\w.:-]+/ + ATTR_VALUE_UNQUOTED = ATTR_NAME + TAG_END = /\/?>/ + HEX = /[0-9a-fA-F]/ + ENTITY = / + & + (?: + \w+ + | + \# + (?: + \d+ + | + x#{HEX}+ + ) + ) + ; + /ox + + PLAIN_STRING_CONTENT = { + "'" => /[^&'>\n]+/, + '"' => /[^&">\n]+/, + } + + private + def setup + @state = :initial + @plain_string_content = nil + end + + def scan_tokens tokens, options + + state = @state + plain_string_content = @plain_string_content + + until eos? + + kind = :error + match = nil + + if scan(/\s+/m) + kind = :space + + else + + case state + + when :initial + if scan(//m) + kind = :comment + elsif scan(//m) + kind = :preprocessor + elsif scan(/<\?xml.*?\?>/m) + kind = :preprocessor + elsif scan(/<\?.*?\?>|<%.*?%>/m) + kind = :comment + elsif scan(/<\/[-\w_.:]*>/m) + kind = :tag + elsif match = scan(/<[-\w_.:]*>?/m) + kind = :tag + state = :attribute unless match[-1] == ?> + elsif scan(/[^<>&]+/) + kind = :plain + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[>&]/) + kind = :error + else + raise_inspect '[BUG] else-case reached with state %p' % [state], tokens + end + + when :attribute + if scan(/#{TAG_END}/) + kind = :tag + state = :initial + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + state = :attribute_equal + else + getch + end + + when :attribute_equal + if scan(/=/) + kind = :operator + state = :attribute_value + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + elsif scan(/./) + state = :attribute + end + + when :attribute_value + if scan(/#{ATTR_VALUE_UNQUOTED}/o) + kind = :attribute_value + state = :attribute + elsif match = scan(/["']/) + tokens << [:open, :string] + state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] + kind = :delimiter + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + else + getch + end + + when :attribute_value_string + if scan(plain_string_content) + kind = :content + elsif scan(/['"]/) + tokens << [matched, :delimiter] + tokens << [:close, :string] + state = :attribute + next + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[\n>]/) + tokens << [:close, :string] + kind = :error + state = :initial + end + + else + raise_inspect 'Unknown state: %p' % [state], tokens + + end + + end + + match ||= matched + if $DEBUG and (not kind or kind == :error) + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + end + + if options[:keep_state] + @state = state + @plain_string_content = plain_string_content + end + + tokens + end + + end end end diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb index 119924b..5955195 100644 --- a/lib/coderay/scanners/nitro_html.rb +++ b/lib/coderay/scanners/nitro_html.rb @@ -1,125 +1,125 @@ module CodeRay module Scanners - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class NitroHTML < Scanner - - include Streamable - register_for :nitro_html - - NITRO_RUBY_BLOCK = / - <\?r - (?> - [^\?]* - (?> \?(?!>) [^\?]* )* - ) - (?: \?> )? - | - - (?> - [^<]* - (?> <(?!\/ruby>) [^<]* )* - ) - (?: <\/ruby> )? - | - <% - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /mx - - NITRO_VALUE_BLOCK = / - \# - (?: - \{ - [^{}]* - (?> - \{ [^}]* \} - (?> [^{}]* ) - )* - \}? - | \| [^|]* \|? - | \( [^)]* \)? - | \[ [^\]]* \]? - | \\ [^\\]* \\? - ) - /x - - NITRO_ENTITY = / - % (?: \#\d+ | \w+ ) ; - / - - START_OF_RUBY = / - (?=[<\#%]) - < (?: \?r | % | ruby> ) - | \# [{(|] - | % (?: \#\d+ | \w+ ) ; - /x - - CLOSING_PAREN = Hash.new do |h, p| - h[p] = p - end.update( { - '(' => ')', - '[' => ']', - '{' => '}', - } ) - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) - start_tag = match[0,2] - delimiter = CLOSING_PAREN[start_tag[1,1]] - end_tag = match[-1,1] == delimiter ? delimiter : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) - start_tag = '' ? '?>' : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -(end_tag.size)-1] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif entity = scan(/#{NITRO_ENTITY}/o) - tokens << [entity, :entity] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class NitroHTML < Scanner + + include Streamable + register_for :nitro_html + + NITRO_RUBY_BLOCK = / + <\?r + (?> + [^\?]* + (?> \?(?!>) [^\?]* )* + ) + (?: \?> )? + | + + (?> + [^<]* + (?> <(?!\/ruby>) [^<]* )* + ) + (?: <\/ruby> )? + | + <% + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /mx + + NITRO_VALUE_BLOCK = / + \# + (?: + \{ + [^{}]* + (?> + \{ [^}]* \} + (?> [^{}]* ) + )* + \}? + | \| [^|]* \|? + | \( [^)]* \)? + | \[ [^\]]* \]? + | \\ [^\\]* \\? + ) + /x + + NITRO_ENTITY = / + % (?: \#\d+ | \w+ ) ; + / + + START_OF_RUBY = / + (?=[<\#%]) + < (?: \?r | % | ruby> ) + | \# [{(|] + | % (?: \#\d+ | \w+ ) ; + /x + + CLOSING_PAREN = Hash.new do |h, p| + h[p] = p + end.update( { + '(' => ')', + '[' => ']', + '{' => '}', + } ) + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) + start_tag = match[0,2] + delimiter = CLOSING_PAREN[start_tag[1,1]] + end_tag = match[-1,1] == delimiter ? delimiter : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) + start_tag = '' ? '?>' : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -(end_tag.size)-1] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif entity = scan(/#{NITRO_ENTITY}/o) + tokens << [entity, :entity] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end end end diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb index 3824ee9..9007646 100644 --- a/lib/coderay/scanners/plaintext.rb +++ b/lib/coderay/scanners/plaintext.rb @@ -1,15 +1,15 @@ module CodeRay module Scanners - class Plaintext < Scanner - - register_for :plaintext, :plain + class Plaintext < Scanner - def scan_tokens tokens, options - tokens << [scan_until(/\z/), :plain] - end + register_for :plaintext, :plain - end + def scan_tokens tokens, options + tokens << [scan_until(/\z/), :plain] + end + + end end end diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb index 3b1817e..15a7566 100644 --- a/lib/coderay/scanners/rhtml.rb +++ b/lib/coderay/scanners/rhtml.rb @@ -1,65 +1,65 @@ module CodeRay module Scanners - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class RHTML < Scanner - - include Streamable - register_for :rhtml - - ERB_RUBY_BLOCK = / - <%(?!%)[=-]? - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /x - - START_OF_ERB = / - <%(?!%) - /x - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{ERB_RUBY_BLOCK}/o) - start_tag = match[/\A<%[-=]?/] - end_tag = match[/%?>?\z/] - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class RHTML < Scanner + + include Streamable + register_for :rhtml + + ERB_RUBY_BLOCK = / + <%(?!%)[=-]? + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /x + + START_OF_ERB = / + <%(?!%) + /x + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{ERB_RUBY_BLOCK}/o) + start_tag = match[/\A<%[-=]?/] + end_tag = match[/%?>?\z/] + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end end end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 7ba3029..3ce5003 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -157,7 +157,7 @@ module Scanners next # }}} else -# {{{ +# {{{ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) fancy_allowed = true @@ -190,7 +190,7 @@ module Scanners next elsif state == :initial - + # IDENTS # if match = scan(/#{patterns::METHOD_NAME}/o) if last_token_dot @@ -205,7 +205,7 @@ module Scanners end ## experimental! fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) - + # OPERATORS # elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) @@ -228,12 +228,12 @@ module Scanners end end end - + elsif match = scan(/ ['"] /mx) tokens << [:open, :string] type = :delimiter state = patterns::StringState.new :string, match == '"', match # important for streaming - + elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) type = :instance_variable @@ -246,7 +246,7 @@ module Scanners tokens = [] saved_tokens = tokens end - + elsif match = scan(/#{patterns::NUMERIC}/o) type = if self[1] then :float else :integer end @@ -261,11 +261,11 @@ module Scanners else type = :symbol end - + elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) regexp_allowed = fancy_allowed = :set type = :operator - + elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) indented = self[1] == '-' quote = self[3] @@ -277,7 +277,7 @@ module Scanners heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) heredocs ||= [] # create heredocs if empty heredocs << heredoc - + elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) type, interpreted = *patterns::FancyStringType.fetch(self[1]) do raise_inspect 'Unknown fancy string: %%%p' % k, tokens @@ -301,18 +301,18 @@ module Scanners type = :delimiter state = patterns::StringState.new :shell, true, match end - + elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) type = :global_variable - + elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) type = :class_variable - + else match = getch - + end - + elsif state == :def_expected state = :initial if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) @@ -341,7 +341,7 @@ module Scanners state = :initial next end - + elsif state == :undef_comma_expected if match = scan(/,/) type = :operator @@ -377,7 +377,7 @@ module Scanners raise_inspect 'Empty token', tokens unless match tokens << [match, type] - + if last_state state = last_state last_state = nil diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index c38739d..b1e0d1b 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -25,7 +25,7 @@ module Scanners ] REGEXP_ALLOWED = WordList.new(false). add(IDENTS_ALLOWING_REGEXP, :set) - + PREDEFINED_CONSTANTS = %w[ nil true false self DATA ARGV ARGF __FILE__ __LINE__ @@ -60,7 +60,7 @@ module Scanners '/'=> :regexp, } QUOTE_TO_TYPE.default = :string - + REGEXP_MODIFIERS = /[mixounse]*/ REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ @@ -145,17 +145,17 @@ module Scanners } FancyStringType['w'] = FancyStringType['q'] FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q'] - + class StringState < Struct.new :type, :interpreted, :delim, :heredoc, :paren, :paren_depth, :pattern, :next_state - + CLOSING_PAREN = Hash[ *%w[ ( ) [ ] < > { } ] ] - + CLOSING_PAREN.values.each { |o| o.freeze } # debug, if I try to change it with << OPENING_PAREN = CLOSING_PAREN.invert @@ -166,8 +166,8 @@ module Scanners delim_pattern << Regexp.escape(closing_paren) end - - special_escapes = + + special_escapes = case interpreted when :regexp_symbols '| ' + REGEXP_SYMBOLS.source diff --git a/lib/coderay/scanners/xml.rb b/lib/coderay/scanners/xml.rb index d1c6bad..5ce8ce9 100644 --- a/lib/coderay/scanners/xml.rb +++ b/lib/coderay/scanners/xml.rb @@ -1,18 +1,18 @@ module CodeRay module Scanners - load :html - - # XML Scanner - # - # $Id$ - # - # Currently this is the same scanner as Scanners::HTML. - class XML < HTML + load :html - register_for :xml + # XML Scanner + # + # $Id$ + # + # Currently this is the same scanner as Scanners::HTML. + class XML < HTML - end + register_for :xml + + end end end -- cgit v1.2.1