diff options
Diffstat (limited to 'lib/coderay/scanners/c.rb')
-rw-r--r-- | lib/coderay/scanners/c.rb | 318 |
1 files changed, 163 insertions, 155 deletions
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index 66b8de1..be113d0 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,163 @@ -module CodeRay
-module Scanners
-
- class C < Scanner
-
- register_for :c
-
- RESERVED_WORDS = [
- 'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
- 'for', 'goto', 'if', 'return', 'switch', 'while',
- 'struct', 'union', 'enum', 'typedef',
- 'static', 'register', 'auto', 'extern',
- 'sizeof',
- 'volatile', 'const', # C89
- 'inline', 'restrict', # C99
- ]
-
- PREDEFINED_TYPES = [
- 'int', 'long', 'short', 'char', 'void',
- 'signed', 'unsigned', 'float', 'double',
- 'bool', 'complex', # C99
- ]
-
- PREDEFINED_CONSTANTS = [
- 'EOF', 'NULL',
- 'true', 'false', # C99
- ]
-
- IDENT_KIND = WordList.new(:ident).
- add(RESERVED_WORDS, :reserved).
- add(PREDEFINED_TYPES, :pre_type).
- add(PREDEFINED_CONSTANTS, :pre_constant)
-
- ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
-
- def scan_tokens tokens, options
-
- state = :initial
-
- until eos?
-
- kind = :error
- match = nil
-
- case state
-
- when :initial
-
- if scan(/ \s+ | \\\n /x)
- kind = :space
-
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
- kind = :comment
-
- elsif match = scan(/ \# \s* if \s* 0 /x)
- match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
- kind = :comment
-
- elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
- kind = :operator
-
- elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
- kind = IDENT_KIND[match]
- if kind == :ident and check(/:(?!:)/)
- match << scan(/:/)
- kind = :label
- end
-
- elsif match = scan(/L?"/)
- tokens << [:open, :string]
- if match[0] == ?L
- tokens << ['L', :modifier]
- match = '"'
- end
- state = :string
- kind = :delimiter
-
- elsif scan(/#\s*(\w*)/)
- kind = :preprocessor # FIXME multiline preprocs
- state = :include_expected if self[1] == 'include'
-
- elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
- kind = :char
-
- elsif scan(/0[xX][0-9A-Fa-f]+/)
- kind = :hex
-
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
- kind = :oct
-
- elsif scan(/(?:\d+)(?![.eEfF])/)
- kind = :integer
-
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
- kind = :float
-
- else
- getch
- end
-
- when :string
- if scan(/[^\\"]+/)
- kind = :content
- elsif scan(/"/)
- tokens << ['"', :delimiter]
- tokens << [:close, :string]
- state = :initial
- next
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
- kind = :char
- elsif scan(/ \\ | $ /x)
- kind = :error
- state = :initial
- else
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
- end
-
- when :include_expected
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
- kind = :include
- state = :initial
-
- elsif match = scan(/\s+/)
- kind = :space
- state = :initial if match.index ?\n
-
- else
- getch
-
- end
-
- else
- raise_inspect 'Unknown state', tokens
-
- end
-
- match ||= matched
- if $DEBUG and (not kind or kind == :error)
- raise_inspect 'Error token %p in line %d' %
- [[match, kind], line], tokens
- end
- raise_inspect 'Empty token', tokens unless match
-
- tokens << [match, kind]
-
- end
-
- tokens
- end
-
- end
-
-end
-end
+module CodeRay +module Scanners + + class C < Scanner + + register_for :c + + RESERVED_WORDS = [ + 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', + 'for', 'goto', 'if', 'return', 'switch', 'while', + 'struct', 'union', 'enum', 'typedef', + 'static', 'register', 'auto', 'extern', + 'sizeof', + 'volatile', 'const', # C89 + 'inline', 'restrict', # C99 + ] + + PREDEFINED_TYPES = [ + 'int', 'long', 'short', 'char', 'void', + 'signed', 'unsigned', 'float', 'double', + 'bool', 'complex', # C99 + ] + + PREDEFINED_CONSTANTS = [ + 'EOF', 'NULL', + 'true', 'false', # C99 + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + kind = :error + + end + + when :string + if scan(/[^\\\n"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + tokens << [:close, :string] + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + kind = :error + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + if state == :string + tokens << [:close, :string] + end + + tokens + end + + end + +end +end |