diff options
author | murphy <murphy@rubychan.de> | 2009-10-19 16:52:59 +0000 |
---|---|---|
committer | murphy <murphy@rubychan.de> | 2009-10-19 16:52:59 +0000 |
commit | a6f93eb4304a062af43083a75d525acc1af9883d (patch) | |
tree | 624996688e5b983a38ba08f9ea05a07017d0e432 /lib/coderay | |
parent | 4f67b22339ea7025dc4536a7e369f60755103acc (diff) | |
download | coderay-a6f93eb4304a062af43083a75d525acc1af9883d.tar.gz |
New Scanner: *C++* (#76)!
There's a problem with the ternary operator (?:) and labels which
needs to be fixed in C, C++ and PHP scanners. I'll get to that soon.
Diffstat (limited to 'lib/coderay')
-rw-r--r-- | lib/coderay/scanners/_map.rb | 4 | ||||
-rw-r--r-- | lib/coderay/scanners/cpp.rb | 197 |
2 files changed, 200 insertions, 1 deletions
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 9f08d7d..01078c1 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -2,7 +2,9 @@ module CodeRay module Scanners map \ - :cpp => :c, + :h => :c, + :cplusplus => :cpp, + :'c++' => :cpp, :ecma => :java_script, :ecmascript => :java_script, :ecma_script => :java_script, diff --git a/lib/coderay/scanners/cpp.rb b/lib/coderay/scanners/cpp.rb new file mode 100644 index 0000000..7ba9ec0 --- /dev/null +++ b/lib/coderay/scanners/cpp.rb @@ -0,0 +1,197 @@ +module CodeRay +module Scanners + + class CPlusPlus < Scanner + + include Streamable + + register_for :cpp + file_extension 'cpp' + title 'C++' + + # http://www.cppreference.com/wiki/keywords/start + RESERVED_WORDS = [ + 'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break', + 'case', 'catch', 'class', 'compl', 'const_cast', + 'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else', + 'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new', + 'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return', + 'sizeof', 'static_cast', 'struct', 'switch', 'template', + 'throw', 'try', 'typedef', 'typeid', 'typename', 'union', + 'while', 'xor', 'xor_eq' + ] + + PREDEFINED_TYPES = [ + 'bool', 'char', 'double', 'float', 'int', 'long', + 'short', 'signed', 'unsigned', 'wchar_t', 'string' + ] + PREDEFINED_CONSTANTS = [ + 'false', 'true', + 'EOF', 'NULL', + ] + PREDEFINED_VARIABLES = [ + 'this' + ] + DIRECTIVES = [ + 'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator', + 'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void', + 'volatile' + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_VARIABLES, :local_variable). + add(DIRECTIVES, :directive). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + # FIXME: don't match a?b:c + kind = :label + elsif match == 'class' + state = :class_name_expected + end + + elsif scan(/\$/) + kind = :ident + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])L?L?/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + kind = :error + + end + + when :string + if scan(/[^\\"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + tokens << [:close, :string] + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + state = :initial + next + + end + + when :class_name_expected + if scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = :class + state = :initial + + elsif match = scan(/\s+/) + kind = :space + + else + getch + kind = :error + state = :initial + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + if state == :string + tokens << [:close, :string] + end + + tokens + end + + end + +end +end |