From 432aeb74d5a49e6f0efd1063113cef099c93aef6 Mon Sep 17 00:00:00 2001 From: murphy Date: Mon, 28 Dec 2009 07:27:12 +0000 Subject: Copying changes and fixes for 0.9.0rc3 from terminal-encoder branch over to trunk. --- lib/coderay/encoders/term.rb | 137 +++++++++++++++++++++++ lib/coderay/scanners/c.rb | 55 +++++++--- lib/coderay/scanners/cpp.rb | 47 ++++++-- lib/coderay/scanners/java_script.rb | 3 +- lib/coderay/scanners/nitro_xhtml.rb | 1 + lib/coderay/scanners/php.rb | 197 ++++++++++++++++++++++++---------- lib/coderay/scanners/ruby.rb | 4 +- lib/coderay/scanners/ruby/patterns.rb | 4 +- lib/coderay/scanners/scheme.rb | 3 + 9 files changed, 372 insertions(+), 79 deletions(-) create mode 100644 lib/coderay/encoders/term.rb (limited to 'lib/coderay') diff --git a/lib/coderay/encoders/term.rb b/lib/coderay/encoders/term.rb new file mode 100644 index 0000000..287529f --- /dev/null +++ b/lib/coderay/encoders/term.rb @@ -0,0 +1,137 @@ +# encoders/term.rb +# By Rob Aldred (http://robaldred.co.uk) +# Based on idea by Nathan Weizenbaum (http://nex-3.com) +# MIT License (http://www.opensource.org/licenses/mit-license.php) +# +# A CodeRay encoder that outputs code highlighted for a color terminal. 
+# Check out http://robaldred.co.uk + +module CodeRay + module Encoders + class Term < Encoder + register_for :term + + TOKEN_COLORS = { + :attribute_name => '33', + :attribute_name_fat => '33', + :attribute_value => '31', + :attribute_value_fat => '31', + :bin => '1;35', + :char => {:self => '36', :delimiter => '34'}, + :class => '1;35', + :class_variable => '36', + :color => '32', + :comment => '37', + :constant => ['34', '4'], + :definition => '1;32', + :directive => ['32', '4'], + :doc => '46', + :doc_string => ['31', '4'], + :entity => '33', + :error => ['1;33', '41'], + :exception => '1;31', + :float => '1;35', + :function => '1;34', + :global_variable => '42', + :hex => '1;36', + :include => '33', + :integer => '1;34', + :interpreted => '1;35', + :label => '1;4', + :local_variable => '33', + :oct => '1;35', + :operator_name => '1;29', + :pre_constant => '1;36', + :pre_type => '1;30', + :predefined => ['4', '1;34'], + :preprocessor => '36', + :regexp => { + :content => '31', + :delimiter => '1;29', + :modifier => '35', + :function => '1;29' + }, + :reserved => '1;31', + :shell => {:self => '42', :content => '1;29'}, + :string => '32', + :symbol => '1;32', + :tag => '34', + :tag_fat => '1;34', + :tag_special => ['34', '4'], + :type => '1;34', + :variable => '34' + } + TOKEN_COLORS[:procedure] = TOKEN_COLORS[:method] = TOKEN_COLORS[:function] + TOKEN_COLORS[:open] = TOKEN_COLORS[:close] = TOKEN_COLORS[:nesting_delimiter] = TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter] + + protected + + def setup(options) + @out = '' + @opened = [nil] + @subcolors = nil + end + + def finish(options) + super + end + + def token text, type = :plain + case text + + when nil + # raise 'Token with nil as text was given: %p' % [[text, type]] + + when String + + if color = (@subcolors || TOKEN_COLORS)[type] + color = color[:self] || return if Hash === color + + @out << col(color) + text.gsub("\n", col(0) + "\n" + col(color)) + col(0) + @out << col(@subcolors[:self]) if @subcolors && 
@subcolors[:self] + else + @out << text + end + + # token groups, eg. strings + when :open + @opened[0] = type + if color = TOKEN_COLORS[type] + if Hash === color + @subcolors = color + @out << col(color[:self]) if color[:self] + else + @subcolors = {} + @out << col(color) + end + end + @opened << type + when :close + if @opened.empty? + # nothing to close + else + if (@subcolors || {})[:self] + @out << col(0) + end + @subcolors = nil + @opened.pop + end + + # whole lines to be highlighted, eg. a added/modified/deleted lines in a diff + when :begin_line + + when :end_line + + else + raise 'unknown token kind: %p' % [text] + end + end + + private + + def col(color) + Array(color).map { |c| "\e[#{c}m" }.join + end + end + end +end \ No newline at end of file diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index f175204..ac1d0d2 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -12,23 +12,23 @@ module Scanners 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', 'enum', 'for', 'goto', 'if', 'return', 'sizeof', 'struct', 'switch', 'typedef', 'union', 'while', - 'restrict', # C99 + 'restrict', # added in C99 ] PREDEFINED_TYPES = [ 'int', 'long', 'short', 'char', 'signed', 'unsigned', 'float', 'double', - 'bool', 'complex', # C99 + 'bool', 'complex', # added in C99 ] PREDEFINED_CONSTANTS = [ 'EOF', 'NULL', - 'true', 'false', # C99 + 'true', 'false', # added in C99 ] DIRECTIVES = [ 'auto', 'extern', 'register', 'static', 'void', - 'const', 'volatile', # C89 - 'inline', # C99 + 'const', 'volatile', # added in C89 + 'inline', # added in C99 ] IDENT_KIND = WordList.new(:ident). @@ -43,6 +43,10 @@ module Scanners def scan_tokens tokens, options state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false until eos? 
@@ -53,8 +57,13 @@ module Scanners when :initial - if scan(/ \s+ | \\\n /x) - kind = :space + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + tokens << [match, :space] + next elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) kind = :comment @@ -63,14 +72,27 @@ module Scanners match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? kind = :comment - elsif scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end kind = :operator elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) kind = IDENT_KIND[match] - if kind == :ident and check(/:(?!:)/) - # FIXME: don't match a?b:c + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) kind = :label + match << matched + else + label_expected = false + if kind == :reserved + case match + when 'case', 'default' + case_expected = true + end + end end elsif scan(/\$/) @@ -85,23 +107,30 @@ module Scanners state = :string kind = :delimiter - elsif scan(/#\s*(\w*)/) - kind = :preprocessor # FIXME multiline preprocs + elsif scan(/#[ \t]*(\w*)/) + kind = :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? 
/ox) + label_expected = false kind = :char elsif scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false kind = :hex elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false kind = :oct elsif scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false kind = :integer elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false kind = :float else @@ -117,6 +146,7 @@ module Scanners tokens << ['"', :delimiter] tokens << [:close, :string] state = :initial + label_expected = false next elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) kind = :char @@ -124,6 +154,7 @@ module Scanners tokens << [:close, :string] kind = :error state = :initial + label_expected = false else raise_inspect "else case \" reached; %p not handled." % peek(1), tokens end diff --git a/lib/coderay/scanners/cpp.rb b/lib/coderay/scanners/cpp.rb index 0b92ef9..6af5066 100644 --- a/lib/coderay/scanners/cpp.rb +++ b/lib/coderay/scanners/cpp.rb @@ -51,6 +51,10 @@ module Scanners def scan_tokens tokens, options state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false until eos? @@ -61,8 +65,13 @@ module Scanners when :initial - if scan(/ \s+ | \\\n /x) - kind = :space + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + tokens << [match, :space] + next elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) kind = :comment @@ -71,16 +80,29 @@ module Scanners match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? kind = :comment - elsif scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? 
| \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end kind = :operator elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) kind = IDENT_KIND[match] - if kind == :ident and check(/:(?!:)/) - # FIXME: don't match a?b:c + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) kind = :label - elsif match == 'class' - state = :class_name_expected + match << matched + else + label_expected = false + if kind == :reserved + case match + when 'class' + state = :class_name_expected + when 'case', 'default' + case_expected = true + end + end end elsif scan(/\$/) @@ -95,23 +117,30 @@ module Scanners state = :string kind = :delimiter - elsif scan(/#\s*(\w*)/) + elsif scan(/#[ \t]*(\w*)/) kind = :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected state = :include_expected if self[1] == 'include' elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + label_expected = false kind = :char elsif scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false kind = :hex elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false kind = :oct elsif scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false kind = :integer elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false kind = :float else @@ -127,6 +156,7 @@ module Scanners tokens << ['"', :delimiter] tokens << [:close, :string] state = :initial + label_expected = false next elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) kind = :char @@ -134,6 +164,7 @@ module Scanners tokens << [:close, :string] kind = :error state = :initial + label_expected = false else raise_inspect "else case \" reached; %p not handled." 
% peek(1), tokens end diff --git a/lib/coderay/scanners/java_script.rb b/lib/coderay/scanners/java_script.rb index e7edf35..46c8c50 100644 --- a/lib/coderay/scanners/java_script.rb +++ b/lib/coderay/scanners/java_script.rb @@ -21,7 +21,7 @@ module Scanners MAGIC_VARIABLES = %w[ this arguments ] # arguments was introduced in JavaScript 1.4 KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[ - case delete in instanceof new return throw typeof while with + case delete in instanceof new return throw typeof with ] # Reserved for future use. @@ -108,6 +108,7 @@ module Scanners elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x) kind = IDENT_KIND[match] value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match] + # TODO: labels if kind == :ident if match.index(?$) # $ allowed inside an identifier kind = :predefined diff --git a/lib/coderay/scanners/nitro_xhtml.rb b/lib/coderay/scanners/nitro_xhtml.rb index 356e049..6eb1d3b 100644 --- a/lib/coderay/scanners/nitro_xhtml.rb +++ b/lib/coderay/scanners/nitro_xhtml.rb @@ -121,6 +121,7 @@ module Scanners else raise_inspect 'else-case reached!', tokens + end end diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb index bfbc642..af9e16f 100644 --- a/lib/coderay/scanners/php.rb +++ b/lib/coderay/scanners/php.rb @@ -38,7 +38,7 @@ module Scanners require require_once return print unset ] - CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ] + CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ] # according to http://php.net/quickref.php on 2009-04-21; # all functions with _ excluded (module functions) and selected additional functions @@ -117,6 +117,18 @@ module Scanners utf8_decode utf8_encode var_dump var_export version_compare zend_logo_guid zend_thread_id zend_version + create_function call_user_func_array + posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid + posix_geteuid posix_getgid posix_getgrgid 
posix_getgrnam posix_getgroups + posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid + posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid + posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod + posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid + posix_setuid posix_strerror posix_times posix_ttyname posix_uname + pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority + pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait + pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited + pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig ] # TODO: more built-in PHP functions? @@ -158,6 +170,12 @@ module Scanners LOG_NDELAY LOG_NOWAIT LOG_PERROR ] + PREDEFINED = %w[ + $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV + $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header + $argc $argv + ] + IDENT_KIND = CaseIgnoringWordList.new(:ident, true). add(KEYWORDS, :reserved). add(TYPES, :pre_type). @@ -166,6 +184,9 @@ module Scanners add(CLASSES, :pre_constant). add(EXCEPTIONS, :exception). add(CONSTANTS, :pre_constant) + + VARIABLE_KIND = WordList.new(:local_variable). + add(PREDEFINED, :predefined) end module RE @@ -194,7 +215,8 @@ module Scanners \+\+ | -- | # increment, decrement [,;?:()\[\]{}] | # simple delimiters [-+*\/%&|^]=? | # ordinary math, binary logic, assignment shortcuts - [~@$] | # whatever + [~$] | # whatever + =& | # reference assignment [=!]=?=? | <> | # comparison and assignment <<=? | >>=? | [<>]=? # comparison and shift /x @@ -203,17 +225,23 @@ module Scanners def scan_tokens tokens, options - states = [:initial] - if match?(RE::PHP_START) || # starts with bar kind of stuff - # TODO: highlight tokens separately! 
if check(/\[#{RE::IDENTIFIER}\]/o) - match << scan(/\[#{RE::IDENTIFIER}\]/o) + tokens << [:open, :inline] + tokens << [match, :local_variable] + tokens << [scan(/\[/), :operator] + tokens << [scan(/#{RE::IDENTIFIER}/o), :ident] + tokens << [scan(/\]/), :operator] + tokens << [:close, :inline] + next elsif check(/\[/) - match << scan(/\[#{RE::IDENTIFIER}?/o) + match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o) kind = :error elsif check(/->#{RE::IDENTIFIER}/o) - match << scan(/->#{RE::IDENTIFIER}/o) + tokens << [:open, :inline] + tokens << [match, :local_variable] + tokens << [scan(/->/), :operator] + tokens << [scan(/#{RE::IDENTIFIER}/o), :ident] + tokens << [:close, :inline] + next elsif check(/->/) match << scan(/->/) kind = :error diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index cb9637b..014ab7b 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -175,7 +175,7 @@ module Scanners kind = :constant elsif kind == :reserved state = patterns::DEF_NEW_STATE[match] - value_expected = :set if patterns::VALUE_EXPECTING_KEYWORDS[match] + value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match] end end value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o) @@ -287,7 +287,7 @@ module Scanners kind = :error match = (scan(/./mu) rescue nil) || getch if !unicode && match.size > 1 - # warn 'Switchig to unicode mode: %p' % ['ä'[/#{patterns::METHOD_NAME}/uo]] + # warn 'Switchig to unicode mode for char %p' % [match] unicode = true unscan next diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index 51ceba3..fc6ce56 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -142,7 +142,7 @@ module Scanners | #{CHARACTER} ) /x - VALUE_EXPECTING_KEYWORDS = WordList.new.add(%w[ + KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[ and end in or unless begin defined? 
ensure redo super until break do next rescue then @@ -182,7 +182,7 @@ module Scanners STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k - delim_pattern = Regexp.escape(delim) + delim_pattern = Regexp.escape(delim.dup) # dup: Fix for x86_64-linux Ruby if closing_paren = CLOSING_PAREN[delim] delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix delim_pattern << Regexp.escape(closing_paren) diff --git a/lib/coderay/scanners/scheme.rb b/lib/coderay/scanners/scheme.rb index c869a30..3fc60d2 100644 --- a/lib/coderay/scanners/scheme.rb +++ b/lib/coderay/scanners/scheme.rb @@ -5,6 +5,9 @@ module CodeRay # Thanks to murphy for putting CodeRay into public. class Scheme < Scanner + # TODO: function defs + # TODO: built-in functions + register_for :scheme file_extension 'scm' -- cgit v1.2.1