diff options
author | Kornelius Kalnbach <murphy@rubychan.de> | 2016-02-13 16:12:48 +0100 |
---|---|---|
committer | Kornelius Kalnbach <murphy@rubychan.de> | 2016-02-13 16:12:48 +0100 |
commit | 0b8c69cfb7a65bec04c44e58e5776e323d2aa1af (patch) | |
tree | fd81bf6229bfc0d173f5b744534a76b5c70eb440 /lib | |
parent | 916711c9983483c39f9a68c29e21a0ed40004bd2 (diff) | |
parent | 0a1f500d524ff0fb5eeafef051ccbb641954a87a (diff) | |
download | coderay-paint-integration.tar.gz |
Merge branch 'master' into paint-integrationpaint-integration
Diffstat (limited to 'lib')
33 files changed, 1190 insertions, 527 deletions
diff --git a/lib/coderay.rb b/lib/coderay.rb index 88c7cc2..f759ed6 100644 --- a/lib/coderay.rb +++ b/lib/coderay.rb @@ -127,14 +127,14 @@ module CodeRay $CODERAY_DEBUG ||= false - CODERAY_PATH = File.join File.dirname(__FILE__), 'coderay' + CODERAY_PATH = File.expand_path('../coderay', __FILE__) # Assuming the path is a subpath of lib/coderay/ def self.coderay_path *path File.join CODERAY_PATH, *path end - require coderay_path('version') + require 'coderay/version' # helpers autoload :FileType, coderay_path('helpers', 'file_type') @@ -166,7 +166,6 @@ module CodeRay # # See also demo/demo_simple. def scan code, lang, options = {}, &block - # FIXME: return a proxy for direct-stream encoding TokensProxy.new code, lang, options, block end diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb index c03d3fb..f4db330 100644 --- a/lib/coderay/encoders/debug.rb +++ b/lib/coderay/encoders/debug.rb @@ -9,7 +9,6 @@ module Encoders # # You cannot fully restore the tokens information from the # output, because consecutive :space tokens are merged. - # Use Tokens#dump for caching purposes. # # See also: Scanners::Debug class Debug < Encoder @@ -18,38 +17,26 @@ module Encoders FILE_EXTENSION = 'raydebug' - def initialize options = {} - super - @opened = [] - end - def text_token text, kind - raise 'empty token' if $CODERAY_DEBUG && text.empty? if kind == :space @out << text else - # TODO: Escape ( - text = text.gsub(/[)\\]/, '\\\\\0') if text.index(/[)\\]/) - @out << kind.to_s << '(' << text << ')' + text = text.gsub('\\', '\\\\\\\\') if text.index('\\') + text = text.gsub(')', '\\\\)') if text.index(')') + @out << "#{kind}(#{text})" end end def begin_group kind - @opened << kind - @out << kind.to_s << '<' + @out << "#{kind}<" end def end_group kind - if @opened.last != kind - puts @out - raise "we are inside #{@opened.inspect}, not #{kind}" - end - @opened.pop @out << '>' end def begin_line kind - @out << kind.to_s << '[' + @out << "#{kind}[" end def end_line kind diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb new file mode 100644 index 0000000..a4eba2c --- /dev/null +++ b/lib/coderay/encoders/debug_lint.rb @@ -0,0 +1,63 @@ +module CodeRay +module Encoders + + load :lint + + # = Debug Lint Encoder + # + # Debug encoder with additional checks for: + # + # - empty tokens + # - incorrect nesting + # + # It will raise an InvalidTokenStream exception when any of the above occurs. + # + # See also: Encoders::Debug + class DebugLint < Debug + + register_for :debug_lint + + def text_token text, kind + raise Lint::EmptyToken, 'empty token for %p' % [kind] if text.empty? + raise Lint::UnknownTokenKind, 'unknown token kind %p (text was %p)' % [kind, text] unless TokenKinds.has_key? kind + super + end + + def begin_group kind + @opened << kind + super + end + + def end_group kind + raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + @opened.pop + super + end + + def begin_line kind + @opened << kind + super + end + + def end_line kind + raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + @opened.pop + super + end + + protected + + def setup options + super + @opened = [] + end + + def finish options + raise 'Some tokens still open at end of token stream: %p' % [@opened] unless @opened.empty? + super + end + + end + +end +end diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb index b897f5e..942b9c8 100644 --- a/lib/coderay/encoders/html.rb +++ b/lib/coderay/encoders/html.rb @@ -25,7 +25,8 @@ module Encoders # == Options # # === :tab_width - # Convert \t characters to +n+ spaces (a number.) + # Convert \t characters to +n+ spaces (a number or false.) + # false will keep tab characters untouched. # # Default: 8 # @@ -180,7 +181,7 @@ module Encoders @break_lines = (options[:break_lines] == true) - @HTML_ESCAPE = HTML_ESCAPE.merge("\t" => ' ' * options[:tab_width]) + @HTML_ESCAPE = HTML_ESCAPE.merge("\t" => options[:tab_width] ? ' ' * options[:tab_width] : "\t") @opened = [] @last_opened = nil @@ -193,18 +194,19 @@ module Encoders def finish options unless @opened.empty? - warn '%d tokens still open: %p' % [@opened.size, @opened] if $CODERAY_DEBUG @out << '</span>' while @opened.pop @last_opened = nil end - @out.extend Output - @out.css = @css - if options[:line_numbers] - Numbering.number! @out, options[:line_numbers], options + if @out.respond_to? :to_str + @out.extend Output + @out.css = @css + if options[:line_numbers] + Numbering.number! @out, options[:line_numbers], options + end + @out.wrap! options[:wrap] + @out.apply_title! options[:title] end - @out.wrap! options[:wrap] - @out.apply_title! options[:title] if defined?(@real_out) && @real_out @real_out << @out @@ -286,7 +288,7 @@ module Encoders def make_span_for_kinds method, hint Hash.new do |h, kinds| - h[kinds.is_a?(Symbol) ? kinds : kinds.dup] = begin + begin css_class = css_class_for_kinds(kinds) title = HTML.token_path_to_hint hint, kinds if hint @@ -298,6 +300,9 @@ module Encoders "<span#{title}#{" class=\"#{css_class}\"" if css_class}>" end end + end.tap do |span| + h.clear if h.size >= 100 + h[kinds] = span end end end @@ -310,8 +315,8 @@ module Encoders def break_lines text, style reopen = '' - @opened.each_with_index do |k, index| - reopen << (@span_for_kinds[index > 0 ? [k, *@opened[0...index]] : k] || '<span>') + @opened.each_with_index do |kind, index| + reopen << (@span_for_kinds[index > 0 ? [kind, *@opened[0...index]] : kind] || '<span>') end text.gsub("\n", "#{'</span>' * @opened.size}#{'</span>' if style}\n#{reopen}#{style}") end diff --git a/lib/coderay/encoders/html/numbering.rb b/lib/coderay/encoders/html/numbering.rb index 332145b..a1b9c04 100644 --- a/lib/coderay/encoders/html/numbering.rb +++ b/lib/coderay/encoders/html/numbering.rb @@ -26,7 +26,7 @@ module Encoders "<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>" end else - proc { |line| line.to_s } # :to_s.to_proc in Ruby 1.8.7+ + :to_s.to_proc end bold_every = options[:bold_every] @@ -75,7 +75,7 @@ module Encoders line_number = start output.gsub!(/^.*$\n?/) do |line| line_number_text = bolding.call line_number - indent = ' ' * (max_width - line_number.to_s.size) # TODO: Optimize (10^x) + indent = ' ' * (max_width - line_number.to_s.size) line_number += 1 "<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{line}" end diff --git a/lib/coderay/encoders/lint.rb b/lib/coderay/encoders/lint.rb new file mode 100644 index 0000000..88c8bd1 --- /dev/null +++ b/lib/coderay/encoders/lint.rb @@ -0,0 +1,59 @@ +module CodeRay +module Encoders + + # = Lint Encoder + # + # Checks for: + # + # - empty tokens + # - incorrect nesting + # + # It will raise an InvalidTokenStream exception when any of the above occurs. + # + # See also: Encoders::DebugLint + class Lint < Debug + + register_for :lint + + InvalidTokenStream = Class.new StandardError + EmptyToken = Class.new InvalidTokenStream + UnknownTokenKind = Class.new InvalidTokenStream + IncorrectTokenGroupNesting = Class.new InvalidTokenStream + + def text_token text, kind + raise EmptyToken, 'empty token for %p' % [kind] if text.empty? + raise UnknownTokenKind, 'unknown token kind %p (text was %p)' % [kind, text] unless TokenKinds.has_key? kind + end + + def begin_group kind + @opened << kind + end + + def end_group kind + raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + @opened.pop + end + + def begin_line kind + @opened << kind + end + + def end_line kind + raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind + @opened.pop + end + + protected + + def setup options + @opened = [] + end + + def finish options + raise 'Some tokens still open at end of token stream: %p' % [@opened] unless @opened.empty? + end + + end + +end +end diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb index 2315d9e..b2f8b83 100644 --- a/lib/coderay/encoders/statistic.rb +++ b/lib/coderay/encoders/statistic.rb @@ -67,7 +67,6 @@ Token Types (%d): @type_stats['TOTAL'].count += 1 end - # TODO Hierarchy handling def begin_group kind block_token ':begin_group', kind end diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb index 500e5d8..c7ae014 100644 --- a/lib/coderay/encoders/terminal.rb +++ b/lib/coderay/encoders/terminal.rb @@ -19,105 +19,135 @@ module CodeRay register_for :terminal TOKEN_COLORS = { - :annotation => "\e[35m", - :attribute_name => "\e[33m", + :debug => "\e[1;37;44m", + + :annotation => "\e[34m", + :attribute_name => "\e[35m", :attribute_value => "\e[31m", - :binary => "\e[1;35m", + :binary => { + :self => "\e[31m", + :char => "\e[1;31m", + :delimiter => "\e[1;31m", + }, :char => { - :self => "\e[36m", :delimiter => "\e[1;34m" + :self => "\e[35m", + :delimiter => "\e[1;35m" }, - :class => "\e[1;35m", + :class => "\e[1;35;4m", :class_variable => "\e[36m", :color => "\e[32m", - :comment => "\e[37m", - :complex => "\e[1;34m", - :constant => "\e[1;34m\e[4m", - :decoration => "\e[35m", - :definition => "\e[1;32m", - :directive => "\e[32m\e[4m", - :doc => "\e[46m", - :doctype => "\e[1;30m", - :doc_string => "\e[31m\e[4m", - :entity => "\e[33m", - :error => "\e[1;33m\e[41m", + :comment => { + :self => "\e[1;30m", + :char => "\e[37m", + :delimiter => "\e[37m", + }, + :constant => "\e[1;34;4m", + :decorator => "\e[35m", + :definition => "\e[1;33m", + :directive => "\e[33m", + :docstring => "\e[31m", + :doctype => "\e[1;34m", + :done => "\e[1;30;2m", + :entity => "\e[31m", + :error => "\e[1;37;41m", :exception => "\e[1;31m", :float => "\e[1;35m", :function => "\e[1;34m", - :global_variable => "\e[42m", + :global_variable => "\e[1;32m", :hex => "\e[1;36m", - :include => "\e[33m", + :id => "\e[1;34m", + :include => "\e[31m", :integer => "\e[1;34m", - :key => "\e[35m", - :label => "\e[1;15m", + :imaginary => "\e[1;34m", + :important => "\e[1;31m", + :key => { + :self => "\e[35m", + :char => "\e[1;35m", + :delimiter => "\e[1;35m", + }, + :keyword => "\e[32m", + :label => "\e[1;33m", :local_variable => "\e[33m", - :octal => "\e[1;35m", - :operator_name => "\e[1;29m", + :namespace => "\e[1;35m", + :octal => "\e[1;34m", + :predefined => "\e[36m", :predefined_constant => "\e[1;36m", - :predefined_type => "\e[1;30m", - :predefined => "\e[4m\e[1;34m", - :preprocessor => "\e[36m", + :predefined_type => "\e[1;32m", + :preprocessor => "\e[1;36m", :pseudo_class => "\e[1;34m", :regexp => { - :self => "\e[31m", - :content => "\e[31m", - :delimiter => "\e[1;29m", + :self => "\e[35m", + :delimiter => "\e[1;35m", :modifier => "\e[35m", + :char => "\e[1;35m", }, - :reserved => "\e[1;31m", + :reserved => "\e[32m", :shell => { - :self => "\e[42m", - :content => "\e[1;29m", - :delimiter => "\e[37m", + :self => "\e[33m", + :char => "\e[1;33m", + :delimiter => "\e[1;33m", + :escape => "\e[1;33m", }, :string => { - :self => "\e[32m", - :modifier => "\e[1;32m", - :escape => "\e[1;36m", - :delimiter => "\e[1;32m", - :char => "\e[1;36m", + :self => "\e[31m", + :modifier => "\e[1;31m", + :char => "\e[1;35m", + :delimiter => "\e[1;31m", + :escape => "\e[1;31m", + }, + :symbol => { + :self => "\e[33m", + :delimiter => "\e[1;33m", }, - :symbol => "\e[1;32m", - :tag => "\e[1;34m", + :tag => "\e[32m", :type => "\e[1;34m", :value => "\e[36m", - :variable => "\e[1;34m", + :variable => "\e[34m", - :insert => "\e[42m", - :delete => "\e[41m", - :change => "\e[44m", - :head => "\e[45m" + :insert => { + :self => "\e[42m", + :insert => "\e[1;32;42m", + :eyecatcher => "\e[102m", + }, + :delete => { + :self => "\e[41m", + :delete => "\e[1;31;41m", + :eyecatcher => "\e[101m", + }, + :change => { + :self => "\e[44m", + :change => "\e[37;44m", + }, + :head => { + :self => "\e[45m", + :filename => "\e[37;45m" + }, } + TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved] TOKEN_COLORS[:method] = TOKEN_COLORS[:function] - TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex] - TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] = - TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter] + TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter] protected def setup(options) super @opened = [] - @subcolors = nil + @color_scopes = [TOKEN_COLORS] end public def text_token text, kind - if color = (@subcolors || TOKEN_COLORS)[kind] - if Hash === color - if color[:self] - color = color[:self] - else - @out << text - return - end - end + if color = @color_scopes.last[kind] + color = color[:self] if color.is_a? Hash @out << color - @out << text.gsub("\n", "\e[0m\n" + color) + @out << (text.index("\n") ? text.gsub("\n", "\e[0m\n" + color) : text) @out << "\e[0m" - @out << @subcolors[:self] if @subcolors + if outer_color = @color_scopes.last[:self] + @out << outer_color + end else @out << text end @@ -130,40 +160,33 @@ module CodeRay alias begin_line begin_group def end_group kind - if @opened.empty? - # nothing to close - else - @opened.pop + if @opened.pop + @color_scopes.pop @out << "\e[0m" - @out << open_token(@opened.last) + if outer_color = @color_scopes.last[:self] + @out << outer_color + end end end def end_line kind - if @opened.empty? - # nothing to close - else - @opened.pop - # whole lines to be highlighted, - # eg. added/modified/deleted lines in a diff - @out << (@line_filler ||= "\t" * 100 + "\e[0m") - @out << open_token(@opened.last) - end + @out << (@line_filler ||= "\t" * 100) + end_group kind end private def open_token kind - if color = TOKEN_COLORS[kind] - if Hash === color - @subcolors = color + if color = @color_scopes.last[kind] + if color.is_a? Hash + @color_scopes << color color[:self] else - @subcolors = {} + @color_scopes << @color_scopes.last color end else - @subcolors = nil + @color_scopes << @color_scopes.last '' end end diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index a5d83ff..7de34d5 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -38,7 +38,7 @@ module CodeRay (TypeFromExt[ext2.downcase] if ext2) || TypeFromName[name] || TypeFromName[name.downcase] - type ||= shebang(filename) if read_shebang + type ||= type_from_shebang(filename) if read_shebang type end @@ -63,7 +63,7 @@ module CodeRay protected - def shebang filename + def type_from_shebang filename return unless File.exist? filename File.open filename, 'r' do |f| if first_line = f.gets @@ -77,54 +77,58 @@ module CodeRay end TypeFromExt = { - 'c' => :c, - 'cfc' => :xml, - 'cfm' => :xml, - 'clj' => :clojure, - 'css' => :css, - 'diff' => :diff, - 'dpr' => :delphi, - 'erb' => :erb, - 'gemspec' => :ruby, - 'groovy' => :groovy, - 'gvy' => :groovy, - 'h' => :c, - 'haml' => :haml, - 'htm' => :html, - 'html' => :html, - 'html.erb' => :erb, - 'java' => :java, - 'js' => :java_script, - 'json' => :json, - 'mab' => :ruby, - 'pas' => :delphi, - 'patch' => :diff, - 'phtml' => :php, - 'php' => :php, - 'php3' => :php, - 'php4' => :php, - 'php5' => :php, - 'prawn' => :ruby, - 'py' => :python, - 'py3' => :python, - 'pyw' => :python, - 'rake' => :ruby, - 'raydebug' => :raydebug, - 'rb' => :ruby, - 'rbw' => :ruby, - 'rhtml' => :erb, - 'rjs' => :ruby, - 'rpdf' => :ruby, - 'ru' => :ruby, - 'rxml' => :ruby, - 'sass' => :sass, - 'sql' => :sql, - 'tmproj' => :xml, - 'xaml' => :xml, - 'xhtml' => :html, - 'xml' => :xml, - 'yaml' => :yaml, - 'yml' => :yaml, + 'c' => :c, + 'cfc' => :xml, + 'cfm' => :xml, + 'clj' => :clojure, + 'css' => :css, + 'diff' => :diff, + 'dpr' => :delphi, + 'erb' => :erb, + 'gemspec' => :ruby, + 'go' => :go, + 'groovy' => :groovy, + 'gvy' => :groovy, + 'h' => :c, + 'haml' => :haml, + 'htm' => :html, + 'html' => :html, + 'html.erb' => :erb, + 'java' => :java, + 'js' => :java_script, + 'json' => :json, + 'lua' => :lua, + 'mab' => :ruby, + 'pas' => :delphi, + 'patch' => :diff, + 'phtml' => :php, + 'php' => :php, + 'php3' => :php, + 'php4' => :php, + 'php5' => :php, + 'prawn' => :ruby, + 'py' => :python, + 'py3' => :python, + 'pyw' => :python, + 'rake' => :ruby, + 'raydebug' => :raydebug, + 'rb' => :ruby, + 'rbw' => :ruby, + 'rhtml' => :erb, + 'rjs' => :ruby, + 'rpdf' => :ruby, + 'ru' => :ruby, # config.ru + 'rxml' => :ruby, + 'sass' => :sass, + 'sql' => :sql, + 'taskpaper' => :taskpaper, + 'template' => :json, # AWS CloudFormation template + 'tmproj' => :xml, + 'xaml' => :xml, + 'xhtml' => :html, + 'xml' => :xml, + 'yaml' => :yaml, + 'yml' => :yaml, } for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu] TypeFromExt[cpp_alias] = :cpp @@ -137,6 +141,9 @@ module CodeRay 'Rakefile' => :ruby, 'Rantfile' => :ruby, 'Gemfile' => :ruby, + 'Guardfile' => :ruby, + 'Vagrantfile' => :ruby, + 'Appraisals' => :ruby } end diff --git a/lib/coderay/helpers/gzip.rb b/lib/coderay/helpers/gzip.rb deleted file mode 100644 index 245014a..0000000 --- a/lib/coderay/helpers/gzip.rb +++ /dev/null @@ -1,41 +0,0 @@ -module CodeRay - - # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.) - module GZip - - require 'zlib' - - # The default zipping level. 7 zips good and fast. - DEFAULT_GZIP_LEVEL = 7 - - # Unzips the given string +s+. - # - # Example: - # require 'gzip_simple' - # print GZip.gunzip(File.read('adresses.gz')) - def GZip.gunzip s - Zlib::Inflate.inflate s - end - - # Zips the given string +s+. - # - # Example: - # require 'gzip_simple' - # File.open('adresses.gz', 'w') do |file - # file.write GZip.gzip('Mum: 0123 456 789', 9) - # end - # - # If you provide a +level+, you can control how strong - # the string is compressed: - # - 0: no compression, only convert to gzip format - # - 1: compress fast - # - 7: compress more, but still fast (default) - # - 8: compress more, slower - # - 9: compress best, very slow - def GZip.gzip s, level = DEFAULT_GZIP_LEVEL - Zlib::Deflate.new(level).deflate s, Zlib::FINISH - end - - end - -end diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb index d14c5a9..9a724ff 100644 --- a/lib/coderay/helpers/plugin.rb +++ b/lib/coderay/helpers/plugin.rb @@ -30,7 +30,7 @@ module CodeRay # * a file could not be found # * the requested Plugin is not registered PluginNotFound = Class.new LoadError - HostNotFound = Class.new LoadError + HostNotFound = Class.new LoadError PLUGIN_HOSTS = [] PLUGIN_HOSTS_BY_ID = {} # dummy hash @@ -49,8 +49,8 @@ module CodeRay def [] id, *args, &blk plugin = validate_id(id) begin - plugin = plugin_hash.[] plugin, *args, &blk - end while plugin.is_a? Symbol + plugin = plugin_hash.[](plugin, *args, &blk) + end while plugin.is_a? String plugin end @@ -95,7 +95,7 @@ module CodeRay def map hash for from, to in hash from = validate_id from - to = validate_id to + to = validate_id to plugin_hash[from] = to unless plugin_hash.has_key? from end end @@ -197,22 +197,22 @@ module CodeRay File.join plugin_path, "#{plugin_id}.rb" end - # Converts +id+ to a Symbol if it is a String, - # or returns +id+ if it already is a Symbol. + # Converts +id+ to a valid plugin ID String, or returns +nil+. # # Raises +ArgumentError+ for all other objects, or if the # given String includes non-alphanumeric characters (\W). def validate_id id - if id.is_a? Symbol or id.nil? - id - elsif id.is_a? String + case id + when Symbol + id.to_s + when String if id[/\w+/] == id - id.downcase.to_sym + id.downcase else raise ArgumentError, "Invalid id given: #{id}" end else - raise ArgumentError, "String or Symbol expected, but #{id.class} given." + raise ArgumentError, "Symbol or String expected, but #{id.class} given." end end diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb index 732f9c5..55d5239 100644 --- a/lib/coderay/scanners/css.rb +++ b/lib/coderay/scanners/css.rb @@ -25,7 +25,7 @@ module Scanners HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/ - Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)/ + Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)n?/ Name = /#{NMChar}+/ Ident = /-?#{NMStart}#{NMChar}*/ AtKeyword = /@#{Ident}/ @@ -53,7 +53,7 @@ module Scanners end def scan_tokens encoder, options - states = Array(options[:state] || @state) + states = Array(options[:state] || @state).dup value_expected = @value_expected until eos? @@ -145,10 +145,10 @@ module Scanners start = match[/^\w+\(/] encoder.text_token start, :delimiter if match[-1] == ?) - encoder.text_token match[start.size..-2], :content + encoder.text_token match[start.size..-2], :content if match.size > start.size + 1 encoder.text_token ')', :delimiter else - encoder.text_token match[start.size..-1], :content + encoder.text_token match[start.size..-1], :content if match.size > start.size end encoder.end_group :function diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb index 566bfa7..83ede9a 100644 --- a/lib/coderay/scanners/debug.rb +++ b/lib/coderay/scanners/debug.rb @@ -1,9 +1,11 @@ +require 'set' + module CodeRay module Scanners # = Debug Scanner # - # Interprets the output of the Encoders::Debug encoder. + # Interprets the output of the Encoders::Debug encoder (basically the inverse function). class Debug < Scanner register_for :debug @@ -11,6 +13,11 @@ module Scanners protected + def setup + super + @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set + end + def scan_tokens encoder, options opened_tokens = [] @@ -21,16 +28,19 @@ module Scanners encoder.text_token match, :space elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x) - kind = self[1].to_sym - match = self[2].gsub(/\\(.)/m, '\1') - unless TokenKinds.has_key? kind - kind = :error - match = matched + if @known_token_kinds.include? self[1] + encoder.text_token self[2].gsub(/\\(.)/m, '\1'), self[1].to_sym + else + encoder.text_token matched, :unknown end - encoder.text_token match, kind elsif match = scan(/ (\w+) ([<\[]) /x) - kind = self[1].to_sym + if @known_token_kinds.include? self[1] + kind = self[1].to_sym + else + kind = :unknown + end + opened_tokens << kind case self[2] when '<' diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb index af0f755..74a6c27 100644 --- a/lib/coderay/scanners/diff.rb +++ b/lib/coderay/scanners/diff.rb @@ -69,7 +69,7 @@ module Scanners state = :added elsif match = scan(/\\ .*/) encoder.text_token match, :comment - elsif match = scan(/@@(?>[^@\n]*)@@/) + elsif match = scan(/@@(?>[^@\n]+)@@/) content_scanner.state = :initial unless match?(/\n\+/) content_scanner_entry_state = nil if check(/\n|$/) @@ -100,7 +100,7 @@ module Scanners next elsif match = scan(/-/) deleted_lines_count += 1 - if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/) + if options[:inline_diff] && deleted_lines_count == 1 && (changed_lines_count = 1 + check(/.*(?:\n\-.*)*/).count("\n")) && changed_lines_count <= 100_000 && match?(/(?>.*(?:\n\-.*){#{changed_lines_count - 1}}(?:\n\+.*){#{changed_lines_count}})$(?!\n\+)/) deleted_lines = Array.new(changed_lines_count) { |i| skip(/\n\-/) if i > 0; scan(/.*/) } inserted_lines = Array.new(changed_lines_count) { |i| skip(/\n\+/) ; scan(/.*/) } diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb new file mode 100644 index 0000000..99fdd63 --- /dev/null +++ b/lib/coderay/scanners/go.rb @@ -0,0 +1,208 @@ +module CodeRay +module Scanners + + class Go < Scanner + + register_for :go + file_extension 'go' + + # http://golang.org/ref/spec#Keywords + KEYWORDS = [ + 'break', 'default', 'func', 'interface', 'select', + 'case', 'defer', 'go', 'map', 'struct', + 'chan', 'else', 'goto', 'package', 'switch', + 'const', 'fallthrough', 'if', 'range', 'type', + 'continue', 'for', 'import', 'return', 'var', + ] # :nodoc: + + # http://golang.org/ref/spec#Types + PREDEFINED_TYPES = [ + 'bool', + 'uint8', 'uint16', 'uint32', 'uint64', + 'int8', 'int16', 'int32', 'int64', + 'float32', 'float64', + 'complex64', 'complex128', + 'byte', 'rune', 'string', 'error', + 'uint', 'int', 'uintptr', + ] # :nodoc: + + PREDEFINED_CONSTANTS = [ + 'nil', 'iota', + 'true', 'false', + ] # :nodoc: + + PREDEFINED_FUNCTIONS = %w[ + append cap close complex copy delete imag len + make new panic print println real recover + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_TYPES, :predefined_type). + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(PREDEFINED_FUNCTIONS, :predefined) # :nodoc: + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + + protected + + def scan_tokens encoder, options + + state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + case_expected = false + label_expected = label_expected_before_preproc_line + end + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment + + elsif match = scan(/ <?- (?![\d.]) | [+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + if case_expected + label_expected = true if match == ':' + case_expected = false + end + encoder.text_token match, :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) + kind = :label + label_expected = false + match << matched + else + label_expected = false + if kind == :keyword + case match + when 'case', 'default' + case_expected = true + end + end + end + encoder.text_token match, kind + + elsif match = scan(/L?"/) + encoder.begin_group :string + if match[0] == ?L + encoder.text_token 'L', :modifier + match = '"' + end + encoder.text_token match, :delimiter + state = :string + + elsif match = scan(/ ` ([^`]+)? (`)? /x) + encoder.begin_group :shell + encoder.text_token '`', :delimiter + encoder.text_token self[1], :content if self[1] + encoder.text_token self[2], :delimiter if self[2] + encoder.end_group :shell + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + encoder.text_token match, :comment + + elsif match = scan(/#[ \t]*(\w*)/) + encoder.text_token match, :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected + state = :include_expected if self[1] == 'include' + + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) )? '? /ox) + label_expected = false + encoder.text_token match, :char + + elsif match = scan(/\$/) + encoder.text_token match, :ident + + elsif match = scan(/-?\d*(\.\d*)?([eE][+-]?\d+)?i/) + label_expected = false + encoder.text_token match, :imaginary + + elsif match = scan(/-?0[xX][0-9A-Fa-f]+/) + label_expected = false + encoder.text_token match, :hex + + elsif match = scan(/-?(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false + encoder.text_token match, :octal + + elsif match = scan(/-?(?:\d*\.\d+|\d+\.)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/) + label_expected = false + encoder.text_token match, :float + + elsif match = scan(/-?(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer + + else + encoder.text_token getch, :error + + end + + when :string + if match = scan(/[^\\\n"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + label_expected = false + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ /x) + encoder.text_token match, :error + elsif match = scan(/$/) + encoder.end_group :string + state = :initial + label_expected = false + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + when :include_expected + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + state = :initial if match.index ?\n + + else + state = :initial + + end + + else + raise_inspect 'Unknown state', encoder + + end + + end + + if state == :string + encoder.end_group :string + end + + encoder + end + + end + +end +end diff --git a/lib/coderay/scanners/groovy.rb b/lib/coderay/scanners/groovy.rb index cf55daf..c64454f 100644 --- a/lib/coderay/scanners/groovy.rb +++ b/lib/coderay/scanners/groovy.rb @@ -36,9 +36,12 @@ module Scanners protected + def setup + @state = :initial + end + def scan_tokens encoder, options - - state = :initial + state = options[:state] || @state inline_block_stack = [] inline_block_paren_depth = nil string_delimiter = nil @@ -223,7 +226,7 @@ module Scanners encoder.text_token match, :content # TODO: Shouldn't this be :error? elsif match = scan(/ \\ | \n /x) - encoder.end_group state + encoder.end_group state == :regexp ? :regexp : :string encoder.text_token match, :error after_def = value_expected = false state = :initial @@ -243,7 +246,17 @@ module Scanners end if [:multiline_string, :string, :regexp].include? state - encoder.end_group state + encoder.end_group state == :regexp ? :regexp : :string + end + + if options[:keep_state] + @state = state + end + + until inline_block_stack.empty? + state, = *inline_block_stack.pop + encoder.end_group :inline + encoder.end_group state == :regexp ? :regexp : :string end encoder diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 3ba3b79..ebe7b01 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,13 +1,13 @@ module CodeRay module Scanners - + # HTML Scanner # # Alias: +xhtml+ # # See also: Scanners::XML class HTML < Scanner - + register_for :html KINDS_NOT_LOC = [ @@ -33,7 +33,8 @@ module Scanners ) IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil). - add(EVENT_ATTRIBUTES, :script) + add(EVENT_ATTRIBUTES, :script). + add(['style'], :style) ATTR_NAME = /[\w.:-]+/ # :nodoc: TAG_END = /\/?>/ # :nodoc: @@ -75,9 +76,14 @@ module Scanners def scan_java_script encoder, code if code && !code.empty? @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true - # encoder.begin_group :inline @java_script_scanner.tokenize code, :tokens => encoder - # encoder.end_group :inline + end + end + + def scan_css encoder, code, state = [:initial] + if code && !code.empty? + @css_scanner ||= Scanners::CSS.new '', :keep_tokens => true + @css_scanner.tokenize code, :tokens => encoder, :state => state end end @@ -99,7 +105,15 @@ module Scanners case state when :initial - if match = scan(/<!--(?:.*?-->|.*)/m) + if match = scan(/<!\[CDATA\[/) + encoder.text_token match, :inline_delimiter + if match = scan(/.*?\]\]>/m) + encoder.text_token match[0..-4], :plain + encoder.text_token ']]>', :inline_delimiter + elsif match = scan(/.+/) + encoder.text_token match, :error + end + elsif match = scan(/<!--(?:.*?-->|.*)/m) encoder.text_token match, :comment elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m) encoder.text_token match, :doctype @@ -110,7 +124,7 @@ module Scanners elsif match = scan(/<\/[-\w.:]*>?/m) in_tag = nil encoder.text_token match, :tag - elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m) + elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m) encoder.text_token match, :tag in_tag = self[1] if self[2] @@ -161,17 +175,21 @@ module Scanners encoder.text_token match, :attribute_value state = :attribute elsif match = scan(/["']/) - if in_attribute == :script - encoder.begin_group :inline - encoder.text_token match, :inline_delimiter + if in_attribute == :script || in_attribute == :style + encoder.begin_group :string + encoder.text_token match, :delimiter if scan(/javascript:[ \t]*/) encoder.text_token matched, :comment end code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/) - scan_java_script encoder, code + if in_attribute == :script + scan_java_script encoder, code + else + scan_css encoder, code, [:block] + end match = scan(/["']/) - encoder.text_token match, :inline_delimiter if match - encoder.end_group :inline + encoder.text_token match, :delimiter if match + encoder.end_group :string state = :attribute in_attribute = nil else @@ -206,19 +224,23 @@ module Scanners when :in_special_tag case in_tag - when 'script' + when 'script', 'style' encoder.text_token match, :space if match = scan(/[ \t]*\n/) if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m) code = self[2] || self[4] closing = self[3] encoder.text_token self[1], :comment else - code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/) + code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/) closing = false end unless code.empty? encoder.begin_group :inline - scan_java_script encoder, code + if in_tag == 'script' + scan_java_script encoder, code + else + scan_css encoder, code + end encoder.end_group :inline end encoder.text_token closing, :comment if closing diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb index 4e0f462..b09970c 100644 --- a/lib/coderay/scanners/json.rb +++ b/lib/coderay/scanners/json.rb @@ -14,15 +14,21 @@ module Scanners ESCAPE = / [bfnrt\\"\/] /x # :nodoc: UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # :nodoc: + KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x protected + def setup + @state = :initial + end + # See http://json.org/ for a definition of the JSON lexic/grammar. def scan_tokens encoder, options + state = options[:state] || @state - state = :initial - stack = [] - key_expected = false + if [:string, :key].include? state + encoder.begin_group state + end until eos? @@ -32,18 +38,11 @@ module Scanners if match = scan(/ \s+ /x) encoder.text_token match, :space elsif match = scan(/"/) - state = key_expected ? :key : :string + state = check(/#{KEY}/o) ? :key : :string encoder.begin_group state encoder.text_token match, :delimiter elsif match = scan(/ [:,\[{\]}] /x) encoder.text_token match, :operator - case match - when ':' then key_expected = false - when ',' then key_expected = true if stack.last == :object - when '{' then stack << :object; key_expected = true - when '[' then stack << :array - when '}', ']' then stack.pop # no error recovery, but works for valid JSON - end elsif match = scan(/ true | false | null /x) encoder.text_token match, :value elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x) @@ -82,6 +81,10 @@ module Scanners end end + if options[:keep_state] + @state = state + end + if [:string, :key].include? state encoder.end_group state end diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb new file mode 100644 index 0000000..fb1e45a --- /dev/null +++ b/lib/coderay/scanners/lua.rb @@ -0,0 +1,280 @@ +# encoding: utf-8 + +module CodeRay +module Scanners + + # Scanner for the Lua[http://lua.org] programming lanuage. + # + # The language’s complete syntax is defined in + # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html], + # which is what this scanner tries to conform to. + class Lua < Scanner + + register_for :lua + file_extension 'lua' + title 'Lua' + + # Keywords used in Lua. + KEYWORDS = %w[and break do else elseif end + for function goto if in + local not or repeat return + then until while + ] + + # Constants set by the Lua core. + PREDEFINED_CONSTANTS = %w[false true nil] + + # The expressions contained in this array are parts of Lua’s `basic' + # library. Although it’s not entirely necessary to load that library, + # it is highly recommended and one would have to provide own implementations + # of some of these expressions if one does not do so. They however aren’t + # keywords, neither are they constants, but nearly predefined, so they + # get tagged as `predefined' rather than anything else. + # + # This list excludes values of form `_UPPERCASE' because the Lua manual + # requires such identifiers to be reserved by Lua anyway and they are + # highlighted directly accordingly, without the need for specific + # identifiers to be listed here. + PREDEFINED_EXPRESSIONS = %w[ + assert collectgarbage dofile error getmetatable + ipairs load loadfile next pairs pcall print + rawequal rawget rawlen rawset select setmetatable + tonumber tostring type xpcall + ] + + # Automatic token kind selection for normal words. + IDENT_KIND = CodeRay::WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_CONSTANTS, :predefined_constant). + add(PREDEFINED_EXPRESSIONS, :predefined) + + protected + + # Scanner initialization. + def setup + @state = :initial + @brace_depth = 0 + end + + # CodeRay entry hook. Starts parsing. + def scan_tokens(encoder, options) + state = options[:state] || @state + brace_depth = @brace_depth + num_equals = nil + + until eos? + case state + + when :initial + if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] + num_equals = match.count("=") # Number must match for comment end + encoder.begin_group(:comment) + encoder.text_token(match, :delimiter) + state = :long_comment + + elsif match = scan(/--.*$/) # --Lua comment + encoder.text_token(match, :comment) + + elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]] + num_equals = match.count("=") # Number must match for comment end + encoder.begin_group(:string) + encoder.text_token(match, :delimiter) + state = :long_string + + elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label:: + encoder.text_token(match, :label) + + elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua + encoder.text_token(match, :predefined) + + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) + kind = IDENT_KIND[match] + + # Extra highlighting for entities following certain keywords + if kind == :keyword and match == "function" + state = :function_expected + elsif kind == :keyword and match == "goto" + state = :goto_label_expected + elsif kind == :keyword and match == "local" + state = :local_var_expected + end + + encoder.text_token(match, kind) + + elsif match = scan(/\{/) # Opening table brace { + encoder.begin_group(:map) + encoder.text_token(match, brace_depth >= 1 ? :inline_delimiter : :delimiter) + brace_depth += 1 + state = :map + + elsif match = scan(/\}/) # Closing table brace } + if brace_depth == 1 + brace_depth = 0 + encoder.text_token(match, :delimiter) + encoder.end_group(:map) + elsif brace_depth == 0 # Mismatched brace + encoder.text_token(match, :error) + else + brace_depth -= 1 + encoder.text_token(match, :inline_delimiter) + encoder.end_group(:map) + state = :map + end + + elsif match = scan(/["']/) # String delimiters " and ' + encoder.begin_group(:string) + encoder.text_token(match, :delimiter) + start_delim = match + state = :string + + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power + encoder.text_token(match, :float) + + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power + encoder.text_token(match, :integer) + + elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators + encoder.text_token(match, :operator) + + elsif match = scan(/\s+/) # Space + encoder.text_token(match, :space) + + else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors. + encoder.text_token(getch, :error) + end + + # It may be that we’re scanning a full-blown subexpression of a table + # (tables can contain full expressions in parts). + # If this is the case, return to :map scanning state. + state = :map if state == :initial && brace_depth >= 1 + + when :function_expected + if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name + encoder.text_token(match, :operator) + state = :initial + elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator + encoder.text_token(match, :ident) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() + encoder.text_token(match, :function) + state = :initial + elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace + encoder.text_token(match, :space) + else + encoder.text_token(getch, :error) + state = :initial + end + + when :goto_label_expected + if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + encoder.text_token(match, :label) + state = :initial + elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace + encoder.text_token(match, :space) + else + encoder.text_token(getch, :error) + end + + when :local_var_expected + if match = scan(/function/) # local function ... + encoder.text_token(match, :keyword) + state = :function_expected + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + encoder.text_token(match, :local_variable) + elsif match = scan(/,/) + encoder.text_token(match, :operator) + elsif match = scan(/\=/) + encoder.text_token(match, :operator) + # After encountering the equal sign, arbitrary expressions are + # allowed again, so just return to the main state for further + # parsing. + state = :initial + elsif match = scan(/\n/) + encoder.text_token(match, :space) + state = :initial + elsif match = scan(/\s+/) + encoder.text_token(match, :space) + else + encoder.text_token(getch, :error) + end + + when :long_comment + if match = scan(/.*?(?=\]={#{num_equals}}\])/m) + encoder.text_token(match, :content) + + delim = scan(/\]={#{num_equals}}\]/) + encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + encoder.text_token(rest, :error) + terminate + end + encoder.end_group(:comment) + state = :initial + + when :long_string + if match = scan(/.*?(?=\]={#{num_equals}}\])/m) # Long strings do not interpret any escape sequences + encoder.text_token(match, :content) + + delim = scan(/\]={#{num_equals}}\]/) + encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + encoder.text_token(rest, :error) + terminate + end + encoder.end_group(:string) + state = :initial + + when :string + if match = scan(/[^\\#{start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceeded by \ or \z) + encoder.text_token(match, :content) + elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m) + encoder.text_token(match, :char) + elsif match = scan(Regexp.compile(start_delim)) + encoder.text_token(match, :delimiter) + encoder.end_group(:string) + state = :initial + elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings + encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multine strings + encoder.end_group(:string) + state = :initial + else + encoder.text_token(getch, :error) + end + + when :map + if match = scan(/[,;]/) + encoder.text_token(match, :operator) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) + encoder.text_token(match, :key) + encoder.text_token(scan(/\s+/), :space) if check(/\s+/) + encoder.text_token(scan(/\=/), :operator) + state = :initial + elsif match = scan(/\s+/m) + encoder.text_token(match, :space) + else + # Note this clause doesn’t advance the scan pointer, it’s a kind of + # "retry with other options" (the :initial state then of course + # advances the pointer). + state = :initial + end + else + raise + end + + end + + if options[:keep_state] + @state = state + end + + encoder.end_group :string if [:string].include? state + brace_depth.times { encoder.end_group :map } + + encoder + end + + end + +end +end diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb index 6c68834..7a8d75d 100644 --- a/lib/coderay/scanners/php.rb +++ b/lib/coderay/scanners/php.rb @@ -265,7 +265,7 @@ module Scanners @html_scanner.tokenize match unless match.empty? end - when :php + when :php, :php_inline if match = scan(/\s+/) encoder.text_token match, :space @@ -332,7 +332,7 @@ module Scanners if states.size == 1 encoder.text_token match, :error else - states.pop + state = states.pop if states.last.is_a?(::Array) delimiter = states.last[1] states[-1] = states.last[0] @@ -340,6 +340,7 @@ module Scanners encoder.end_group :inline else encoder.text_token match, :operator + encoder.end_group :inline if state == :php_inline label_expected = true end end @@ -350,7 +351,14 @@ module Scanners elsif match = scan(RE::PHP_END) encoder.text_token match, :inline_delimiter - states = [:initial] + while state = states.pop + encoder.end_group :string if [:sqstring, :dqstring].include? state + if state.is_a? Array + encoder.end_group :inline + encoder.end_group :string if [:sqstring, :dqstring].include? state.first + end + end + states << :initial elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o) encoder.begin_group :string @@ -400,6 +408,7 @@ module Scanners elsif match = scan(/\\/) encoder.text_token match, :error else + encoder.end_group :string states.pop end @@ -459,7 +468,7 @@ module Scanners encoder.begin_group :inline states[-1] = [states.last, delimiter] delimiter = nil - states.push :php + states.push :php_inline encoder.text_token match, :delimiter else encoder.text_token match, :content @@ -469,6 +478,7 @@ module Scanners elsif match = scan(/\$/) encoder.text_token match, :content else + encoder.end_group :string states.pop end @@ -500,6 +510,14 @@ module Scanners end + while state = states.pop + encoder.end_group :string if [:sqstring, :dqstring].include? state + if state.is_a? Array + encoder.end_group :inline + encoder.end_group :string if [:sqstring, :dqstring].include? state.first + end + end + encoder end diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb index a9492ab..09c8b6e 100644 --- a/lib/coderay/scanners/python.rb +++ b/lib/coderay/scanners/python.rb @@ -157,12 +157,12 @@ module Scanners encoder.text_token match, :operator elsif match = scan(/(u?r?|b)?("""|"|'''|')/i) + modifiers = self[1] string_delimiter = self[2] - string_type = docstring_coming ? :docstring : :string + string_type = docstring_coming ? :docstring : (modifiers == 'b' ? :binary : :string) docstring_coming = false if docstring_coming encoder.begin_group string_type string_raw = false - modifiers = self[1] unless modifiers.empty? string_raw = !!modifiers.index(?r) encoder.text_token modifiers, :modifier diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb index 7a21354..1effdc8 100644 --- a/lib/coderay/scanners/raydebug.rb +++ b/lib/coderay/scanners/raydebug.rb @@ -1,23 +1,30 @@ +require 'set' + module CodeRay module Scanners - - # = Debug Scanner + + # = Raydebug Scanner # - # Parses the output of the Encoders::Debug encoder. + # Highlights the output of the Encoders::Debug encoder. class Raydebug < Scanner - + register_for :raydebug file_extension 'raydebug' title 'CodeRay Token Dump' protected + def setup + super + @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set + end + def scan_tokens encoder, options - + opened_tokens = [] - + until eos? - + if match = scan(/\s+/) encoder.text_token match, :space @@ -26,20 +33,22 @@ module Scanners encoder.text_token kind, :class encoder.text_token '(', :operator match = self[2] - encoder.text_token match, kind.to_sym + unless match.empty? + if @known_token_kinds.include? kind + encoder.text_token match, kind.to_sym + else + encoder.text_token match, :plain + end + end encoder.text_token match, :operator if match = scan(/\)/) elsif match = scan(/ (\w+) ([<\[]) /x) - kind = self[1] - case self[2] - when '<' - encoder.text_token kind, :class - when '[' - encoder.text_token kind, :class + encoder.text_token self[1], :class + if @known_token_kinds.include? self[1] + kind = self[1].to_sym else - raise 'CodeRay bug: This case should not be reached.' + kind = :unknown end - kind = kind.to_sym opened_tokens << kind encoder.begin_group kind encoder.text_token self[2], :operator @@ -59,8 +68,8 @@ module Scanners encoder end - + end - + end end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index c282f31..5b8de42 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -164,15 +164,19 @@ module Scanners end elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx) - encoder.begin_group :string if match.size == 1 + kind = check(self.class::StringState.simple_key_pattern(match)) ? :key : :string + encoder.begin_group kind encoder.text_token match, :delimiter - state = self.class::StringState.new :string, match == '"', match # important for streaming + state = self.class::StringState.new kind, match == '"', match # important for streaming else + kind = value_expected == true && scan(/:/) ? :key : :string + encoder.begin_group kind encoder.text_token match[0,1], :delimiter encoder.text_token match[1..-2], :content if match.size > 2 encoder.text_token match[-1,1], :delimiter - encoder.end_group :string + encoder.end_group kind + encoder.text_token ':', :operator if kind == :key value_expected = false end @@ -191,11 +195,14 @@ module Scanners encoder.text_token match, :error method_call_expected = false else - encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary + kind = self[1] ? :float : :integer # TODO: send :hex/:octal/:binary + match << 'r' if match !~ /e/i && scan(/r/) + match << 'i' if scan(/i/) + encoder.text_token match, kind end value_expected = false - elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x) + elsif match = scan(/ [-+!~^\/]=? | [:;] | &\. | [*|&]{1,2}=? | >>? /x) value_expected = true encoder.text_token match, :operator @@ -208,7 +215,7 @@ module Scanners encoder.end_group kind heredocs ||= [] # create heredocs if empty heredocs << self.class::StringState.new(kind, quote != "'", delim, - self[1] == '-' ? :indented : :linestart) + self[1] ? :indented : :linestart) value_expected = false elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o) @@ -269,7 +276,7 @@ module Scanners end if last_state - state = last_state + state = last_state unless state.is_a?(StringState) # otherwise, a simple 'def"' results in unclosed tokens last_state = nil end diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb index ed071d2..e5a156d 100644 --- a/lib/coderay/scanners/ruby/patterns.rb +++ b/lib/coderay/scanners/ruby/patterns.rb @@ -114,7 +114,7 @@ module Scanners # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / - << (-)? # $1 = float + << ([-~])? # $1 = float (?: ( [A-Za-z_0-9]+ ) # $2 = delim | @@ -157,13 +157,16 @@ module Scanners yield ]) - FANCY_STRING_START = / % ( [QqrsWwx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x + FANCY_STRING_START = / % ( [iIqQrswWx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x FANCY_STRING_KIND = Hash.new(:string).merge({ + 'i' => :symbol, + 'I' => :symbol, 'r' => :regexp, 's' => :symbol, 'x' => :shell, }) FANCY_STRING_INTERPRETED = Hash.new(true).merge({ + 'i' => false, 'q' => false, 's' => false, 'w' => false, diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb index 2f398d1..95f1e83 100644 --- a/lib/coderay/scanners/ruby/string_state.rb +++ b/lib/coderay/scanners/ruby/string_state.rb @@ -16,7 +16,6 @@ module Scanners STRING_PATTERN = Hash.new do |h, k| delim, interpreted = *k - # delim = delim.dup # workaround for old Ruby delim_pattern = Regexp.escape(delim) if closing_paren = CLOSING_PAREN[delim] delim_pattern << Regexp.escape(closing_paren) @@ -29,12 +28,21 @@ module Scanners # '| [|?*+(){}\[\].^$]' # end - h[k] = - if interpreted && delim != '#' - / (?= [#{delim_pattern}] | \# [{$@] ) /mx - else - / (?= [#{delim_pattern}] ) /mx - end + if interpreted && delim != '#' + / (?= [#{delim_pattern}] | \# [{$@] ) /mx + else + / (?= [#{delim_pattern}] ) /mx + end.tap do |pattern| + h[k] = pattern if (delim.respond_to?(:ord) ? delim.ord : delim[0]) < 256 + end + end + + def self.simple_key_pattern delim + if delim == "'" + / (?> (?: [^\\']+ | \\. )* ) ' : /mx + else + / (?> (?: [^\\"\#]+ | \\. | \#\$[\\"] | \#\{[^\{\}]+\} | \#(?!\{) )* ) " : /mx + end end def initialize kind, interpreted, delim, heredoc = false diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb index 167051d..e3296b9 100644 --- a/lib/coderay/scanners/sass.rb +++ b/lib/coderay/scanners/sass.rb @@ -7,11 +7,6 @@ module Scanners register_for :sass file_extension 'sass' - STRING_CONTENT_PATTERN = { - "'" => /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/, - '"' => /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/, - } - protected def setup @@ -19,8 +14,9 @@ module Scanners end def scan_tokens encoder, options - states = Array(options[:state] || @state) - string_delimiter = nil + states = Array(options[:state] || @state).dup + + encoder.begin_group :string if states.last == :sqstring || states.last == :dqstring until eos? @@ -48,7 +44,7 @@ module Scanners elsif case states.last when :initial, :media, :sass_inline if match = scan(/(?>#{RE::Ident})(?!\()/ox) - encoder.text_token match, value_expected ? :value : (check(/.*:/) ? :key : :tag) + encoder.text_token match, value_expected ? :value : (check(/.*:(?![a-z])/) ? :key : :tag) next elsif !value_expected && (match = scan(/\*/)) encoder.text_token match, :tag @@ -91,24 +87,23 @@ module Scanners next end - when :string - if match = scan(STRING_CONTENT_PATTERN[string_delimiter]) + when :sqstring, :dqstring + if match = scan(states.last == :sqstring ? /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o : /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o) encoder.text_token match, :content elsif match = scan(/['"]/) encoder.text_token match, :delimiter encoder.end_group :string - string_delimiter = nil states.pop elsif match = scan(/#\{/) encoder.begin_group :inline encoder.text_token match, :inline_delimiter states.push :sass_inline elsif match = scan(/ \\ | $ /x) - encoder.end_group :string + encoder.end_group states.last encoder.text_token match, :error unless match.empty? states.pop else - raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder + raise_inspect "else case #{states.last} reached; %p not handled." % peek(1), encoder end when :include @@ -119,7 +114,7 @@ module Scanners else #:nocov: - raise_inspect 'Unknown state', encoder + raise_inspect 'Unknown state: %p' % [states.last], encoder #:nocov: end @@ -157,15 +152,15 @@ module Scanners elsif match = scan(/['"]/) encoder.begin_group :string - string_delimiter = match encoder.text_token match, :delimiter if states.include? :sass_inline - content = scan_until(/(?=#{string_delimiter}|\}|\z)/) + # no nesting, just scan the string until delimiter + content = scan_until(/(?=#{match}|\}|\z)/) encoder.text_token content, :content unless content.empty? - encoder.text_token string_delimiter, :delimiter if scan(/#{string_delimiter}/) + encoder.text_token match, :delimiter if scan(/#{match}/) encoder.end_group :string else - states.push :string + states.push match == "'" ? :sqstring : :dqstring end elsif match = scan(/#{RE::Function}/o) @@ -176,7 +171,7 @@ module Scanners encoder.text_token match[start.size..-2], :content encoder.text_token ')', :delimiter else - encoder.text_token match[start.size..-1], :content + encoder.text_token match[start.size..-1], :content if start.size < match.size end encoder.end_group :function @@ -195,7 +190,7 @@ module Scanners elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/) encoder.text_token match, :color - elsif match = scan(/@else if\b|#{RE::AtKeyword}/) + elsif match = scan(/@else if\b|#{RE::AtKeyword}/o) encoder.text_token match, :directive value_expected = true @@ -214,8 +209,18 @@ module Scanners end + states.pop if states.last == :include + if options[:keep_state] - @state = states + @state = states.dup + end + + while state = states.pop + if state == :sass_inline + encoder.end_group :inline + elsif state == :sqstring || state == :dqstring + encoder.end_group :string + end end encoder diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb index b757278..7d57f77 100644 --- a/lib/coderay/scanners/sql.rb +++ b/lib/coderay/scanners/sql.rb @@ -1,8 +1,9 @@ -module CodeRay module Scanners +module CodeRay +module Scanners # by Josh Goebel class SQL < Scanner - + register_for :sql KEYWORDS = %w( @@ -56,6 +57,12 @@ module CodeRay module Scanners STRING_PREFIXES = /[xnb]|_\w+/i + STRING_CONTENT_PATTERN = { + '"' => / (?: [^\\"] | "" )+ /x, + "'" => / (?: [^\\'] | '' )+ /x, + '`' => / (?: [^\\`] | `` )+ /x, + } + def scan_tokens encoder, options state = :initial @@ -89,7 +96,7 @@ module CodeRay module Scanners state = :string encoder.text_token match, :delimiter - elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9]* /x) + elsif match = scan(/ @? [A-Za-z_][A-Za-z_0-9\$]* /x) encoder.text_token match, name_expected ? :ident : (match[0] == ?@ ? :variable : IDENT_KIND[match]) name_expected = false @@ -114,41 +121,28 @@ module CodeRay module Scanners end elsif state == :string - if match = scan(/[^\\"'`]+/) - string_content << match - next + if match = scan(STRING_CONTENT_PATTERN[string_type]) + encoder.text_token match, :content elsif match = scan(/["'`]/) if string_type == match if peek(1) == string_type # doubling means escape - string_content << string_type << getch - next - end - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' + encoder.text_token match + getch, :content + else + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + string_type = nil end - encoder.text_token match, :delimiter - encoder.end_group :string - state = :initial - string_type = nil else - string_content << match + encoder.text_token match, :content end elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' - end encoder.text_token match, :char elsif match = scan(/ \\ . /mox) - string_content << match - next + encoder.text_token match, :content elsif match = scan(/ \\ | $ /x) - unless string_content.empty? - encoder.text_token string_content, :content - string_content = '' - end encoder.text_token match, :error unless match.empty? + encoder.end_group :string state = :initial else raise "else case \" reached; %p not handled." % peek(1), encoder @@ -171,4 +165,5 @@ module CodeRay module Scanners end -end end
\ No newline at end of file +end +end diff --git a/lib/coderay/scanners/taskpaper.rb b/lib/coderay/scanners/taskpaper.rb new file mode 100644 index 0000000..42670bc --- /dev/null +++ b/lib/coderay/scanners/taskpaper.rb @@ -0,0 +1,36 @@ +module CodeRay +module Scanners + + class Taskpaper < Scanner + + register_for :taskpaper + file_extension 'taskpaper' + + protected + + def scan_tokens encoder, options + until eos? + if match = scan(/\S.*:.*$/) # project + encoder.text_token(match, :namespace) + elsif match = scan(/-.+@done.*/) # completed task + encoder.text_token(match, :done) + elsif match = scan(/-(?:[^@\n]+|@(?!due))*/) # task + encoder.text_token(match, :plain) + elsif match = scan(/@due.*/) # comment + encoder.text_token(match, :important) + elsif match = scan(/.+/) # comment + encoder.text_token(match, :comment) + elsif match = scan(/\s+/) # space + encoder.text_token(match, :space) + else # other + encoder.text_token getch, :error + end + end + + encoder + end + + end + +end +end diff --git a/lib/coderay/scanners/yaml.rb b/lib/coderay/scanners/yaml.rb index 96f4e93..32c8e2c 100644 --- a/lib/coderay/scanners/yaml.rb +++ b/lib/coderay/scanners/yaml.rb @@ -47,7 +47,7 @@ module Scanners when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/) encoder.begin_group :string encoder.text_token match, :delimiter - encoder.text_token match, :content if match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx) + encoder.text_token match, :content if (match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)) && !match.empty? encoder.text_token match, :delimiter if match = scan(/"/) encoder.end_group :string next @@ -84,7 +84,7 @@ module Scanners when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?= *:(?: |$))/) encoder.begin_group :key encoder.text_token match[0,1], :delimiter - encoder.text_token match[1..-2], :content + encoder.text_token match[1..-2], :content if match.size > 2 encoder.text_token match[-1,1], :delimiter encoder.end_group :key key_indent = column(pos - match.size) - 1 diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb index a21fbf1..d304dc4 100644 --- a/lib/coderay/styles/alpha.rb +++ b/lib/coderay/styles/alpha.rb @@ -3,14 +3,14 @@ module Styles # A colorful theme using CSS 3 colors (with alpha channel). class Alpha < Style - + register_for :alpha - + code_background = 'hsl(0,0%,95%)' numbers_background = 'hsl(180,65%,90%)' border_color = 'silver' normal_color = 'black' - + CSS_MAIN_STYLES = <<-MAIN # :nodoc: .CodeRay { background-color: #{code_background}; @@ -56,25 +56,26 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; } .annotation { color:#007 } .attribute-name { color:#b48 } .attribute-value { color:#700 } -.binary { color:#509 } +.binary { color:#549 } +.binary .char { color:#325 } +.binary .delimiter { color:#325 } +.char { color:#D20 } .char .content { color:#D20 } .char .delimiter { color:#710 } -.char { color:#D20 } .class { color:#B06; font-weight:bold } .class-variable { color:#369 } .color { color:#0A0 } .comment { color:#777 } .comment .char { color:#444 } .comment .delimiter { color:#444 } -.complex { color:#A08 } .constant { color:#036; font-weight:bold } .decorator { color:#B0B } .definition { color:#099; font-weight:bold } .delimiter { color:black } .directive { color:#088; font-weight:bold } -.doc { color:#970 } -.doc-string { color:#D42; font-weight:bold } +.docstring { color:#D42; } .doctype { color:#34b } +.done { text-decoration: line-through; color: gray } .entity { color:#800; font-weight:bold } .error { color:#F00; background-color:#FAA } .escape { color:#666 } @@ -85,51 +86,54 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; } .global-variable { color:#d70 } .hex { color:#02b } .id { color:#33D; font-weight:bold } -.imaginary { color:#f00 } .include { color:#B44; font-weight:bold } .inline { background-color: hsla(0,0%,0%,0.07); color: black } .inline-delimiter { font-weight: bold; color: #666 } .instance-variable { color:#33B } .integer { color:#00D } +.imaginary { color:#f00 } .important { color:#D00 } +.key { color: #606 } .key .char { color: #60f } .key .delimiter { color: #404 } -.key { color: #606 } .keyword { color:#080; font-weight:bold } .label { color:#970; font-weight:bold } -.local-variable { color:#963 } +.local-variable { color:#950 } +.map .content { color:#808 } +.map .delimiter { color:#40A} +.map { background-color:hsla(200,100%,50%,0.06); } .namespace { color:#707; font-weight:bold } .octal { color:#40E } .operator { } .predefined { color:#369; font-weight:bold } .predefined-constant { color:#069 } -.predefined-type { color:#0a5; font-weight:bold } +.predefined-type { color:#0a8; font-weight:bold } .preprocessor { color:#579 } .pseudo-class { color:#00C; font-weight:bold } +.regexp { background-color:hsla(300,100%,50%,0.06); } .regexp .content { color:#808 } .regexp .delimiter { color:#404 } .regexp .modifier { color:#C2C } -.regexp { background-color:hsla(300,100%,50%,0.06); } .reserved { color:#080; font-weight:bold } +.shell { background-color:hsla(120,100%,50%,0.06); } .shell .content { color:#2B2 } .shell .delimiter { color:#161 } -.shell { background-color:hsla(120,100%,50%,0.06); } +.string { background-color:hsla(0,100%,50%,0.05); } .string .char { color: #b0b } .string .content { color: #D20 } .string .delimiter { color: #710 } .string .modifier { color: #E40 } -.string { background-color:hsla(0,100%,50%,0.05); } -.symbol .content { color:#A60 } -.symbol .delimiter { color:#630 } .symbol { color:#A60 } -.tag { color:#070 } +.symbol .content { color:#A60 } +.symbol .delimiter { color:#740 } +.tag { color:#070; font-weight:bold } .type { color:#339; font-weight:bold } -.value { color: #088; } -.variable { color:#037 } +.value { color: #088 } +.variable { color:#037 } .insert { background: hsla(120,100%,50%,0.12) } .delete { background: hsla(0,100%,50%,0.12) } -.change { color: #bbf; background: #007; } +.change { color: #bbf; background: #007 } .head { color: #f8f; background: #505 } .head .filename { color: white; } @@ -141,8 +145,8 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; } .change .change { color: #88f } .head .head { color: #f4f } TOKENS - + end - + end end diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb index bd8fd6c..f911862 100755 --- a/lib/coderay/token_kinds.rb +++ b/lib/coderay/token_kinds.rb @@ -1,89 +1,85 @@ module CodeRay # A Hash of all known token kinds and their associated CSS classes. - TokenKinds = Hash.new do |h, k| - warn 'Undefined Token kind: %p' % [k] if $CODERAY_DEBUG - false - end + TokenKinds = Hash.new(false) # speedup TokenKinds.compare_by_identity if TokenKinds.respond_to? :compare_by_identity TokenKinds.update( # :nodoc: - :annotation => 'annotation', - :attribute_name => 'attribute-name', - :attribute_value => 'attribute-value', - :binary => 'bin', - :char => 'char', - :class => 'class', - :class_variable => 'class-variable', - :color => 'color', - :comment => 'comment', - :complex => 'complex', - :constant => 'constant', - :content => 'content', - :debug => 'debug', - :decorator => 'decorator', - :definition => 'definition', - :delimiter => 'delimiter', - :directive => 'directive', - :doc => 'doc', - :doctype => 'doctype', - :doc_string => 'doc-string', - :entity => 'entity', - :error => 'error', - :escape => 'escape', - :exception => 'exception', - :filename => 'filename', - :float => 'float', - :function => 'function', - :global_variable => 'global-variable', - :hex => 'hex', - :id => 'id', - :imaginary => 'imaginary', - :important => 'important', - :include => 'include', - :inline => 'inline', - :inline_delimiter => 'inline-delimiter', - :instance_variable => 'instance-variable', - :integer => 'integer', - :key => 'key', - :keyword => 'keyword', - :label => 'label', - :local_variable => 'local-variable', - :modifier => 'modifier', - :namespace => 'namespace', - :octal => 'octal', - :predefined => 'predefined', - :predefined_constant => 'predefined-constant', - :predefined_type => 'predefined-type', - :preprocessor => 'preprocessor', - :pseudo_class => 'pseudo-class', - :regexp => 'regexp', - :reserved => 'reserved', - :shell => 'shell', - :string => 'string', - :symbol => 'symbol', - :tag => 'tag', - :type => 'type', - :value => 'value', - :variable => 'variable', + :debug => 'debug', # highlight for debugging (white on blue background) - :change => 'change', - :delete => 'delete', - :head => 'head', - :insert => 'insert', + :annotation => 'annotation', # Groovy, Java + :attribute_name => 'attribute-name', # HTML, CSS + :attribute_value => 'attribute-value', # HTML + :binary => 'binary', # Python, Ruby + :char => 'char', # most scanners, also inside of strings + :class => 'class', # lots of scanners, for different purposes also in CSS + :class_variable => 'class-variable', # Ruby, YAML + :color => 'color', # CSS + :comment => 'comment', # most scanners + :constant => 'constant', # PHP, Ruby + :content => 'content', # inside of strings, most scanners + :decorator => 'decorator', # Python + :definition => 'definition', # CSS + :delimiter => 'delimiter', # inside strings, comments and other types + :directive => 'directive', # lots of scanners + :doctype => 'doctype', # Goorvy, HTML, Ruby, YAML + :docstring => 'docstring', # Python + :done => 'done', # Taskpaper + :entity => 'entity', # HTML + :error => 'error', # invalid token, most scanners + :escape => 'escape', # Ruby (string inline variables like #$foo, #@bar) + :exception => 'exception', # Java, PHP, Python + :filename => 'filename', # Diff + :float => 'float', # most scanners + :function => 'function', # CSS, JavaScript, PHP + :global_variable => 'global-variable', # Ruby, YAML + :hex => 'hex', # hexadecimal number; lots of scanners + :id => 'id', # CSS + :imaginary => 'imaginary', # Python + :important => 'important', # CSS, Taskpaper + :include => 'include', # C, Groovy, Java, Python, Sass + :inline => 'inline', # nested code, eg. inline string evaluation; lots of scanners + :inline_delimiter => 'inline-delimiter', # used instead of :inline > :delimiter FIXME: Why use inline_delimiter? + :instance_variable => 'instance-variable', # Ruby + :integer => 'integer', # most scanners + :key => 'key', # lots of scanners, used together with :value + :keyword => 'keyword', # reserved word that's actually implemented; most scanners + :label => 'label', # C, PHP + :local_variable => 'local-variable', # local and magic variables; some scanners + :map => 'map', # Lua tables + :modifier => 'modifier', # used inside on strings; lots of scanners + :namespace => 'namespace', # Clojure, Java, Taskpaper + :octal => 'octal', # lots of scanners + :predefined => 'predefined', # predefined function: lots of scanners + :predefined_constant => 'predefined-constant',# lots of scanners + :predefined_type => 'predefined-type', # C, Java, PHP + :preprocessor => 'preprocessor', # C, Delphi, HTML + :pseudo_class => 'pseudo-class', # CSS + :regexp => 'regexp', # Groovy, JavaScript, Ruby + :reserved => 'reserved', # most scanners + :shell => 'shell', # Ruby + :string => 'string', # most scanners + :symbol => 'symbol', # Clojure, Ruby, YAML + :tag => 'tag', # CSS, HTML + :type => 'type', # CSS, Java, SQL, YAML + :value => 'value', # used together with :key; CSS, JSON, YAML + :variable => 'variable', # Sass, SQL, YAML - :eyecatcher => 'eyecatcher', + :change => 'change', # Diff + :delete => 'delete', # Diff + :head => 'head', # Diff, YAML + :insert => 'insert', # Diff + :eyecatcher => 'eyecatcher', # Diff - :ident => false, - :operator => false, + :ident => false, # almost all scanners + :operator => false, # almost all scanners - :space => false, - :plain => false + :space => false, # almost all scanners + :plain => false # almost all scanners ) - TokenKinds[:method] = TokenKinds[:function] - TokenKinds[:escape] = TokenKinds[:delimiter] - TokenKinds[:docstring] = TokenKinds[:comment] + TokenKinds[:method] = TokenKinds[:function] + TokenKinds[:unknown] = TokenKinds[:plain] end diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb index 6957d69..e7bffce 100644 --- a/lib/coderay/tokens.rb +++ b/lib/coderay/tokens.rb @@ -1,55 +1,43 @@ module CodeRay - # GZip library for writing and reading token dumps. - autoload :GZip, coderay_path('helpers', 'gzip') - - # = Tokens TODO: Rewrite! - # - # The Tokens class represents a list of tokens returnd from - # a Scanner. + # The Tokens class represents a list of tokens returned from + # a Scanner. It's actually just an Array with a few helper methods. # - # A token is not a special object, just a two-element Array - # consisting of + # A token itself is not a special object, just two elements in an Array: # * the _token_ _text_ (the original source of the token in a String) or # a _token_ _action_ (begin_group, end_group, begin_line, end_line) # * the _token_ _kind_ (a Symbol representing the type of the token) # - # A token looks like this: + # It looks like this: # - # ['# It looks like this', :comment] - # ['3.1415926', :float] - # ['$^', :error] + # ..., '# It looks like this', :comment, ... + # ..., '3.1415926', :float, ... + # ..., '$^', :error, ... # # Some scanners also yield sub-tokens, represented by special - # token actions, namely begin_group and end_group. + # token actions, for example :begin_group and :end_group. # # The Ruby scanner, for example, splits "a string" into: # # [ - # [:begin_group, :string], - # ['"', :delimiter], - # ['a string', :content], - # ['"', :delimiter], - # [:end_group, :string] + # :begin_group, :string, + # '"', :delimiter, + # 'a string', :content, + # '"', :delimiter, + # :end_group, :string # ] # - # Tokens is the interface between Scanners and Encoders: - # The input is split and saved into a Tokens object. The Encoder - # then builds the output from this object. - # - # Thus, the syntax below becomes clear: + # Tokens can be used to save the output of a Scanners in a simple + # Ruby object that can be send to an Encoder later: # - # CodeRay.scan('price = 2.59', :ruby).html - # # the Tokens object is here -------^ - # - # See how small it is? ;) + # tokens = CodeRay.scan('price = 2.59', :ruby).tokens + # tokens.encode(:html) + # tokens.html + # CodeRay.encoder(:html).encode_tokens(tokens) # # Tokens gives you the power to handle pre-scanned code very easily: - # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string - # that you put in your DB. - # - # It also allows you to generate tokens directly (without using a scanner), - # to load them from a file, and still use any Encoder that CodeRay provides. + # You can serialize it to a JSON string and store it in a database, pass it + # around to encode it more than once, send it to other algorithms... class Tokens < Array # The Scanner instance that created the tokens. @@ -58,8 +46,7 @@ module CodeRay # Encode the tokens using encoder. # # encoder can be - # * a symbol like :html oder :statistic - # * an Encoder class + # * a plugin name like :html oder 'statistic' # * an Encoder object # # options are passed to the encoder. @@ -157,53 +144,11 @@ module CodeRay parts end - # Dumps the object into a String that can be saved - # in files or databases. - # - # The dump is created with Marshal.dump; - # In addition, it is gzipped using GZip.gzip. - # - # The returned String object includes Undumping - # so it has an #undump method. See Tokens.load. - # - # You can configure the level of compression, - # but the default value 7 should be what you want - # in most cases as it is a good compromise between - # speed and compression rate. - # - # See GZip module. - def dump gzip_level = 7 - dump = Marshal.dump self - dump = GZip.gzip dump, gzip_level - dump.extend Undumping - end - # Return the actual number of tokens. def count size / 2 end - # Include this module to give an object an #undump - # method. - # - # The string returned by Tokens.dump includes Undumping. - module Undumping - # Calls Tokens.load with itself. - def undump - Tokens.load self - end - end - - # Undump the object using Marshal.load, then - # unzip it using GZip.gunzip. - # - # The result is commonly a Tokens object, but - # this is not guaranteed. - def Tokens.load dump - dump = GZip.gunzip dump - @dump = Marshal.load dump - end - alias text_token push def begin_group kind; push :begin_group, kind end def end_group kind; push :end_group, kind end diff --git a/lib/coderay/version.rb b/lib/coderay/version.rb index 4b4f085..7ea3f70 100644 --- a/lib/coderay/version.rb +++ b/lib/coderay/version.rb @@ -1,3 +1,3 @@ module CodeRay - VERSION = '1.1.0' + VERSION = '1.1.1' end |