Merge branch 'master' into lua-scanner

Conflicts: lib/coderay/styles/alpha.rb lib/coderay/token_kinds.rb
author: Kornelius Kalnbach <murphy@rubychan.de> 2013-06-22 23:52:44 +0200
committer: Kornelius Kalnbach <murphy@rubychan.de> 2013-06-22 23:52:44 +0200
commit: 5b1a49fdd3bef559991242a4ad7d3d1ed9cb48c8 (patch)
tree: 9e74edadd344247e805f6bd3588a34fdf0dbfa21 /lib
parent: 69246fc8ed0344eae4dab35286813a00010a08cb (diff)
parent: 2abfc49bdc9a9f4e86c90aa968c302ca76c20812 (diff)
download: coderay-5b1a49fdd3bef559991242a4ad7d3d1ed9cb48c8.tar.gz
13 files changed, 329 insertions, 363 deletions
diff --git a/lib/coderay.rb b/lib/coderay.rb
index 88c7cc2..24ae5a2 100644
--- a/lib/coderay.rb
+++ b/lib/coderay.rb
@@ -166,7 +166,6 @@ module CodeRay
     #
     # See also demo/demo_simple.
     def scan code, lang, options = {}, &block
-      # FIXME: return a proxy for direct-stream encoding
       TokensProxy.new code, lang, options, block
     end
     
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index c03d3fb..f4db330 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -9,7 +9,6 @@ module Encoders
   #
   # You cannot fully restore the tokens information from the
   # output, because consecutive :space tokens are merged.
-  # Use Tokens#dump for caching purposes.
   # 
   # See also: Scanners::Debug
   class Debug < Encoder
@@ -18,38 +17,26 @@ module Encoders
     
     FILE_EXTENSION = 'raydebug'
     
-    def initialize options = {}
-      super
-      @opened = []
-    end
-    
     def text_token text, kind
-      raise 'empty token' if $CODERAY_DEBUG && text.empty?
       if kind == :space
         @out << text
       else
-        # TODO: Escape (
-        text = text.gsub(/[)\\]/, '\\\\\0') if text.index(/[)\\]/)
-        @out << kind.to_s << '(' << text << ')'
+        text = text.gsub('\\', '\\\\\\\\') if text.index('\\')
+        text = text.gsub(')',  '\\\\)')    if text.index(')')
+        @out << "#{kind}(#{text})"
       end
     end
     
     def begin_group kind
-      @opened << kind
-      @out << kind.to_s << '<'
+      @out << "#{kind}<"
     end
     
     def end_group kind
-      if @opened.last != kind
-        puts @out
-        raise "we are inside #{@opened.inspect}, not #{kind}"
-      end
-      @opened.pop
       @out << '>'
     end
     
     def begin_line kind
-      @out << kind.to_s << '['
+      @out << "#{kind}["
     end
     
     def end_line kind
diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb
new file mode 100644
index 0000000..eeb2a92
--- /dev/null
+++ b/lib/coderay/encoders/debug_lint.rb
@@ -0,0 +1,55 @@
+module CodeRay
+module Encoders
+  
+  # = Debug Lint Encoder
+  #
+  # Debug encoder with additional checks for:
+  # 
+  # - empty tokens
+  # - incorrect nesting
+  # 
+  # It will raise an InvalidTokenStream exception when any of the above occurs.
+  # 
+  # See also: Encoders::Debug
+  class DebugLint < Debug
+    
+    register_for :debug_lint
+    
+    InvalidTokenStream = Class.new StandardError
+    EmptyToken = Class.new InvalidTokenStream
+    IncorrectTokenGroupNesting = Class.new InvalidTokenStream
+    
+    def initialize options = {}
+      super
+      @opened = []
+    end
+    
+    def text_token text, kind
+      raise EmptyToken, 'empty token' if text.empty?
+      super
+    end
+    
+    def begin_group kind
+      @opened << kind
+      super
+    end
+    
+    def end_group kind
+      raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind}" if @opened.pop != kind
+      super
+    end
+    
+    def begin_line kind
+      @opened << kind
+      super
+    end
+    
+    def end_line kind
+      raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind}" if @opened.pop != kind
+      super
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/encoders/html/numbering.rb b/lib/coderay/encoders/html/numbering.rb
index 332145b..a1b9c04 100644
--- a/lib/coderay/encoders/html/numbering.rb
+++ b/lib/coderay/encoders/html/numbering.rb
@@ -26,7 +26,7 @@ module Encoders
               "<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>"
             end
           else
-            proc { |line| line.to_s }  # :to_s.to_proc in Ruby 1.8.7+
+            :to_s.to_proc
           end
         
         bold_every = options[:bold_every]
@@ -75,7 +75,7 @@ module Encoders
           line_number = start
           output.gsub!(/^.*$\n?/) do |line|
             line_number_text = bolding.call line_number
-            indent = ' ' * (max_width - line_number.to_s.size)  # TODO: Optimize (10^x)
+            indent = ' ' * (max_width - line_number.to_s.size)
             line_number += 1
             "<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{line}"
           end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index 2315d9e..b2f8b83 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -67,7 +67,6 @@ Token Types (%d):
       @type_stats['TOTAL'].count += 1
     end
     
-    # TODO Hierarchy handling
     def begin_group kind
       block_token ':begin_group', kind
     end
diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb
index 9894b91..c7ae014 100644
--- a/lib/coderay/encoders/terminal.rb
+++ b/lib/coderay/encoders/terminal.rb
@@ -19,105 +19,135 @@ module CodeRay
       register_for :terminal
       
       TOKEN_COLORS = {
-        :annotation => "\e[35m",
-        :attribute_name => "\e[33m",
+        :debug => "\e[1;37;44m",
+        
+        :annotation => "\e[34m",
+        :attribute_name => "\e[35m",
         :attribute_value => "\e[31m",
-        :binary => "\e[1;35m",
+        :binary => {
+          :self => "\e[31m",
+          :char => "\e[1;31m",
+          :delimiter => "\e[1;31m",
+        },
         :char => {
-          :self => "\e[36m", :delimiter => "\e[1;34m"
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m"
         },
-        :class => "\e[1;35m",
+        :class => "\e[1;35;4m",
         :class_variable => "\e[36m",
         :color => "\e[32m",
-        :comment => "\e[37m",
-        :complex => "\e[1;34m",
-        :constant => "\e[1;34m\e[4m",
-        :decoration => "\e[35m",
-        :definition => "\e[1;32m",
-        :directive => "\e[32m\e[4m",
-        :doc => "\e[46m",
-        :doctype => "\e[1;30m",
-        :docstring => "\e[31m\e[4m",
-        :entity => "\e[33m",
-        :error => "\e[1;33m\e[41m",
+        :comment => {
+          :self => "\e[1;30m",
+          :char => "\e[37m",
+          :delimiter => "\e[37m",
+        },
+        :constant => "\e[1;34;4m",
+        :decorator => "\e[35m",
+        :definition => "\e[1;33m",
+        :directive => "\e[33m",
+        :docstring => "\e[31m",
+        :doctype => "\e[1;34m",
+        :done => "\e[1;30;2m",
+        :entity => "\e[31m",
+        :error => "\e[1;37;41m",
         :exception => "\e[1;31m",
         :float => "\e[1;35m",
         :function => "\e[1;34m",
-        :global_variable => "\e[42m",
+        :global_variable => "\e[1;32m",
         :hex => "\e[1;36m",
-        :include => "\e[33m",
+        :id => "\e[1;34m",
+        :include => "\e[31m",
         :integer => "\e[1;34m",
-        :key => "\e[35m",
-        :label => "\e[1;15m",
+        :imaginary => "\e[1;34m",
+        :important => "\e[1;31m",
+        :key => {
+          :self => "\e[35m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;35m",
+        },
+        :keyword => "\e[32m",
+        :label => "\e[1;33m",
         :local_variable => "\e[33m",
-        :octal => "\e[1;35m",
-        :operator_name => "\e[1;29m",
+        :namespace => "\e[1;35m",
+        :octal => "\e[1;34m",
+        :predefined => "\e[36m",
         :predefined_constant => "\e[1;36m",
-        :predefined_type => "\e[1;30m",
-        :predefined => "\e[4m\e[1;34m",
-        :preprocessor => "\e[36m",
+        :predefined_type => "\e[1;32m",
+        :preprocessor => "\e[1;36m",
         :pseudo_class => "\e[1;34m",
         :regexp => {
-          :self => "\e[31m",
-          :content => "\e[31m",
-          :delimiter => "\e[1;29m",
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m",
           :modifier => "\e[35m",
+          :char => "\e[1;35m",
         },
-        :reserved => "\e[1;31m",
+        :reserved => "\e[32m",
         :shell => {
-          :self => "\e[42m",
-          :content => "\e[1;29m",
-          :delimiter => "\e[37m",
+          :self => "\e[33m",
+          :char => "\e[1;33m",
+          :delimiter => "\e[1;33m",
+          :escape => "\e[1;33m",
         },
         :string => {
-          :self => "\e[32m",
-          :modifier => "\e[1;32m",
-          :escape => "\e[1;36m",
-          :delimiter => "\e[1;32m",
-          :char => "\e[1;36m",
+          :self => "\e[31m",
+          :modifier => "\e[1;31m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;31m",
+          :escape => "\e[1;31m",
+        },
+        :symbol => {
+          :self => "\e[33m",
+          :delimiter => "\e[1;33m",
         },
-        :symbol => "\e[1;32m",
-        :tag => "\e[1;34m",
+        :tag => "\e[32m",
         :type => "\e[1;34m",
         :value => "\e[36m",
-        :variable => "\e[1;34m",
+        :variable => "\e[34m",
         
-        :insert => "\e[42m",
-        :delete => "\e[41m",
-        :change => "\e[44m",
-        :head => "\e[45m"
+        :insert => {
+          :self => "\e[42m",
+          :insert => "\e[1;32;42m",
+          :eyecatcher => "\e[102m",
+        },
+        :delete => {
+          :self => "\e[41m",
+          :delete => "\e[1;31;41m",
+          :eyecatcher => "\e[101m",
+        },
+        :change => {
+          :self => "\e[44m",
+          :change => "\e[37;44m",
+        },
+        :head => {
+          :self => "\e[45m",
+          :filename => "\e[37;45m"
+        },
       }
+      
       TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved]
       TOKEN_COLORS[:method] = TOKEN_COLORS[:function]
-      TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex]
-      TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] =
-        TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
+      TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
       
     protected
       
       def setup(options)
         super
         @opened = []
-        @subcolors = nil
+        @color_scopes = [TOKEN_COLORS]
       end
       
     public
       
       def text_token text, kind
-        if color = (@subcolors || TOKEN_COLORS)[kind]
-          if Hash === color
-            if color[:self]
-              color = color[:self]
-            else
-              @out << text
-              return
-            end
-          end
+        if color = @color_scopes.last[kind]
+          color = color[:self] if color.is_a? Hash
           
           @out << color
-          @out << text.gsub("\n", "\e[0m\n" + color)
+          @out << (text.index("\n") ? text.gsub("\n", "\e[0m\n" + color) : text)
           @out << "\e[0m"
-          @out << @subcolors[:self] if @subcolors
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         else
           @out << text
         end
@@ -130,40 +160,33 @@ module CodeRay
       alias begin_line begin_group
       
       def end_group kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
+        if @opened.pop
+          @color_scopes.pop
           @out << "\e[0m"
-          @out << open_token(@opened.last)
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         end
       end
       
       def end_line kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
-          # whole lines to be highlighted,
-          # eg. added/modified/deleted lines in a diff
-          @out << (@line_filler ||= "\t" * 100 + "\e[0m")
-          @out << open_token(@opened.last)
-        end
+        @out << (@line_filler ||= "\t" * 100)
+        end_group kind
       end
       
     private
       
       def open_token kind
-        if color = TOKEN_COLORS[kind]
-          if Hash === color
-            @subcolors = color
+        if color = @color_scopes.last[kind]
+          if color.is_a? Hash
+            @color_scopes << color
             color[:self]
           else
-            @subcolors = {}
+            @color_scopes << @color_scopes.last
             color
           end
         else
-          @subcolors = nil
+          @color_scopes << @color_scopes.last
           ''
         end
       end
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index a5d83ff..19f27ac 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -77,54 +77,56 @@ module CodeRay
     end
     
     TypeFromExt = {
-      'c'        => :c,
-      'cfc'      => :xml,
-      'cfm'      => :xml,
-      'clj'      => :clojure,
-      'css'      => :css,
-      'diff'     => :diff,
-      'dpr'      => :delphi,
-      'erb'      => :erb,
-      'gemspec'  => :ruby,
-      'groovy'   => :groovy,
-      'gvy'      => :groovy,
-      'h'        => :c,
-      'haml'     => :haml,
-      'htm'      => :html,
-      'html'     => :html,
-      'html.erb' => :erb,
-      'java'     => :java,
-      'js'       => :java_script,
-      'json'     => :json,
-      'mab'      => :ruby,
-      'pas'      => :delphi,
-      'patch'    => :diff,
-      'phtml'    => :php,
-      'php'      => :php,
-      'php3'     => :php,
-      'php4'     => :php,
-      'php5'     => :php,
-      'prawn'    => :ruby,
-      'py'       => :python,
-      'py3'      => :python,
-      'pyw'      => :python,
-      'rake'     => :ruby,
-      'raydebug' => :raydebug,
-      'rb'       => :ruby,
-      'rbw'      => :ruby,
-      'rhtml'    => :erb,
-      'rjs'      => :ruby,
-      'rpdf'     => :ruby,
-      'ru'       => :ruby,
-      'rxml'     => :ruby,
-      'sass'     => :sass,
-      'sql'      => :sql,
-      'tmproj'   => :xml,
-      'xaml'     => :xml,
-      'xhtml'    => :html,
-      'xml'      => :xml,
-      'yaml'     => :yaml,
-      'yml'      => :yaml,
+      'c'         => :c,
+      'cfc'       => :xml,
+      'cfm'       => :xml,
+      'clj'       => :clojure,
+      'css'       => :css,
+      'diff'      => :diff,
+      'dpr'       => :delphi,
+      'erb'       => :erb,
+      'gemspec'   => :ruby,
+      'groovy'    => :groovy,
+      'gvy'       => :groovy,
+      'h'         => :c,
+      'haml'      => :haml,
+      'htm'       => :html,
+      'html'      => :html,
+      'html.erb'  => :erb,
+      'java'      => :java,
+      'js'        => :java_script,
+      'json'      => :json,
+      'mab'       => :ruby,
+      'pas'       => :delphi,
+      'patch'     => :diff,
+      'phtml'     => :php,
+      'php'       => :php,
+      'php3'      => :php,
+      'php4'      => :php,
+      'php5'      => :php,
+      'prawn'     => :ruby,
+      'py'        => :python,
+      'py3'       => :python,
+      'pyw'       => :python,
+      'rake'      => :ruby,
+      'raydebug'  => :raydebug,
+      'rb'        => :ruby,
+      'rbw'       => :ruby,
+      'rhtml'     => :erb,
+      'rjs'       => :ruby,
+      'rpdf'      => :ruby,
+      'ru'        => :ruby,
+      'rxml'      => :ruby,
+      'sass'      => :sass,
+      'sql'       => :sql,
+      'taskpaper' => :taskpaper,
+      'template'  => :json,  # AWS CloudFormation template
+      'tmproj'    => :xml,
+      'xaml'      => :xml,
+      'xhtml'     => :html,
+      'xml'       => :xml,
+      'yaml'      => :yaml,
+      'yml'       => :yaml,
     }
     for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
       TypeFromExt[cpp_alias] = :cpp
diff --git a/lib/coderay/helpers/gzip.rb b/lib/coderay/helpers/gzip.rb
deleted file mode 100644
index 245014a..0000000
--- a/lib/coderay/helpers/gzip.rb
+++ /dev/null
@@ -1,41 +0,0 @@
-module CodeRay
-  
-  # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
-  module GZip
-    
-    require 'zlib'
-    
-    # The default zipping level. 7 zips good and fast.
-    DEFAULT_GZIP_LEVEL = 7
-    
-    # Unzips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   print GZip.gunzip(File.read('adresses.gz'))
-    def GZip.gunzip s
-      Zlib::Inflate.inflate s
-    end
-    
-    # Zips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   File.open('adresses.gz', 'w') do |file
-    #     file.write GZip.gzip('Mum: 0123 456 789', 9)
-    #   end
-    #
-    # If you provide a +level+, you can control how strong
-    # the string is compressed:
-    # - 0: no compression, only convert to gzip format
-    # - 1: compress fast
-    # - 7: compress more, but still fast (default)
-    # - 8: compress more, slower
-    # - 9: compress best, very slow
-    def GZip.gzip s, level = DEFAULT_GZIP_LEVEL
-      Zlib::Deflate.new(level).deflate s, Zlib::FINISH
-    end
-    
-  end
-  
-end
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb
index 4e0f462..3754a9b 100644
--- a/lib/coderay/scanners/json.rb
+++ b/lib/coderay/scanners/json.rb
@@ -14,15 +14,17 @@ module Scanners
     
     ESCAPE = / [bfnrt\\"\/] /x  # :nodoc:
     UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc:
+    KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     # See http://json.org/ for a definition of the JSON lexic/grammar.
     def scan_tokens encoder, options
-      
-      state = :initial
-      stack = []
-      key_expected = false
+      state = options[:state] || @state
       
       until eos?
         
@@ -32,18 +34,11 @@ module Scanners
           if match = scan(/ \s+ /x)
             encoder.text_token match, :space
           elsif match = scan(/"/)
-            state = key_expected ? :key : :string
+            state = check(/#{KEY}/o) ? :key : :string
             encoder.begin_group state
             encoder.text_token match, :delimiter
           elsif match = scan(/ [:,\[{\]}] /x)
             encoder.text_token match, :operator
-            case match
-            when ':' then key_expected = false
-            when ',' then key_expected = true if stack.last == :object
-            when '{' then stack << :object; key_expected = true
-            when '[' then stack << :array
-            when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
-            end
           elsif match = scan(/ true | false | null /x)
             encoder.text_token match, :value
           elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
@@ -82,6 +77,10 @@ module Scanners
         end
       end
       
+      if options[:keep_state]
+        @state = state
+      end
+      
       if [:string, :key].include? state
         encoder.end_group state
       end
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index a9492ab..09c8b6e 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -157,12 +157,12 @@ module Scanners
             encoder.text_token match, :operator
           
           elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
+            modifiers = self[1]
             string_delimiter = self[2]
-            string_type = docstring_coming ? :docstring : :string
+            string_type = docstring_coming ? :docstring : (modifiers == 'b' ? :binary : :string)
             docstring_coming = false if docstring_coming
             encoder.begin_group string_type
             string_raw = false
-            modifiers = self[1]
             unless modifiers.empty?
               string_raw = !!modifiers.index(?r)
               encoder.text_token modifiers, :modifier
diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb
index 7b2381d..c05ffd5 100644
--- a/lib/coderay/styles/alpha.rb
+++ b/lib/coderay/styles/alpha.rb
@@ -56,25 +56,26 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .annotation { color:#007 }
 .attribute-name { color:#b48 }
 .attribute-value { color:#700 }
-.binary { color:#509 }
+.binary { color:#549 }
+.binary .char { color:#325 }
+.binary .delimiter { color:#325 }
+.char { color:#D20 }
 .char .content { color:#D20 }
 .char .delimiter { color:#710 }
-.char { color:#D20 }
 .class { color:#B06; font-weight:bold }
 .class-variable { color:#369 }
 .color { color:#0A0 }
 .comment { color:#777 }
 .comment .char { color:#444 }
 .comment .delimiter { color:#444 }
-.complex { color:#A08 }
 .constant { color:#036; font-weight:bold }
 .decorator { color:#B0B }
 .definition { color:#099; font-weight:bold }
 .delimiter { color:black }
 .directive { color:#088; font-weight:bold }
-.doc { color:#970 }
-.doc-string { color:#D42; font-weight:bold }
+.docstring { color:#D42; }
 .doctype { color:#34b }
+.done { text-decoration: line-through; color: gray }
 .entity { color:#800; font-weight:bold }
 .error { color:#F00; background-color:#FAA }
 .escape  { color:#666 }
@@ -85,16 +86,16 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .global-variable { color:#d70 }
 .hex { color:#02b }
 .id  { color:#33D; font-weight:bold }
-.imaginary { color:#f00 }
 .include { color:#B44; font-weight:bold }
 .inline { background-color: hsla(0,0%,0%,0.07); color: black }
 .inline-delimiter { font-weight: bold; color: #666 }
 .instance-variable { color:#33B }
 .integer  { color:#00D }
+.imaginary { color:#f00 }
 .important { color:#D00 }
+.key { color: #606 }
 .key .char { color: #60f }
 .key .delimiter { color: #404 }
-.key { color: #606 }
 .keyword { color:#080; font-weight:bold }
 .label { color:#970; font-weight:bold }
 .local-variable { color:#963 }
@@ -106,33 +107,33 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .predefined-type { color:#0a5; font-weight:bold }
 .preprocessor { color:#579 }
 .pseudo-class { color:#00C; font-weight:bold }
+.regexp { background-color:hsla(300,100%,50%,0.06); }
 .regexp .content { color:#808 }
 .regexp .delimiter { color:#404 }
 .regexp .modifier { color:#C2C }
-.regexp { background-color:hsla(300,100%,50%,0.06); }
 .reserved { color:#080; font-weight:bold }
+.shell { background-color:hsla(120,100%,50%,0.06); }
 .shell .content { color:#2B2 }
 .shell .delimiter { color:#161 }
-.shell { background-color:hsla(120,100%,50%,0.06); }
+.string { background-color:hsla(0,100%,50%,0.05); }
 .string .char { color: #b0b }
 .string .content { color: #D20 }
 .string .delimiter { color: #710 }
 .string .modifier { color: #E40 }
-.string { background-color:hsla(0,100%,50%,0.05); }
+.symbol { color:#A60 }
 .symbol .content { color:#A60 }
 .symbol .delimiter { color:#630 }
-.symbol { color:#A60 }
 .map .content { color:#808 }
 .map .delimiter { color:#40A}
 .map { background-color:hsla(200,100%,50%,0.06); }
-.tag { color:#070 }
+.tag { color:#070; font-weight:bold }
 .type { color:#339; font-weight:bold }
-.value { color: #088; }
-.variable  { color:#037 }
+.value { color: #088 }
+.variable { color:#037 }
 
 .insert { background: hsla(120,100%,50%,0.12) }
 .delete { background: hsla(0,100%,50%,0.12) }
-.change { color: #bbf; background: #007; }
+.change { color: #bbf; background: #007 }
 .head { color: #f8f; background: #505 }
 .head .filename { color: white; }
 
@@ -143,8 +144,6 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .delete .delete { color: #c00; background:transparent; font-weight:bold }
 .change .change { color: #88f }
 .head .head { color: #f4f }
-
-.done { text-decoration: line-through; color: gray }
     TOKENS
 
   end
diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb
index 923c859..f1696df 100755
--- a/lib/coderay/token_kinds.rb
+++ b/lib/coderay/token_kinds.rb
@@ -10,80 +10,78 @@ module CodeRay
   TokenKinds.compare_by_identity if TokenKinds.respond_to? :compare_by_identity
 
   TokenKinds.update(  # :nodoc:
-    :annotation          => 'annotation',
-    :attribute_name      => 'attribute-name',
-    :attribute_value     => 'attribute-value',
-    :binary              => 'bin',
-    :char                => 'char',
-    :class               => 'class',
-    :class_variable      => 'class-variable',
-    :color               => 'color',
-    :comment             => 'comment',
-    :complex             => 'complex',
-    :constant            => 'constant',
-    :content             => 'content',
-    :debug               => 'debug',
-    :decorator           => 'decorator',
-    :definition          => 'definition',
-    :delimiter           => 'delimiter',
-    :directive           => 'directive',
-    :doc                 => 'doc',
-    :doctype             => 'doctype',
-    :docstring           => 'doc-string',
-    :done                => 'done',
-    :entity              => 'entity',
-    :error               => 'error',
-    :escape              => 'escape',
-    :exception           => 'exception',
-    :filename            => 'filename',
-    :float               => 'float',
-    :function            => 'function',
-    :global_variable     => 'global-variable',
-    :hex                 => 'hex',
-    :id                  => 'id',
-    :imaginary           => 'imaginary',
-    :important           => 'important',
-    :include             => 'include',
-    :inline              => 'inline',
-    :inline_delimiter    => 'inline-delimiter',
-    :instance_variable   => 'instance-variable',
-    :integer             => 'integer',
-    :key                 => 'key',
-    :keyword             => 'keyword',
-    :label               => 'label',
-    :local_variable      => 'local-variable',
-    :map                 => 'map',
-    :modifier            => 'modifier',
-    :namespace           => 'namespace',
-    :octal               => 'octal',
-    :predefined          => 'predefined',
-    :predefined_constant => 'predefined-constant',
-    :predefined_type     => 'predefined-type',
-    :preprocessor        => 'preprocessor',
-    :pseudo_class        => 'pseudo-class',
-    :regexp              => 'regexp',
-    :reserved            => 'reserved',
-    :shell               => 'shell',
-    :string              => 'string',
-    :symbol              => 'symbol',
-    :tag                 => 'tag',
-    :type                => 'type',
-    :value               => 'value',
-    :variable            => 'variable',
-
-    :change              => 'change',
-    :delete              => 'delete',
-    :head                => 'head',
-    :insert              => 'insert',
-
-    :eyecatcher          => 'eyecatcher',
-
-    :ident               => false,
-    :operator            => false,
-
-    :space               => false,
-    :plain               => false
+    :debug               => 'debug',              # highlight for debugging (white on blue background)
+    
+    :annotation          => 'annotation',         # Groovy, Java
+    :attribute_name      => 'attribute-name',     # HTML, CSS
+    :attribute_value     => 'attribute-value',    # HTML
+    :binary              => 'binary',             # Python, Ruby
+    :char                => 'char',               # most scanners, also inside of strings
+    :class               => 'class',              # lots of scanners, for different purposes also in CSS
+    :class_variable      => 'class-variable',     # Ruby, YAML
+    :color               => 'color',              # CSS
+    :comment             => 'comment',            # most scanners
+    :constant            => 'constant',           # PHP, Ruby
+    :content             => 'content',            # inside of strings, most scanners
+    :decorator           => 'decorator',          # Python
+    :definition          => 'definition',         # CSS
+    :delimiter           => 'delimiter',          # inside strings, comments and other types
+    :directive           => 'directive',          # lots of scanners
+    :doctype             => 'doctype',            # Groovy, HTML, Ruby, YAML
+    :docstring           => 'docstring',          # Python
+    :done                => 'done',               # Taskpaper
+    :entity              => 'entity',             # HTML
+    :error               => 'error',              # invalid token, most scanners
+    :escape              => 'escape',             # Ruby (string inline variables like #$foo, #@bar)
+    :exception           => 'exception',          # Java, PHP, Python
+    :filename            => 'filename',           # Diff
+    :float               => 'float',              # most scanners
+    :function            => 'function',           # CSS, JavaScript, PHP
+    :global_variable     => 'global-variable',    # Ruby, YAML
+    :hex                 => 'hex',                # hexadecimal number; lots of scanners
+    :id                  => 'id',                 # CSS
+    :imaginary           => 'imaginary',          # Python
+    :important           => 'important',          # CSS, Taskpaper
+    :include             => 'include',            # C, Groovy, Java, Python, Sass
+    :inline              => 'inline',             # nested code, eg. inline string evaluation; lots of scanners
+    :inline_delimiter    => 'inline-delimiter',   # used instead of :inline > :delimiter FIXME: Why use inline_delimiter?
+    :instance_variable   => 'instance-variable',  # Ruby
+    :integer             => 'integer',            # most scanners
+    :key                 => 'key',                # lots of scanners, used together with :value
+    :keyword             => 'keyword',            # reserved word that's actually implemented; most scanners
+    :label               => 'label',              # C, PHP
+    :local_variable      => 'local-variable',     # local and magic variables; some scanners
+    :map                 => 'map',                # Lua tables
+    :modifier            => 'modifier',           # used inside of strings; lots of scanners
+    :namespace           => 'namespace',          # Clojure, Java, Taskpaper
+    :octal               => 'octal',              # lots of scanners
+    :predefined          => 'predefined',         # predefined function: lots of scanners
+    :predefined_constant => 'predefined-constant',# lots of scanners
+    :predefined_type     => 'predefined-type',    # C, Java, PHP
+    :preprocessor        => 'preprocessor',       # C, Delphi, HTML
+    :pseudo_class        => 'pseudo-class',       # CSS
+    :regexp              => 'regexp',             # Groovy, JavaScript, Ruby
+    :reserved            => 'reserved',           # most scanners
+    :shell               => 'shell',              # Ruby
+    :string              => 'string',             # most scanners
+    :symbol              => 'symbol',             # Clojure, Ruby, YAML
+    :tag                 => 'tag',                # CSS, HTML
+    :type                => 'type',               # CSS, Java, SQL, YAML
+    :value               => 'value',              # used together with :key; CSS, JSON, YAML
+    :variable            => 'variable',           # Sass, SQL, YAML
+    
+    :change              => 'change',             # Diff
+    :delete              => 'delete',             # Diff
+    :head                => 'head',               # Diff, YAML
+    :insert              => 'insert',             # Diff
+    :eyecatcher          => 'eyecatcher',         # Diff
+    
+    :ident               => false,                # almost all scanners
+    :operator            => false,                # almost all scanners
+    
+    :space               => false,                # almost all scanners
+    :plain               => false                 # almost all scanners
   )
-
-  TokenKinds[:method]    = TokenKinds[:function]
+  
+  TokenKinds[:method] = TokenKinds[:function]
 end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 6957d69..54358d4 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,27 +1,22 @@
 module CodeRay
   
-  # GZip library for writing and reading token dumps.
-  autoload :GZip, coderay_path('helpers', 'gzip')
-  
-  # = Tokens  TODO: Rewrite!
-  #
-  # The Tokens class represents a list of tokens returnd from
-  # a Scanner.
+  # The Tokens class represents a list of tokens returned from
+  # a Scanner. It's actually just an Array with a few helper methods.
   #
-  # A token is not a special object, just a two-element Array
+  # A token itself is not a special object, just a two-element Array
   # consisting of
   # * the _token_ _text_ (the original source of the token in a String) or
   #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
   # * the _token_ _kind_ (a Symbol representing the type of the token)
   #
-  # A token looks like this:
+  # It looks like this:
   #
   #   ['# It looks like this', :comment]
   #   ['3.1415926', :float]
   #   ['$^', :error]
   #
   # Some scanners also yield sub-tokens, represented by special
-  # token actions, namely begin_group and end_group.
+  # token actions, for example :begin_group and :end_group.
   #
   # The Ruby scanner, for example, splits "a string" into:
   #
@@ -33,23 +28,17 @@ module CodeRay
   #   [:end_group, :string]
   #  ]
   #
-  # Tokens is the interface between Scanners and Encoders:
-  # The input is split and saved into a Tokens object. The Encoder
-  # then builds the output from this object.
-  #
-  # Thus, the syntax below becomes clear:
+  # Tokens can be used to save the output of a Scanner in a simple
+  # Ruby object that can be sent to an Encoder later:
   #
-  #   CodeRay.scan('price = 2.59', :ruby).html
-  #   # the Tokens object is here -------^
-  #
-  # See how small it is? ;)
+  #   tokens = CodeRay.scan('price = 2.59', :ruby).tokens
+  #   tokens.encode(:html)
+  #   tokens.html
+  #   CodeRay.encoder(:html).encode_tokens(tokens)
   #
   # Tokens gives you the power to handle pre-scanned code very easily:
-  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
-  # that you put in your DB.
-  # 
-  # It also allows you to generate tokens directly (without using a scanner),
-  # to load them from a file, and still use any Encoder that CodeRay provides.
+  # You can serialize it to a JSON string and store it in a database, pass it
+  # around to encode it more than once, send it to other algorithms...
   class Tokens < Array
     
     # The Scanner instance that created the tokens.
@@ -58,8 +47,7 @@ module CodeRay
     # Encode the tokens using encoder.
     #
     # encoder can be
-    # * a symbol like :html oder :statistic
-    # * an Encoder class
+    # * a plugin name like :html or 'statistic'
     # * an Encoder object
     #
     # options are passed to the encoder.
@@ -157,53 +145,11 @@ module CodeRay
       parts
     end
     
-    # Dumps the object into a String that can be saved
-    # in files or databases.
-    #
-    # The dump is created with Marshal.dump;
-    # In addition, it is gzipped using GZip.gzip.
-    #
-    # The returned String object includes Undumping
-    # so it has an #undump method. See Tokens.load.
-    #
-    # You can configure the level of compression,
-    # but the default value 7 should be what you want
-    # in most cases as it is a good compromise between
-    # speed and compression rate.
-    #
-    # See GZip module.
-    def dump gzip_level = 7
-      dump = Marshal.dump self
-      dump = GZip.gzip dump, gzip_level
-      dump.extend Undumping
-    end
-    
     # Return the actual number of tokens.
     def count
       size / 2
     end
     
-    # Include this module to give an object an #undump
-    # method.
-    #
-    # The string returned by Tokens.dump includes Undumping.
-    module Undumping
-      # Calls Tokens.load with itself.
-      def undump
-        Tokens.load self
-      end
-    end
-    
-    # Undump the object using Marshal.load, then
-    # unzip it using GZip.gunzip.
-    #
-    # The result is commonly a Tokens object, but
-    # this is not guaranteed.
-    def Tokens.load dump
-      dump = GZip.gunzip dump
-      @dump = Marshal.load dump
-    end
-    
     alias text_token push
     def begin_group kind; push :begin_group, kind end
     def end_group kind; push :end_group, kind end
author	Kornelius Kalnbach <murphy@rubychan.de>	2013-06-22 23:52:44 +0200
committer	Kornelius Kalnbach <murphy@rubychan.de>	2013-06-22 23:52:44 +0200
commit	5b1a49fdd3bef559991242a4ad7d3d1ed9cb48c8 (patch)
tree	9e74edadd344247e805f6bd3588a34fdf0dbfa21 /lib
parent	69246fc8ed0344eae4dab35286813a00010a08cb (diff)
parent	2abfc49bdc9a9f4e86c90aa968c302ca76c20812 (diff)
download	coderay-5b1a49fdd3bef559991242a4ad7d3d1ed9cb48c8.tar.gz