24 files changed, 1104 insertions, 549 deletions
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index 62f9f0a..f4db330 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -9,7 +9,6 @@ module Encoders
   #
   # You cannot fully restore the tokens information from the
   # output, because consecutive :space tokens are merged.
-  # Use Tokens#dump for caching purposes.
   # 
   # See also: Scanners::Debug
   class Debug < Encoder
@@ -18,37 +17,26 @@ module Encoders
     
     FILE_EXTENSION = 'raydebug'
     
-    def initialize options = {}
-      super
-      @opened = []
-    end
-    
     def text_token text, kind
       if kind == :space
         @out << text
       else
-        # TODO: Escape (
-        text = text.gsub(/[)\\]/, '\\\\\0') if text.index(/[)\\]/)
-        @out << kind.to_s << '(' << text << ')'
+        text = text.gsub('\\', '\\\\\\\\') if text.index('\\')
+        text = text.gsub(')',  '\\\\)')    if text.index(')')
+        @out << "#{kind}(#{text})"
       end
     end
     
     def begin_group kind
-      @opened << kind
-      @out << kind.to_s << '<'
+      @out << "#{kind}<"
     end
     
     def end_group kind
-      if @opened.last != kind
-        puts @out
-        raise "we are inside #{@opened.inspect}, not #{kind}"
-      end
-      @opened.pop
       @out << '>'
     end
     
     def begin_line kind
-      @out << kind.to_s << '['
+      @out << "#{kind}["
     end
     
     def end_line kind
diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb
new file mode 100644
index 0000000..0ac89ef
--- /dev/null
+++ b/lib/coderay/encoders/debug_lint.rb
@@ -0,0 +1,68 @@
+module CodeRay
+module Encoders
+  
+  # = Debug Lint Encoder
+  #
+  # Debug encoder with additional checks for:
+  # 
+  # - empty tokens
+  # - incorrect nesting
+  # 
+  # It will raise an InvalidTokenStream exception when any of the above occurs.
+  # 
+  # See also: Encoders::Debug
+  class DebugLint < Debug
+    
+    register_for :debug_lint
+    
+    # Base class of every error raised by this encoder.
+    InvalidTokenStream = Class.new StandardError
+    # Raised by text_token when the token text is empty.
+    EmptyToken = Class.new InvalidTokenStream
+    # Raised by end_group and end_line when +kind+ is not the innermost open kind.
+    IncorrectTokenGroupNesting = Class.new InvalidTokenStream
+    
+    # Starts with an empty stack of open group and line kinds.
+    def initialize options = {}
+      super
+      @opened = []
+    end
+    
+    # Rejects empty tokens, then encodes like Debug.
+    def text_token text, kind
+      raise EmptyToken, 'empty token' if text.empty?
+      super
+    end
+    
+    # Remembers +kind+ as the innermost open group.
+    def begin_group kind
+      @opened << kind
+      super
+    end
+    
+    # Closes the innermost group; raises IncorrectTokenGroupNesting unless it is +kind+.
+    def end_group kind
+      # Check before popping, so the message still lists the kind we are inside.
+      raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind} (end_group)" if @opened.last != kind
+      @opened.pop
+      super
+    end
+    
+    # Remembers +kind+ as the innermost open line (lines share the stack with groups).
+    def begin_line kind
+      @opened << kind
+      super
+    end
+    
+    # Closes the innermost line; raises IncorrectTokenGroupNesting unless it is +kind+.
+    def end_line kind
+      # Check before popping, so the message still lists the kind we are inside.
+      raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind} (end_line)" if @opened.last != kind
+      @opened.pop
+      super
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
index 635a4d8..b897f5e 100644
--- a/lib/coderay/encoders/html.rb
+++ b/lib/coderay/encoders/html.rb
@@ -126,22 +126,21 @@ module Encoders
     
   protected
     
-    HTML_ESCAPE = {  #:nodoc:
-      '&' => '&amp;',
-      '"' => '&quot;',
-      '>' => '&gt;',
-      '<' => '&lt;',
-    }
+    def self.make_html_escape_hash
+      escapes = {
+        '&' => '&amp;',
+        '"' => '&quot;',
+        '>' => '&gt;',
+        '<' => '&lt;',
+        # "\t" => will be set to ' ' * options[:tab_width] during setup
+      }
+      # ASCII control codes, except \x9 == \t and \xA == \n, are replaced by a space.
+      ((0x00..0x8).to_a + (0xB..0x1F).to_a).each { |code| escapes[code.chr] = ' ' }
+      escapes
+    end
     
-    # This was to prevent illegal HTML.
-    # Strange chars should still be avoided in codes.
-    evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s]
-    evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
-    #ansi_chars = Array(0x7f..0xff)
-    #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
-    # \x9 (\t) and \xA (\n) not included
-    #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/
-    HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/
+    HTML_ESCAPE = make_html_escape_hash
+    HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1F]/
     
     TOKEN_KIND_TO_INFO = Hash.new do |h, kind|
       h[kind] = kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize }
@@ -172,59 +171,22 @@ module Encoders
     def setup options
       super
       
+      check_options! options
+      
       if options[:wrap] || options[:line_numbers]
         @real_out = @out
         @out = ''
       end
       
-      options[:break_lines] = true if options[:line_numbers] == :inline
-      
       @break_lines = (options[:break_lines] == true)
       
-      @HTML_ESCAPE = HTML_ESCAPE.dup
-      @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
+      @HTML_ESCAPE = HTML_ESCAPE.merge("\t" => ' ' * options[:tab_width])
       
       @opened = []
       @last_opened = nil
       @css = CSS.new options[:style]
       
-      hint = options[:hint]
-      if hint && ![:debug, :info, :info_long].include?(hint)
-        raise ArgumentError, "Unknown value %p for :hint; \
-          expected :info, :info_long, :debug, false, or nil." % hint
-      end
-      
-      css_classes = TokenKinds
-      case options[:css]
-      when :class
-        @span_for_kind = Hash.new do |h, k|
-          if k.is_a? ::Symbol
-            kind = k_dup = k
-          else
-            kind = k.first
-            k_dup = k.dup
-          end
-          if kind != :space && (hint || css_class = css_classes[kind])
-            title = HTML.token_path_to_hint hint, k if hint
-            css_class ||= css_classes[kind]
-            h[k_dup] = "<span#{title}#{" class=\"#{css_class}\"" if css_class}>"
-          else
-            h[k_dup] = nil
-          end
-        end
-      when :style
-        @span_for_kind = Hash.new do |h, k|
-          kind = k.is_a?(Symbol) ? k : k.first
-          h[k.is_a?(Symbol) ? k : k.dup] =
-            if kind != :space && (hint || css_classes[kind])
-              title = HTML.token_path_to_hint hint, k if hint
-              style = @css.get_style Array(k).map { |c| css_classes[c] }
-              "<span#{title}#{" style=\"#{style}\"" if style}>"
-            end
-        end
-      else
-        raise ArgumentError, "Unknown value %p for :css." % options[:css]
-      end
+      @span_for_kinds = make_span_for_kinds(options[:css], options[:hint])
       
       @set_last_opened = options[:hint] || options[:css] == :style
     end
@@ -255,20 +217,10 @@ module Encoders
   public
     
     def text_token text, kind
-      if text =~ /#{HTML_ESCAPE_PATTERN}/o
-        text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
-      end
+      style = @span_for_kinds[@last_opened ? [kind, *@opened] : kind]
       
-      style = @span_for_kind[@last_opened ? [kind, *@opened] : kind]
-      
-      if @break_lines && (i = text.index("\n")) && (c = @opened.size + (style ? 1 : 0)) > 0
-        close = '</span>' * c
-        reopen = ''
-        @opened.each_with_index do |k, index|
-          reopen << (@span_for_kind[index > 0 ? [k, *@opened[0 ... index ]] : k] || '<span>')
-        end
-        text[i .. -1] = text[i .. -1].gsub("\n", "#{close}\n#{reopen}#{style}")
-      end
+      text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } if text =~ /#{HTML_ESCAPE_PATTERN}/o
+      text = break_lines(text, style) if @break_lines && (style || @opened.size > 0) && text.index("\n")
       
       if style
         @out << style << text << '</span>'
@@ -279,25 +231,19 @@ module Encoders
     
     # token groups, eg. strings
     def begin_group kind
-      @out << (@span_for_kind[@last_opened ? [kind, *@opened] : kind] || '<span>')
+      @out << (@span_for_kinds[@last_opened ? [kind, *@opened] : kind] || '<span>')
       @opened << kind
       @last_opened = kind if @set_last_opened
     end
     
     def end_group kind
-      if $CODERAY_DEBUG && (@opened.empty? || @opened.last != kind)
-        warn 'Malformed token stream: Trying to close a token (%p) ' \
-          'that is not open. Open are: %p.' % [kind, @opened[1..-1]]
-      end
-      if @opened.pop
-        @out << '</span>'
-        @last_opened = @opened.last if @last_opened
-      end
+      check_group_nesting 'token group', kind if $CODERAY_DEBUG
+      close_span
     end
     
     # whole lines to be highlighted, eg. a deleted line in a diff
     def begin_line kind
-      if style = @span_for_kind[@last_opened ? [kind, *@opened] : kind]
+      if style = @span_for_kinds[@last_opened ? [kind, *@opened] : kind]
         if style['class="']
           @out << style.sub('class="', 'class="line ')
         else
@@ -311,16 +257,71 @@ module Encoders
     end
     
     def end_line kind
-      if $CODERAY_DEBUG && (@opened.empty? || @opened.last != kind)
-        warn 'Malformed token stream: Trying to close a line (%p) ' \
-          'that is not open. Open are: %p.' % [kind, @opened[1..-1]]
+      check_group_nesting 'line', kind if $CODERAY_DEBUG
+      close_span
+    end
+    
+  protected
+    
+    def check_options! options
+      hint = options[:hint]
+      unless [false, nil, :debug, :info, :info_long].include?(hint)
+        raise ArgumentError, "Unknown value %p for :hint; expected :info, :info_long, :debug, false, or nil." % [hint]
+      end
+      
+      css = options[:css]
+      raise ArgumentError, 'Unknown value %p for :css.' % [css] unless [:class, :style].include?(css)
+      
+      options[:break_lines] = true if options[:line_numbers] == :inline  # :inline numbering forces :break_lines on
+    end
+    
+    def css_class_for_kinds kinds
+      TokenKinds[kinds.is_a?(Symbol) ? kinds : kinds.first]
+    end
+    
+    def style_for_kinds kinds
+      css_classes = kinds.is_a?(Array) ? kinds.map { |c| TokenKinds[c] } : [TokenKinds[kinds]]
+      @css.get_style_for_css_classes css_classes
+    end
+    
+    def make_span_for_kinds method, hint  # returns a Hash that builds and caches the opening <span> tag per kinds key on first lookup
+      Hash.new do |h, kinds|
+        h[kinds.is_a?(Symbol) ? kinds : kinds.dup] = begin  # Symbol keys are stored as-is, any other key as a copy
+          css_class = css_class_for_kinds(kinds)
+          title     = HTML.token_path_to_hint hint, kinds if hint
+          
+          if css_class || title  # otherwise nil is cached, meaning no span for these kinds
+            if method == :style  # inline style attribute; any other method emits a class attribute
+              style = style_for_kinds(kinds)
+              "<span#{title}#{" style=\"#{style}\"" if style}>"
+            else
+              "<span#{title}#{" class=\"#{css_class}\"" if css_class}>"
+            end
+          end
+        end
+      end
+    end
+    
+    def check_group_nesting name, kind  # warns (does not raise) when +kind+ is not the innermost open kind
+      if @opened.empty? || @opened.last != kind
+        warn "Malformed token stream: Trying to close a #{name} (%p) that is not open. Open are: %p." % [kind, @opened]
+      end
+    end
+    
+    def break_lines text, style
+      # Opening tags of all currently open groups, outermost first; closed before and reopened after each newline.
+      reopen = @opened.each_with_index.map do |opened_kind, depth|
+        @span_for_kinds[depth > 0 ? [opened_kind, *@opened[0...depth]] : opened_kind] || '<span>'
+      end.join
+      text.gsub("\n", "#{'</span>' * @opened.size}#{'</span>' if style}\n#{reopen}#{style}")
+    end
+    
+    def close_span
       if @opened.pop
         @out << '</span>'
         @last_opened = @opened.last if @last_opened
       end
     end
-    
   end
   
 end
diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb
index 6de4b46..164d7f8 100644
--- a/lib/coderay/encoders/html/css.rb
+++ b/lib/coderay/encoders/html/css.rb
@@ -11,7 +11,7 @@ module Encoders
       end
 
       def initialize style = :default
-        @classes = Hash.new
+        @styles = Hash.new
         style = CSS.load_stylesheet style
         @stylesheet = [
           style::CSS_MAIN_STYLES,
@@ -20,12 +20,12 @@ module Encoders
         parse style::TOKEN_COLORS
       end
 
-      def get_style styles
-        cl = @classes[styles.first]
+      def get_style_for_css_classes css_classes
+        cl = @styles[css_classes.first]
         return '' unless cl
         style = ''
-        1.upto styles.size do |offset|
-          break if style = cl[styles[offset .. -1]]
+        1.upto css_classes.size do |offset|
+          break if style = cl[css_classes[offset .. -1]]
         end
         # warn 'Style not found: %p' % [styles] if style.empty?
         return style
@@ -52,8 +52,8 @@ module Encoders
           for selector in selectors.split(',')
             classes = selector.scan(/[-\w]+/)
             cl = classes.pop
-            @classes[cl] ||= Hash.new
-            @classes[cl][classes] = style.to_s.strip.delete(' ').chomp(';')
+            @styles[cl] ||= Hash.new
+            @styles[cl][classes] = style.to_s.strip.delete(' ').chomp(';')
           end
         end
       end
diff --git a/lib/coderay/encoders/html/numbering.rb b/lib/coderay/encoders/html/numbering.rb
index 332145b..a1b9c04 100644
--- a/lib/coderay/encoders/html/numbering.rb
+++ b/lib/coderay/encoders/html/numbering.rb
@@ -26,7 +26,7 @@ module Encoders
               "<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>"
             end
           else
-            proc { |line| line.to_s }  # :to_s.to_proc in Ruby 1.8.7+
+            :to_s.to_proc
           end
         
         bold_every = options[:bold_every]
@@ -75,7 +75,7 @@ module Encoders
           line_number = start
           output.gsub!(/^.*$\n?/) do |line|
             line_number_text = bolding.call line_number
-            indent = ' ' * (max_width - line_number.to_s.size)  # TODO: Optimize (10^x)
+            indent = ' ' * (max_width - line_number.to_s.size)
             line_number += 1
             "<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{line}"
           end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index 2315d9e..b2f8b83 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -67,7 +67,6 @@ Token Types (%d):
       @type_stats['TOTAL'].count += 1
     end
     
-    # TODO Hierarchy handling
     def begin_group kind
       block_token ':begin_group', kind
     end
diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb
index a0ceb3c..c7ae014 100644
--- a/lib/coderay/encoders/terminal.rb
+++ b/lib/coderay/encoders/terminal.rb
@@ -19,105 +19,135 @@ module CodeRay
       register_for :terminal
       
       TOKEN_COLORS = {
-        :annotation => '35',
-        :attribute_name => '33',
-        :attribute_value => '31',
-        :binary => '1;35',
+        :debug => "\e[1;37;44m",
+        
+        :annotation => "\e[34m",
+        :attribute_name => "\e[35m",
+        :attribute_value => "\e[31m",
+        :binary => {
+          :self => "\e[31m",
+          :char => "\e[1;31m",
+          :delimiter => "\e[1;31m",
+        },
         :char => {
-          :self => '36', :delimiter => '1;34'
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m"
+        },
+        :class => "\e[1;35;4m",
+        :class_variable => "\e[36m",
+        :color => "\e[32m",
+        :comment => {
+          :self => "\e[1;30m",
+          :char => "\e[37m",
+          :delimiter => "\e[37m",
         },
-        :class => '1;35',
-        :class_variable => '36',
-        :color => '32',
-        :comment => '37',
-        :complex => '1;34',
-        :constant => ['1;34', '4'],
-        :decoration => '35',
-        :definition => '1;32',
-        :directive => ['32', '4'],
-        :doc => '46',
-        :doctype => '1;30',
-        :doc_string => ['31', '4'],
-        :entity => '33',
-        :error => ['1;33', '41'],
-        :exception => '1;31',
-        :float => '1;35',
-        :function => '1;34',
-        :global_variable => '42',
-        :hex => '1;36',
-        :include => '33',
-        :integer => '1;34',
-        :key => '35',
-        :label => '1;15',
-        :local_variable => '33',
-        :octal => '1;35',
-        :operator_name => '1;29',
-        :predefined_constant => '1;36',
-        :predefined_type => '1;30',
-        :predefined => ['4', '1;34'],
-        :preprocessor => '36',
-        :pseudo_class => '1;34',
+        :constant => "\e[1;34;4m",
+        :decorator => "\e[35m",
+        :definition => "\e[1;33m",
+        :directive => "\e[33m",
+        :docstring => "\e[31m",
+        :doctype => "\e[1;34m",
+        :done => "\e[1;30;2m",
+        :entity => "\e[31m",
+        :error => "\e[1;37;41m",
+        :exception => "\e[1;31m",
+        :float => "\e[1;35m",
+        :function => "\e[1;34m",
+        :global_variable => "\e[1;32m",
+        :hex => "\e[1;36m",
+        :id => "\e[1;34m",
+        :include => "\e[31m",
+        :integer => "\e[1;34m",
+        :imaginary => "\e[1;34m",
+        :important => "\e[1;31m",
+        :key => {
+          :self => "\e[35m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;35m",
+        },
+        :keyword => "\e[32m",
+        :label => "\e[1;33m",
+        :local_variable => "\e[33m",
+        :namespace => "\e[1;35m",
+        :octal => "\e[1;34m",
+        :predefined => "\e[36m",
+        :predefined_constant => "\e[1;36m",
+        :predefined_type => "\e[1;32m",
+        :preprocessor => "\e[1;36m",
+        :pseudo_class => "\e[1;34m",
         :regexp => {
-          :self => '31',
-          :content => '31',
-          :delimiter => '1;29',
-          :modifier => '35',
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m",
+          :modifier => "\e[35m",
+          :char => "\e[1;35m",
         },
-        :reserved => '1;31',
+        :reserved => "\e[32m",
         :shell => {
-          :self => '42',
-          :content => '1;29',
-          :delimiter => '37',
+          :self => "\e[33m",
+          :char => "\e[1;33m",
+          :delimiter => "\e[1;33m",
+          :escape => "\e[1;33m",
         },
         :string => {
-          :self => '32',
-          :modifier => '1;32',
-          :escape => '1;36',
-          :delimiter => '1;32',
-          :char => '1;36',
+          :self => "\e[31m",
+          :modifier => "\e[1;31m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;31m",
+          :escape => "\e[1;31m",
+        },
+        :symbol => {
+          :self => "\e[33m",
+          :delimiter => "\e[1;33m",
         },
-        :symbol => '1;32',
-        :tag => '1;34',
-        :type => '1;34',
-        :value => '36',
-        :variable => '1;34',
+        :tag => "\e[32m",
+        :type => "\e[1;34m",
+        :value => "\e[36m",
+        :variable => "\e[34m",
         
-        :insert => '42',
-        :delete => '41',
-        :change => '44',
-        :head => '45'
+        :insert => {
+          :self => "\e[42m",
+          :insert => "\e[1;32;42m",
+          :eyecatcher => "\e[102m",
+        },
+        :delete => {
+          :self => "\e[41m",
+          :delete => "\e[1;31;41m",
+          :eyecatcher => "\e[101m",
+        },
+        :change => {
+          :self => "\e[44m",
+          :change => "\e[37;44m",
+        },
+        :head => {
+          :self => "\e[45m",
+          :filename => "\e[37;45m"
+        },
       }
+      
       TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved]
       TOKEN_COLORS[:method] = TOKEN_COLORS[:function]
-      TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex]
-      TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] =
-        TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
+      TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
       
     protected
       
       def setup(options)
         super
         @opened = []
-        @subcolors = nil
+        @color_scopes = [TOKEN_COLORS]
       end
       
     public
       
       def text_token text, kind
-        if color = (@subcolors || TOKEN_COLORS)[kind]
-          if Hash === color
-            if color[:self]
-              color = color[:self]
-            else
-              @out << text
-              return
-            end
-          end
+        if color = @color_scopes.last[kind]
+          color = color[:self] if color.is_a? Hash
           
-          @out << ansi_colorize(color)
-          @out << text.gsub("\n", ansi_clear + "\n" + ansi_colorize(color))
-          @out << ansi_clear
-          @out << ansi_colorize(@subcolors[:self]) if @subcolors && @subcolors[:self]
+          @out << color
+          @out << (text.index("\n") ? text.gsub("\n", "\e[0m\n" + color) : text)
+          @out << "\e[0m"
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         else
           @out << text
         end
@@ -130,50 +160,36 @@ module CodeRay
       alias begin_line begin_group
       
       def end_group kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
-          @out << ansi_clear
-          @out << open_token(@opened.last)
+        if @opened.pop
+          @color_scopes.pop
+          @out << "\e[0m"
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         end
       end
       
       def end_line kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
-          # whole lines to be highlighted,
-          # eg. added/modified/deleted lines in a diff
-          @out << "\t" * 100 + ansi_clear
-          @out << open_token(@opened.last)
-        end
+        @out << (@line_filler ||= "\t" * 100)
+        end_group kind
       end
       
     private
       
       def open_token kind
-        if color = TOKEN_COLORS[kind]
-          if Hash === color
-            @subcolors = color
-            ansi_colorize(color[:self]) if color[:self]
+        if color = @color_scopes.last[kind]
+          if color.is_a? Hash
+            @color_scopes << color
+            color[:self]
           else
-            @subcolors = {}
-            ansi_colorize(color)
+            @color_scopes << @color_scopes.last
+            color
           end
         else
-          @subcolors = nil
+          @color_scopes << @color_scopes.last
           ''
         end
       end
-      
-      def ansi_colorize(color)
-        Array(color).map { |c| "\e[#{c}m" }.join
-      end
-      def ansi_clear
-        ansi_colorize(0)
-      end
     end
   end
-end
-\ No newline at end of file
+end
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index f52f17e..6d4fa92 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -77,54 +77,57 @@ module CodeRay
     end
     
     TypeFromExt = {
-      'c'        => :c,
-      'cfc'      => :xml,
-      'cfm'      => :xml,
-      'clj'      => :clojure,
-      'css'      => :css,
-      'diff'     => :diff,
-      'dpr'      => :delphi,
-      'erb'      => :erb,
-      'gemspec'  => :ruby,
-      'groovy'   => :groovy,
-      'gvy'      => :groovy,
-      'h'        => :c,
-      'haml'     => :haml,
-      'htm'      => :html,
-      'html'     => :html,
-      'html.erb' => :erb,
-      'java'     => :java,
-      'js'       => :java_script,
-      'json'     => :json,
-      'mab'      => :ruby,
-      'pas'      => :delphi,
-      'patch'    => :diff,
-      'phtml'    => :php,
-      'php'      => :php,
-      'php3'     => :php,
-      'php4'     => :php,
-      'php5'     => :php,
-      'prawn'    => :ruby,
-      'py'       => :python,
-      'py3'      => :python,
-      'pyw'      => :python,
-      'rake'     => :ruby,
-      'raydebug' => :raydebug,
-      'rb'       => :ruby,
-      'rbw'      => :ruby,
-      'rhtml'    => :erb,
-      'rjs'      => :ruby,
-      'rpdf'     => :ruby,
-      'ru'       => :ruby,
-      'rxml'     => :ruby,
-      # 'sch'      => :scheme,
-      'sql'      => :sql,
-      # 'ss'       => :scheme,
-      'tmproj'   => :xml,
-      'xhtml'    => :html,
-      'xml'      => :xml,
-      'yaml'     => :yaml,
-      'yml'      => :yaml,
+      'c'         => :c,
+      'cfc'       => :xml,
+      'cfm'       => :xml,
+      'clj'       => :clojure,
+      'css'       => :css,
+      'diff'      => :diff,
+      'dpr'       => :delphi,
+      'erb'       => :erb,
+      'gemspec'   => :ruby,
+      'groovy'    => :groovy,
+      'gvy'       => :groovy,
+      'h'         => :c,
+      'haml'      => :haml,
+      'htm'       => :html,
+      'html'      => :html,
+      'html.erb'  => :erb,
+      'java'      => :java,
+      'js'        => :java_script,
+      'json'      => :json,
+      'lua'       => :lua,
+      'mab'       => :ruby,
+      'pas'       => :delphi,
+      'patch'     => :diff,
+      'phtml'     => :php,
+      'php'       => :php,
+      'php3'      => :php,
+      'php4'      => :php,
+      'php5'      => :php,
+      'prawn'     => :ruby,
+      'py'        => :python,
+      'py3'       => :python,
+      'pyw'       => :python,
+      'rake'      => :ruby,
+      'raydebug'  => :raydebug,
+      'rb'        => :ruby,
+      'rbw'       => :ruby,
+      'rhtml'     => :erb,
+      'rjs'       => :ruby,
+      'rpdf'      => :ruby,
+      'ru'        => :ruby,
+      'rxml'      => :ruby,
+      'sass'      => :sass,
+      'sql'       => :sql,
+      'taskpaper' => :taskpaper,
+      'template'  => :json,  # AWS CloudFormation template
+      'tmproj'    => :xml,
+      'xaml'      => :xml,
+      'xhtml'     => :html,
+      'xml'       => :xml,
+      'yaml'      => :yaml,
+      'yml'       => :yaml,
     }
     for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
       TypeFromExt[cpp_alias] = :cpp
diff --git a/lib/coderay/helpers/gzip.rb b/lib/coderay/helpers/gzip.rb
deleted file mode 100644
index 245014a..0000000
--- a/lib/coderay/helpers/gzip.rb
+++ /dev/null
@@ -1,41 +0,0 @@
-module CodeRay
-  
-  # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
-  module GZip
-    
-    require 'zlib'
-    
-    # The default zipping level. 7 zips good and fast.
-    DEFAULT_GZIP_LEVEL = 7
-    
-    # Unzips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   print GZip.gunzip(File.read('adresses.gz'))
-    def GZip.gunzip s
-      Zlib::Inflate.inflate s
-    end
-    
-    # Zips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   File.open('adresses.gz', 'w') do |file
-    #     file.write GZip.gzip('Mum: 0123 456 789', 9)
-    #   end
-    #
-    # If you provide a +level+, you can control how strong
-    # the string is compressed:
-    # - 0: no compression, only convert to gzip format
-    # - 1: compress fast
-    # - 7: compress more, but still fast (default)
-    # - 8: compress more, slower
-    # - 9: compress best, very slow
-    def GZip.gzip s, level = DEFAULT_GZIP_LEVEL
-      Zlib::Deflate.new(level).deflate s, Zlib::FINISH
-    end
-    
-  end
-  
-end
diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb
index 137c1ab..d14c5a9 100644
--- a/lib/coderay/helpers/plugin.rb
+++ b/lib/coderay/helpers/plugin.rb
@@ -131,7 +131,7 @@ module CodeRay
     
     # A Hash of plugion_id => Plugin pairs.
     def plugin_hash
-      @plugin_hash ||= make_plugin_hash
+      @plugin_hash ||= (@plugin_hash = make_plugin_hash).tap { load_plugin_map }  # the hash is assigned before load_plugin_map runs; presumably so code required by the map can call plugin_hash without re-entering this branch (confirm)
     end
     
     # Returns an array of all .rb files in the plugin path.
@@ -158,7 +158,6 @@ module CodeRay
     # This is done automatically when plugin_path is called.
     def load_plugin_map
       mapfile = path_to '_map'
-      @plugin_map_loaded = true
       if File.exist? mapfile
         require mapfile
         true
@@ -171,23 +170,16 @@ module CodeRay
     
     # Return a plugin hash that automatically loads plugins.
     def make_plugin_hash
-      @plugin_map_loaded ||= false
       Hash.new do |h, plugin_id|
         id = validate_id(plugin_id)
         path = path_to id
         begin
           require path
         rescue LoadError => boom
-          if @plugin_map_loaded
-            if h.has_key?(:default)
-              warn '%p could not load plugin %p; falling back to %p' % [self, id, h[:default]]
-              h[:default]
-            else
-              raise PluginNotFound, '%p could not load plugin %p: %s' % [self, id, boom]
-            end
+          if h.has_key?(:default)
+            h[:default]
           else
-            load_plugin_map
-            h[plugin_id]
+            raise PluginNotFound, '%p could not load plugin %p: %s' % [self, id, boom]
           end
         else
           # Plugin should have registered by now
@@ -271,7 +263,6 @@ module CodeRay
     end
     
     def aliases
-      plugin_host.load_plugin_map
       plugin_host.plugin_hash.inject [] do |aliases, (key, _)|
         aliases << key if plugin_host[key] == self
         aliases
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index 907cf00..b3f7e17 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -182,16 +182,9 @@ module CodeRay
       # Scan the code and returns all tokens in a Tokens object.
       def tokenize source = nil, options = {}
         options = @options.merge(options)
-        @tokens = options[:tokens] || @tokens || Tokens.new
-        @tokens.scanner = self if @tokens.respond_to? :scanner=
-        case source
-        when Array
-          self.string = self.class.normalize(source.join)
-        when nil
-          reset
-        else
-          self.string = self.class.normalize(source)
-        end
+        
+        set_tokens_from_options options
+        set_string_from_source source
         
         begin
           scan_tokens @tokens, options
@@ -261,6 +254,22 @@ module CodeRay
       def setup  # :doc:
       end
       
+      def set_string_from_source source
+        # Without a source there is no new string to set; just reset.
+        return reset if source.nil?
+        
+        # An Array source is joined into a single string first.
+        code = source.is_a?(Array) ? source.join : source
+        
+        # Every new string goes through the class-level normalize.
+        self.string = self.class.normalize(code)
+      end
+      
+      def set_tokens_from_options options
+        @tokens = options[:tokens] || @tokens || Tokens.new
+        @tokens.scanner = self if @tokens.respond_to? :scanner=
+      end
+      
       # This is the central method, and commonly the only one a
       # subclass implements.
       #
@@ -277,19 +286,15 @@ module CodeRay
         @binary_string = nil if defined? @binary_string
       end
       
-      # Scanner error with additional status information
-      def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
-        raise ScanError, <<-EOE % [
+      SCAN_ERROR_MESSAGE = <<-MESSAGE
 
 
-***ERROR in %s: %s (after %d tokens)
+***ERROR in %s: %s (after %s tokens)
 
 tokens:
 %s
 
-current line: %d  column: %d  pos: %d
-matched: %p  state: %p
-bol? = %p,  eos? = %p
+%s
 
 surrounding code:
 %p  ~~  %p
@@ -297,16 +302,43 @@ surrounding code:
 
 ***ERROR***
 
-        EOE
-          File.basename(caller[0]),
-          msg,
-          tokens.respond_to?(:size) ? tokens.size : 0,
-          tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
-          line, column, pos,
-          matched, state, bol?, eos?,
+      MESSAGE
+      
+      def raise_inspect_arguments message, tokens, state, ambit  # builds the format arguments for SCAN_ERROR_MESSAGE
+        return File.basename(caller[1] || caller[0]),  # caller[0] is raise_inspect itself; caller[1] is the code that reported the error
+          message,
+          tokens_size(tokens),
+          tokens_last(tokens, 10).map(&:inspect).join("\n"),
+          scanner_state_info(state),
           binary_string[pos - ambit, ambit],
-          binary_string[pos, ambit],
-        ], backtrace
+          binary_string[pos, ambit]
+      end
+      
+      SCANNER_STATE_INFO = <<-INFO
+current line: %d  column: %d  pos: %d
+matched: %p  state: %p
+bol?: %p,  eos?: %p
+      INFO
+      
+      def scanner_state_info state
+        SCANNER_STATE_INFO % [
+          line, column, pos,
+          matched, state || 'No state given!',
+          bol?, eos?,
+        ]
+      end
+      
+      # Scanner error with additional status information
+      def raise_inspect message, tokens, state = self.state, ambit = 30, backtrace = caller
+        raise ScanError, SCAN_ERROR_MESSAGE % raise_inspect_arguments(message, tokens, state, ambit), backtrace
+      end
+      
+      def tokens_size tokens
+        tokens.size if tokens.respond_to?(:size)
+      end
+      
+      def tokens_last tokens, n
+        tokens.respond_to?(:last) ? tokens.last(n) : []
       end
       
       # Shorthand for scan_until(/\z/).
diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb
index 7b731ef..732f9c5 100644
--- a/lib/coderay/scanners/css.rb
+++ b/lib/coderay/scanners/css.rb
@@ -7,27 +7,25 @@ module Scanners
     
     KINDS_NOT_LOC = [
       :comment,
-      :class, :pseudo_class, :type,
-      :constant, :directive,
+      :class, :pseudo_class, :tag,
+      :id, :directive,
       :key, :value, :operator, :color, :float, :string,
-      :error, :important,
+      :error, :important, :type,
     ]  # :nodoc:
     
     module RE  # :nodoc:
       Hex = /[0-9a-fA-F]/
-      Unicode = /\\#{Hex}{1,6}(?:\r\n|\s)?/ # differs from standard because it allows uppercase hex too
-      Escape = /#{Unicode}|\\[^\r\n\f0-9a-fA-F]/
-      NMChar = /[-_a-zA-Z0-9]|#{Escape}/
-      NMStart = /[_a-zA-Z]|#{Escape}/
-      NL = /\r\n|\r|\n|\f/
-      String1 = /"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*"?/  # TODO: buggy regexp
-      String2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*'?/  # TODO: buggy regexp
+      Unicode = /\\#{Hex}{1,6}\b/ # differs from standard because it allows uppercase hex too
+      Escape = /#{Unicode}|\\[^\n0-9a-fA-F]/
+      NMChar = /[-_a-zA-Z0-9]/
+      NMStart = /[_a-zA-Z]/
+      String1 = /"(?:[^\n\\"]+|\\\n|#{Escape})*"?/  # TODO: buggy regexp
+      String2 = /'(?:[^\n\\']+|\\\n|#{Escape})*'?/  # TODO: buggy regexp
       String = /#{String1}|#{String2}/
       
       HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/
-      Color = /#{HexColor}/
       
-      Num = /-?(?:[0-9]+|[0-9]*\.[0-9]+)/
+      Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)/
       Name = /#{NMChar}+/
       Ident = /-?#{NMStart}#{NMChar}*/
       AtKeyword = /@#{Ident}/
@@ -35,16 +33,15 @@ module Scanners
       
       reldimensions = %w[em ex px]
       absdimensions = %w[in cm mm pt pc]
-      Unit = Regexp.union(*(reldimensions + absdimensions + %w[s]))
+      Unit = Regexp.union(*(reldimensions + absdimensions + %w[s dpi dppx deg]))
       
       Dimension = /#{Num}#{Unit}/
       
-      Comment = %r! /\* (?: .*? \*/ | .* ) !mx
-      Function = /(?:url|alpha|attr|counters?)\((?:[^)\n\r\f]|\\\))*\)?/
+      Function = /(?:url|alpha|attr|counters?)\((?:[^)\n]|\\\))*\)?/
       
-      Id = /##{Name}/
+      Id = /(?!#{HexColor}\b(?!-))##{Name}/
       Class = /\.#{Name}/
-      PseudoClass = /:#{Name}/
+      PseudoClass = /::?#{Ident}/
       AttributeSelector = /\[[^\]]*\]?/
     end
     
@@ -52,7 +49,7 @@ module Scanners
     
     def setup
       @state = :initial
-      @value_expected = nil
+      @value_expected = false
     end
     
     def scan_tokens encoder, options
@@ -67,13 +64,13 @@ module Scanners
         elsif case states.last
           when :initial, :media
             if match = scan(/(?>#{RE::Ident})(?!\()|\*/ox)
-              encoder.text_token match, :type
+              encoder.text_token match, :tag
               next
             elsif match = scan(RE::Class)
               encoder.text_token match, :class
               next
             elsif match = scan(RE::Id)
-              encoder.text_token match, :constant
+              encoder.text_token match, :id
               next
             elsif match = scan(RE::PseudoClass)
               encoder.text_token match, :pseudo_class
@@ -158,7 +155,7 @@ module Scanners
         elsif match = scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
           encoder.text_token match, :float
           
-        elsif match = scan(/#{RE::Color}/o)
+        elsif match = scan(/#{RE::HexColor}/o)
           encoder.text_token match, :color
           
         elsif match = scan(/! *important/)
@@ -170,7 +167,7 @@ module Scanners
         elsif match = scan(RE::AtKeyword)
           encoder.text_token match, :directive
           
-        elsif match = scan(/ [+>:;,.=()\/] /x)
+        elsif match = scan(/ [+>~:;,.=()\/] /x)
           if match == ':'
             value_expected = true
           elsif match == ';'
diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb
index 38efaf4..af0f755 100644
--- a/lib/coderay/scanners/diff.rb
+++ b/lib/coderay/scanners/diff.rb
@@ -45,7 +45,7 @@ module Scanners
           if match = scan(/--- |\+\+\+ |=+|_+/)
             encoder.begin_line line_kind = :head
             encoder.text_token match, :head
-            if match = scan(/.*?(?=$|[\t\n\x00]|  \(revision)/)
+            if match = scan(/[^\x00\n]+?(?=$|[\t\n]|  \(revision)/)
               encoder.text_token match, :filename
               if options[:highlight_code] && match != '/dev/null'
                 file_type = CodeRay::FileType.fetch(match, :text)
diff --git a/lib/coderay/scanners/java_script.rb b/lib/coderay/scanners/java_script.rb
index 92e3dfa..9eb0a0a 100644
--- a/lib/coderay/scanners/java_script.rb
+++ b/lib/coderay/scanners/java_script.rb
@@ -54,10 +54,17 @@ module Scanners
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     def scan_tokens encoder, options
       
-      state = :initial
-      string_delimiter = nil
+      state, string_delimiter = options[:state] || @state
+      if string_delimiter
+        encoder.begin_group state
+      end
+      
       value_expected = true
       key_expected = false
       function_expected = false
@@ -72,9 +79,10 @@ module Scanners
             value_expected = true if !value_expected && match.index(?\n)
             encoder.text_token match, :space
             
-          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
             value_expected = true
             encoder.text_token match, :comment
+            state = :open_multi_line_comment if self[1]
             
           elsif check(/\.?\d/)
             key_expected = value_expected = false
@@ -176,19 +184,35 @@ module Scanners
           elsif match = scan(/ \\ | $ /x)
             encoder.end_group state
             encoder.text_token match, :error unless match.empty?
+            string_delimiter = nil
             key_expected = value_expected = false
             state = :initial
           else
             raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
           end
           
+        when :open_multi_line_comment
+          if match = scan(%r! .*? \*/ !mx)
+            state = :initial
+          else
+            match = scan(%r! .+ !mx)
+          end
+          value_expected = true
+          encoder.text_token match, :comment if match
+          
         else
-          raise_inspect 'Unknown state', encoder
+          #:nocov:
+          raise_inspect 'Unknown state: %p' % [state], encoder
+          #:nocov:
           
         end
         
       end
       
+      if options[:keep_state]
+        @state = state, string_delimiter
+      end
+      
       if [:string, :regexp].include? state
         encoder.end_group state
       end
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb
index 4e0f462..3754a9b 100644
--- a/lib/coderay/scanners/json.rb
+++ b/lib/coderay/scanners/json.rb
@@ -14,15 +14,17 @@ module Scanners
     
     ESCAPE = / [bfnrt\\"\/] /x  # :nodoc:
     UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc:
+    KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     # See http://json.org/ for a definition of the JSON lexic/grammar.
     def scan_tokens encoder, options
-      
-      state = :initial
-      stack = []
-      key_expected = false
+      state = options[:state] || @state
       
       until eos?
         
@@ -32,18 +34,11 @@ module Scanners
           if match = scan(/ \s+ /x)
             encoder.text_token match, :space
           elsif match = scan(/"/)
-            state = key_expected ? :key : :string
+            state = check(/#{KEY}/o) ? :key : :string
             encoder.begin_group state
             encoder.text_token match, :delimiter
           elsif match = scan(/ [:,\[{\]}] /x)
             encoder.text_token match, :operator
-            case match
-            when ':' then key_expected = false
-            when ',' then key_expected = true if stack.last == :object
-            when '{' then stack << :object; key_expected = true
-            when '[' then stack << :array
-            when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
-            end
           elsif match = scan(/ true | false | null /x)
             encoder.text_token match, :value
           elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
@@ -82,6 +77,10 @@ module Scanners
         end
       end
       
+      if options[:keep_state]
+        @state = state
+      end
+      
       if [:string, :key].include? state
         encoder.end_group state
       end
diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb
new file mode 100644
index 0000000..25bebbe
--- /dev/null
+++ b/lib/coderay/scanners/lua.rb
@@ -0,0 +1,275 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+  # Scanner for the Lua[http://lua.org] programming language.
+  #
+  # The language’s complete syntax is defined in
+  # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+  # which is what this scanner tries to conform to.
+  class Lua < Scanner
+    
+    register_for :lua
+    file_extension 'lua'
+    title 'Lua'
+    
+    # Keywords used in Lua.
+    KEYWORDS = %w[and break do else elseif end
+      for function goto if in
+      local not or repeat return
+      then until while
+    ]
+    
+    # Constants set by the Lua core.
+    PREDEFINED_CONSTANTS = %w[false true nil]
+    
+    # The expressions contained in this array are parts of Lua’s `basic'
+    # library. Although it’s not entirely necessary to load that library,
+    # it is highly recommended and one would have to provide own implementations
+    # of some of these expressions if one does not do so. They however aren’t
+    # keywords, neither are they constants, but nearly predefined, so they
+    # get tagged as `predefined' rather than anything else.
+    #
+    # This list excludes values of form `_UPPERCASE' because the Lua manual
+    # requires such identifiers to be reserved by Lua anyway and they are
+    # highlighted directly accordingly, without the need for specific
+    # identifiers to be listed here.
+    PREDEFINED_EXPRESSIONS = %w[
+      assert collectgarbage dofile error getmetatable
+      ipairs load loadfile next pairs pcall print
+      rawequal rawget rawlen rawset select setmetatable
+      tonumber tostring type xpcall
+    ]
+    
+    # Automatic token kind selection for normal words.
+    IDENT_KIND = CodeRay::WordList.new(:ident).
+      add(KEYWORDS, :keyword).
+      add(PREDEFINED_CONSTANTS, :predefined_constant).
+      add(PREDEFINED_EXPRESSIONS, :predefined)
+    
+    protected
+    
+    # Scanner initialization.
+    def setup
+      @state = :initial
+      @brace_depth = 0
+    end
+    
+    # CodeRay entry hook: scans the Lua source and sends the tokens to encoder.
+    def scan_tokens(encoder, options)
+      state = options[:state] || @state
+      
+      until eos?
+        case state
+        
+        when :initial
+          if match = scan(/\-\-\[\=*\[/)   #--[[ long (possibly multiline) comment ]]
+            @num_equals = match.count("=") # Number must match for comment end
+            encoder.begin_group(:comment)
+            encoder.text_token(match, :delimiter)
+            state = :long_comment
+          
+          elsif match = scan(/--.*$/) # --Lua comment
+            encoder.text_token(match, :comment)
+          
+          elsif match = scan(/\[=*\[/)     # [[ long (possibly multiline) string ]]
+            @num_equals = match.count("=") # Number must match for string end
+            encoder.begin_group(:string)
+            encoder.text_token(match, :delimiter)
+            state = :long_string
+          
+          elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/) # ::goto_label:: (one-character names such as ::a:: are valid too)
+            encoder.text_token(match, :label)
+          
+          elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua
+            encoder.text_token(match, :predefined)
+          
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
+            kind = IDENT_KIND[match]
+            
+            # Extra highlighting for entities following certain keywords
+            if kind == :keyword and match == "function"
+              state = :function_expected
+            elsif kind == :keyword and match == "goto"
+              state = :goto_label_expected
+            elsif kind == :keyword and match == "local"
+              state = :local_var_expected
+            end
+            
+            encoder.text_token(match, kind)
+          
+          elsif match = scan(/\{/) # Opening table brace {
+            encoder.begin_group(:map)
+            encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter)
+            @brace_depth += 1
+            state        = :map
+          
+          elsif match = scan(/\}/) # Closing table brace }
+            if @brace_depth == 1
+              @brace_depth = 0
+              encoder.text_token(match, :delimiter)
+              encoder.end_group(:map)
+            elsif @brace_depth == 0 # Mismatched brace
+              encoder.text_token(match, :error)
+            else
+              @brace_depth -= 1
+              encoder.text_token(match, :inline_delimiter)
+              encoder.end_group(:map)
+              state = :map
+            end
+          
+          elsif match = scan(/["']/) # String delimiters " and '
+            encoder.begin_group(:string)
+            encoder.text_token(match, :delimiter)
+            @start_delim = match
+            state       = :string
+          
+                            # ↓Prefix                hex number ←|→ decimal number
+          elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+            encoder.text_token(match, :float)
+          
+                            # ↓Prefix         hex number ←|→ decimal number
+          elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+            encoder.text_token(match, :integer)
+          
+          elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
+            encoder.text_token(match, :operator)
+          
+          elsif match = scan(/\s+/) # Space
+            encoder.text_token(match, :space)
+          
+          else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors.
+            encoder.text_token(getch, :error)
+          end
+          
+          # It may be that we’re scanning a full-blown subexpression of a table
+          # (tables can contain full expressions in parts).
+          # If this is the case, return to :map scanning state.
+          state = :map if state == :initial && @brace_depth >= 1
+        
+        when :function_expected
+          if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
+            encoder.text_token(match, :operator)
+            state = :initial
+          elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+            encoder.text_token(match, :ident)
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
+            encoder.text_token(match, :function)
+            state = :initial
+          elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+            state = :initial
+          end
+        
+        when :goto_label_expected
+          if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+            encoder.text_token(match, :label)
+            state = :initial
+          elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :local_var_expected
+          if match = scan(/function/) # local function ...
+            encoder.text_token(match, :keyword)
+            state = :function_expected
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+            encoder.text_token(match, :local_variable)
+          elsif match = scan(/,/)
+            encoder.text_token(match, :operator)
+          elsif match = scan(/\=/)
+            encoder.text_token(match, :operator)
+            # After encountering the equal sign, arbitrary expressions are
+            # allowed again, so just return to the main state for further
+            # parsing.
+            state = :initial
+          elsif match = scan(/\n/)
+            encoder.text_token(match, :space)
+            state = :initial
+          elsif match = scan(/\s+/)
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :long_comment
+          if match = scan(/.*?(?=\]={#@num_equals}\])/m)
+            encoder.text_token(match, :content) unless match.empty? # --[[]] has no content; never emit an empty token
+            
+            delim = scan(/\]={#@num_equals}\]/)
+            encoder.text_token(delim, :delimiter)
+          else # No terminator found till EOF
+            encoder.text_token(rest, :error)
+            terminate
+          end
+          encoder.end_group(:comment)
+          state = :initial
+        
+        when :long_string
+          if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences
+            encoder.text_token(match, :content) unless match.empty? # [[]] has no content; never emit an empty token
+            
+            delim = scan(/\]={#@num_equals}\]/)
+            encoder.text_token(delim, :delimiter)
+          else # No terminator found till EOF
+            encoder.text_token(rest, :error)
+            terminate
+          end
+          encoder.end_group(:string)
+          state = :initial
+        
+        when :string
+          if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+            encoder.text_token(match, :content)
+          elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
+            encoder.text_token(match, :char)
+          elsif match = scan(Regexp.compile(@start_delim))
+            encoder.text_token(match, :delimiter)
+            encoder.end_group(:string)
+            state = :initial
+          elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
+            encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+            encoder.end_group(:string)
+            state = :initial
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :map
+          if match = scan(/[,;]/)
+            encoder.text_token(match, :operator)
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
+            encoder.text_token(match, :key)
+            encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
+            encoder.text_token(scan(/\=/), :operator)
+            state = :initial
+          elsif match = scan(/\s+/m)
+            encoder.text_token(match, :space)
+          else
+            # Note this clause doesn’t advance the scan pointer, it’s a kind of
+            # "retry with other options" (the :initial state then of course
+            # advances the pointer).
+            state = :initial
+          end
+        else
+          raise_inspect 'Unknown state: %p' % [state], encoder
+        end
+        
+      end
+      
+      @state = state if options[:keep_state]
+      # Close groups still open at end of input so the token stream stays balanced.
+      encoder.end_group(state == :long_comment ? :comment : :string) if [:string, :long_string, :long_comment].include?(state)
+      @brace_depth.times { encoder.end_group(:map) }
+      encoder
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index a9492ab..09c8b6e 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -157,12 +157,12 @@ module Scanners
             encoder.text_token match, :operator
           
           elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
+            modifiers = self[1]
             string_delimiter = self[2]
-            string_type = docstring_coming ? :docstring : :string
+            string_type = docstring_coming ? :docstring : (modifiers == 'b' ? :binary : :string)
             docstring_coming = false if docstring_coming
             encoder.begin_group string_type
             string_raw = false
-            modifiers = self[1]
             unless modifiers.empty?
               string_raw = !!modifiers.index(?r)
               encoder.text_token modifiers, :modifier
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index c5cf1e2..c282f31 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -96,7 +96,7 @@ module Scanners
                                       /#{patterns::METHOD_NAME}/o)
               
               kind = patterns::IDENT_KIND[match]
-              if kind == :ident && value_expected != :colon_expected && scan(/:(?!:)/)
+              if value_expected != :colon_expected && scan(/:(?!:)/)
                 value_expected = true
                 encoder.text_token match, :key
                 encoder.text_token ':',   :operator
diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb
new file mode 100644
index 0000000..167051d
--- /dev/null
+++ b/lib/coderay/scanners/sass.rb
@@ -0,0 +1,227 @@
+module CodeRay
+module Scanners
+  
+  # A scanner for Sass.
+  class Sass < CSS
+    
+    register_for :sass
+    file_extension 'sass'
+    
+    STRING_CONTENT_PATTERN = {
+      "'" => /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/,
+      '"' => /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/,
+    }
+    
+  protected
+    
+    def setup
+      @state = :initial
+    end
+    
+    def scan_tokens encoder, options
+      states = Array(options[:state] || @state)
+      string_delimiter = nil
+      
+      until eos?
+        
+        if bol? && (match = scan(/(?>( +)?(\/[\*\/])(.+)?)(?=\n)/))
+          encoder.text_token self[1], :space if self[1]
+          encoder.begin_group :comment
+          encoder.text_token self[2], :delimiter
+          encoder.text_token self[3], :content if self[3]
+          if match = scan(/(?:\n+#{self[1]} .*)+/)
+            encoder.text_token match, :content
+          end
+          encoder.end_group :comment
+        elsif match = scan(/\n|[^\n\S]+\n?/)
+          encoder.text_token match, :space
+          if match.index(/\n/)
+            value_expected = false
+            states.pop if states.last == :include
+          end
+        
+        elsif states.last == :sass_inline && (match = scan(/\}/))
+          encoder.text_token match, :inline_delimiter
+          encoder.end_group :inline
+          states.pop
+        
+        elsif case states.last
+          when :initial, :media, :sass_inline
+            if match = scan(/(?>#{RE::Ident})(?!\()/ox)
+              encoder.text_token match, value_expected ? :value : (check(/.*:/) ? :key : :tag)
+              next
+            elsif !value_expected && (match = scan(/\*/))
+              encoder.text_token match, :tag
+              next
+            elsif match = scan(RE::Class)
+              encoder.text_token match, :class
+              next
+            elsif match = scan(RE::Id)
+              encoder.text_token match, :id
+              next
+            elsif match = scan(RE::PseudoClass)
+              encoder.text_token match, :pseudo_class
+              next
+            elsif match = scan(RE::AttributeSelector)
+              # TODO: Improve highlighting inside of attribute selectors.
+              encoder.text_token match[0,1], :operator
+              encoder.text_token match[1..-2], :attribute_name if match.size > 2
+              encoder.text_token match[-1,1], :operator if match[-1] == ?]
+              next
+            elsif match = scan(/(\=|@mixin +)#{RE::Ident}/o)
+              encoder.text_token match, :function
+              next
+            elsif match = scan(/@import\b/)
+              encoder.text_token match, :directive
+              states << :include
+              next
+            elsif match = scan(/@media\b/)
+              encoder.text_token match, :directive
+              # states.push :media_before_name
+              next
+            end
+          
+          when :block
+            if match = scan(/(?>#{RE::Ident})(?!\()/ox)
+              if value_expected
+                encoder.text_token match, :value
+              else
+                encoder.text_token match, :key
+              end
+              next
+            end
+            
+          when :string
+            if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
+              encoder.text_token match, :content
+            elsif match = scan(/['"]/)
+              encoder.text_token match, :delimiter
+              encoder.end_group :string
+              string_delimiter = nil
+              states.pop
+            elsif match = scan(/#\{/)
+              encoder.begin_group :inline
+              encoder.text_token match, :inline_delimiter
+              states.push :sass_inline
+            elsif match = scan(/ \\ | $ /x)
+              encoder.end_group :string
+              encoder.text_token match, :error unless match.empty?
+              states.pop
+            else
+              raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
+            end
+          
+          when :include
+            if match = scan(/[^\s'",]+/)
+              encoder.text_token match, :include
+              next
+            end
+          
+          else
+            #:nocov:
+            raise_inspect 'Unknown state', encoder
+            #:nocov:
+            
+          end
+          
+        elsif match = scan(/\$#{RE::Ident}/o)
+          encoder.text_token match, :variable
+          next
+        
+        elsif match = scan(/&/)
+          encoder.text_token match, :local_variable
+          
+        elsif match = scan(/\+#{RE::Ident}/o)
+          encoder.text_token match, :include
+          value_expected = true
+          
+        elsif match = scan(/\/\*(?:.*?\*\/|.*)|\/\/.*/)
+          encoder.text_token match, :comment
+          
+        elsif match = scan(/#\{/)
+          encoder.begin_group :inline
+          encoder.text_token match, :inline_delimiter
+          states.push :sass_inline
+          
+        elsif match = scan(/\{/)
+          value_expected = false
+          encoder.text_token match, :operator
+          states.push :block
+          
+        elsif match = scan(/\}/)
+          value_expected = false
+          encoder.text_token match, :operator
+          if states.last == :block || states.last == :media
+            states.pop
+          end
+          
+        elsif match = scan(/['"]/)
+          encoder.begin_group :string
+          string_delimiter = match
+          encoder.text_token match, :delimiter
+          if states.include? :sass_inline
+            content = scan_until(/(?=#{string_delimiter}|\}|\z)/)
+            encoder.text_token content, :content unless content.empty?
+            encoder.text_token string_delimiter, :delimiter if scan(/#{string_delimiter}/)
+            encoder.end_group :string
+          else
+            states.push :string
+          end
+          
+        elsif match = scan(/#{RE::Function}/o)
+          encoder.begin_group :function
+          start = match[/^[-\w]+\(/]
+          encoder.text_token start, :delimiter
+          if match[-1] == ?)
+            encoder.text_token match[start.size..-2], :content
+            encoder.text_token ')', :delimiter
+          else
+            encoder.text_token match[start.size..-1], :content
+          end
+          encoder.end_group :function
+          
+        elsif match = scan(/[a-z][-a-z_]*(?=\()/o)
+          encoder.text_token match, :predefined
+          
+        elsif match = scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
+          encoder.text_token match, :float
+          
+        elsif match = scan(/#{RE::HexColor}/o)
+          encoder.text_token match, :color
+          
+        elsif match = scan(/! *(?:important|optional)/)
+          encoder.text_token match, :important
+          
+        elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
+          encoder.text_token match, :color
+          
+        elsif match = scan(/@else if\b|#{RE::AtKeyword}/)
+          encoder.text_token match, :directive
+          value_expected = true
+          
+        elsif match = scan(/ == | != | [-+*\/>~:;,.=()] /x)
+          if match == ':'
+            value_expected = true
+          elsif match == ';'
+            value_expected = false
+          end
+          encoder.text_token match, :operator
+          
+        else
+          encoder.text_token getch, :error
+          
+        end
+        
+      end
+      
+      if options[:keep_state]
+        @state = states
+      end
+      
+      encoder
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/scanners/taskpaper.rb b/lib/coderay/scanners/taskpaper.rb
new file mode 100644
index 0000000..42670bc
--- /dev/null
+++ b/lib/coderay/scanners/taskpaper.rb
@@ -0,0 +1,36 @@
+module CodeRay
+module Scanners
+  
+  class Taskpaper < Scanner
+    
+    register_for :taskpaper
+    file_extension 'taskpaper'
+    
+  protected
+    
+    def scan_tokens encoder, options
+      until eos?
+        if match = scan(/\S.*:.*$/)                  # project
+          encoder.text_token(match, :namespace)
+        elsif match = scan(/-.+@done.*/)             # completed task
+          encoder.text_token(match, :done)
+        elsif match = scan(/-(?:[^@\n]+|@(?!due))*/) # task
+          encoder.text_token(match, :plain)
+        elsif match = scan(/@due.*/)                 # due date
+          encoder.text_token(match, :important)
+        elsif match = scan(/.+/)                     # comment
+          encoder.text_token(match, :comment)
+        elsif match = scan(/\s+/)                    # space
+          encoder.text_token(match, :space)
+        else                                         # other
+          encoder.text_token getch, :error
+        end
+      end
+      
+      encoder
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb
index 8506d10..a869d9e 100644
--- a/lib/coderay/styles/alpha.rb
+++ b/lib/coderay/styles/alpha.rb
@@ -39,6 +39,9 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
   color: gray !important;
   text-decoration: none !important;
 }
+.CodeRay .line-numbers pre {
+  word-break: normal;
+}
 .CodeRay .line-numbers a:target { color: blue !important; }
 .CodeRay .line-numbers .highlighted { color: red !important; }
 .CodeRay .line-numbers .highlighted a { color: red !important; }
@@ -53,45 +56,52 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .annotation { color:#007 }
 .attribute-name { color:#b48 }
 .attribute-value { color:#700 }
-.binary { color:#509 }
+.binary { color:#549 }
+.binary .char { color:#325 }
+.binary .delimiter { color:#325 }
+.char { color:#D20 }
 .char .content { color:#D20 }
 .char .delimiter { color:#710 }
-.char { color:#D20 }
 .class { color:#B06; font-weight:bold }
 .class-variable { color:#369 }
 .color { color:#0A0 }
 .comment { color:#777 }
 .comment .char { color:#444 }
 .comment .delimiter { color:#444 }
-.complex { color:#A08 }
 .constant { color:#036; font-weight:bold }
 .decorator { color:#B0B }
 .definition { color:#099; font-weight:bold }
 .delimiter { color:black }
 .directive { color:#088; font-weight:bold }
-.doc { color:#970 }
-.doc-string { color:#D42; font-weight:bold }
+.docstring { color:#D42; }
 .doctype { color:#34b }
+.done { text-decoration: line-through; color: gray }
 .entity { color:#800; font-weight:bold }
 .error { color:#F00; background-color:#FAA }
 .escape  { color:#666 }
 .exception { color:#C00; font-weight:bold }
 .float { color:#60E }
 .function { color:#06B; font-weight:bold }
+.function .delimiter { color:#024; font-weight:bold }
 .global-variable { color:#d70 }
 .hex { color:#02b }
-.imaginary { color:#f00 }
+.id  { color:#33D; font-weight:bold }
 .include { color:#B44; font-weight:bold }
 .inline { background-color: hsla(0,0%,0%,0.07); color: black }
 .inline-delimiter { font-weight: bold; color: #666 }
 .instance-variable { color:#33B }
 .integer  { color:#00D }
+.imaginary { color:#f00 }
+.important { color:#D00 }
+.key { color: #606 }
 .key .char { color: #60f }
 .key .delimiter { color: #404 }
-.key { color: #606 }
 .keyword { color:#080; font-weight:bold }
 .label { color:#970; font-weight:bold }
 .local-variable { color:#963 }
+.map .content { color:#808 }
+.map .delimiter { color:#40A}
+.map { background-color:hsla(200,100%,50%,0.06); }
 .namespace { color:#707; font-weight:bold }
 .octal { color:#40E }
 .operator { }
@@ -100,30 +110,30 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .predefined-type { color:#0a5; font-weight:bold }
 .preprocessor { color:#579 }
 .pseudo-class { color:#00C; font-weight:bold }
+.regexp { background-color:hsla(300,100%,50%,0.06); }
 .regexp .content { color:#808 }
 .regexp .delimiter { color:#404 }
 .regexp .modifier { color:#C2C }
-.regexp { background-color:hsla(300,100%,50%,0.06); }
 .reserved { color:#080; font-weight:bold }
+.shell { background-color:hsla(120,100%,50%,0.06); }
 .shell .content { color:#2B2 }
 .shell .delimiter { color:#161 }
-.shell { background-color:hsla(120,100%,50%,0.06); }
+.string { background-color:hsla(0,100%,50%,0.05); }
 .string .char { color: #b0b }
 .string .content { color: #D20 }
 .string .delimiter { color: #710 }
 .string .modifier { color: #E40 }
-.string { background-color:hsla(0,100%,50%,0.05); }
+.symbol { color:#A60 }
 .symbol .content { color:#A60 }
 .symbol .delimiter { color:#630 }
-.symbol { color:#A60 }
-.tag { color:#070 }
+.tag { color:#070; font-weight:bold }
 .type { color:#339; font-weight:bold }
-.value { color: #088; }
-.variable  { color:#037 }
+.value { color: #088 }
+.variable { color:#037 }
 
 .insert { background: hsla(120,100%,50%,0.12) }
 .delete { background: hsla(0,100%,50%,0.12) }
-.change { color: #bbf; background: #007; }
+.change { color: #bbf; background: #007 }
 .head { color: #f8f; background: #505 }
 .head .filename { color: white; }
 
diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb
index 3b8d07e..9137a49 100755
--- a/lib/coderay/token_kinds.rb
+++ b/lib/coderay/token_kinds.rb
@@ -10,81 +10,78 @@ module CodeRay
   TokenKinds.compare_by_identity if TokenKinds.respond_to? :compare_by_identity
   
   TokenKinds.update(  # :nodoc:
-    :annotation          => 'annotation',
-    :attribute_name      => 'attribute-name',
-    :attribute_value     => 'attribute-value',
-    :binary              => 'bin',
-    :char                => 'char',
-    :class               => 'class',
-    :class_variable      => 'class-variable',
-    :color               => 'color',
-    :comment             => 'comment',
-    :complex             => 'complex',
-    :constant            => 'constant',
-    :content             => 'content',
-    :debug               => 'debug',
-    :decorator           => 'decorator',
-    :definition          => 'definition',
-    :delimiter           => 'delimiter',
-    :directive           => 'directive',
-    :doc                 => 'doc',
-    :doctype             => 'doctype',
-    :doc_string          => 'doc-string',
-    :entity              => 'entity',
-    :error               => 'error',
-    :escape              => 'escape',
-    :exception           => 'exception',
-    :filename            => 'filename',
-    :float               => 'float',
-    :function            => 'function',
-    :global_variable     => 'global-variable',
-    :hex                 => 'hex',
-    :imaginary           => 'imaginary',
-    :important           => 'important',
-    :include             => 'include',
-    :inline              => 'inline',
-    :inline_delimiter    => 'inline-delimiter',
-    :instance_variable   => 'instance-variable',
-    :integer             => 'integer',
-    :key                 => 'key',
-    :keyword             => 'keyword',
-    :label               => 'label',
-    :local_variable      => 'local-variable',
-    :modifier            => 'modifier',
-    :namespace           => 'namespace',
-    :octal               => 'octal',
-    :predefined          => 'predefined',
-    :predefined_constant => 'predefined-constant',
-    :predefined_type     => 'predefined-type',
-    :preprocessor        => 'preprocessor',
-    :pseudo_class        => 'pseudo-class',
-    :regexp              => 'regexp',
-    :reserved            => 'reserved',
-    :shell               => 'shell',
-    :string              => 'string',
-    :symbol              => 'symbol',
-    :tag                 => 'tag',
-    :type                => 'type',
-    :value               => 'value',
-    :variable            => 'variable',
+    :debug               => 'debug',              # highlight for debugging (white on blue background)
     
-    :change              => 'change',
-    :delete              => 'delete',
-    :head                => 'head',
-    :insert              => 'insert',
+    :annotation          => 'annotation',         # Groovy, Java
+    :attribute_name      => 'attribute-name',     # HTML, CSS
+    :attribute_value     => 'attribute-value',    # HTML
+    :binary              => 'binary',             # Python, Ruby
+    :char                => 'char',               # most scanners, also inside of strings
+    :class               => 'class',              # lots of scanners, for different purposes also in CSS
+    :class_variable      => 'class-variable',     # Ruby, YAML
+    :color               => 'color',              # CSS
+    :comment             => 'comment',            # most scanners
+    :constant            => 'constant',           # PHP, Ruby
+    :content             => 'content',            # inside of strings, most scanners
+    :decorator           => 'decorator',          # Python
+    :definition          => 'definition',         # CSS
+    :delimiter           => 'delimiter',          # inside strings, comments and other types
+    :directive           => 'directive',          # lots of scanners
+    :doctype             => 'doctype',            # Groovy, HTML, Ruby, YAML
+    :docstring           => 'docstring',          # Python
+    :done                => 'done',               # Taskpaper
+    :entity              => 'entity',             # HTML
+    :error               => 'error',              # invalid token, most scanners
+    :escape              => 'escape',             # Ruby (string inline variables like #$foo, #@bar)
+    :exception           => 'exception',          # Java, PHP, Python
+    :filename            => 'filename',           # Diff
+    :float               => 'float',              # most scanners
+    :function            => 'function',           # CSS, JavaScript, PHP
+    :global_variable     => 'global-variable',    # Ruby, YAML
+    :hex                 => 'hex',                # hexadecimal number; lots of scanners
+    :id                  => 'id',                 # CSS
+    :imaginary           => 'imaginary',          # Python
+    :important           => 'important',          # CSS, Taskpaper
+    :include             => 'include',            # C, Groovy, Java, Python, Sass
+    :inline              => 'inline',             # nested code, eg. inline string evaluation; lots of scanners
+    :inline_delimiter    => 'inline-delimiter',   # used instead of :inline > :delimiter FIXME: Why use inline_delimiter?
+    :instance_variable   => 'instance-variable',  # Ruby
+    :integer             => 'integer',            # most scanners
+    :key                 => 'key',                # lots of scanners, used together with :value
+    :keyword             => 'keyword',            # reserved word that's actually implemented; most scanners
+    :label               => 'label',              # C, PHP
+    :local_variable      => 'local-variable',     # local and magic variables; some scanners
+    :map                 => 'map',                # Lua tables
+    :modifier            => 'modifier',           # used inside of strings; lots of scanners
+    :namespace           => 'namespace',          # Clojure, Java, Taskpaper
+    :octal               => 'octal',              # lots of scanners
+    :predefined          => 'predefined',         # predefined function: lots of scanners
+    :predefined_constant => 'predefined-constant',# lots of scanners
+    :predefined_type     => 'predefined-type',    # C, Java, PHP
+    :preprocessor        => 'preprocessor',       # C, Delphi, HTML
+    :pseudo_class        => 'pseudo-class',       # CSS
+    :regexp              => 'regexp',             # Groovy, JavaScript, Ruby
+    :reserved            => 'reserved',           # most scanners
+    :shell               => 'shell',              # Ruby
+    :string              => 'string',             # most scanners
+    :symbol              => 'symbol',             # Clojure, Ruby, YAML
+    :tag                 => 'tag',                # CSS, HTML
+    :type                => 'type',               # CSS, Java, SQL, YAML
+    :value               => 'value',              # used together with :key; CSS, JSON, YAML
+    :variable            => 'variable',           # Sass, SQL, YAML
     
-    :eyecatcher          => 'eyecatcher',
+    :change              => 'change',             # Diff
+    :delete              => 'delete',             # Diff
+    :head                => 'head',               # Diff, YAML
+    :insert              => 'insert',             # Diff
+    :eyecatcher          => 'eyecatcher',         # Diff
     
-    :ident               => false,
-    :operator            => false,
+    :ident               => false,                # almost all scanners
+    :operator            => false,                # almost all scanners
     
-    :space               => false,
-    :plain               => false
+    :space               => false,                # almost all scanners
+    :plain               => false                 # almost all scanners
   )
   
-  TokenKinds[:method]    = TokenKinds[:function]
-  TokenKinds[:escape]    = TokenKinds[:delimiter]
-  TokenKinds[:docstring] = TokenKinds[:comment]
-  
-  TokenKinds.freeze
+  TokenKinds[:method] = TokenKinds[:function]
 end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index c747017..e7bffce 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,55 +1,43 @@
 module CodeRay
   
-  # GZip library for writing and reading token dumps.
-  autoload :GZip, coderay_path('helpers', 'gzip')
-  
-  # = Tokens  TODO: Rewrite!
-  #
-  # The Tokens class represents a list of tokens returnd from
-  # a Scanner.
+  # The Tokens class represents a list of tokens returned from
+  # a Scanner. It's actually just an Array with a few helper methods.
   #
-  # A token is not a special object, just a two-element Array
-  # consisting of
+  # A token itself is not a special object, just two elements in an Array:
   # * the _token_ _text_ (the original source of the token in a String) or
   #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
   # * the _token_ _kind_ (a Symbol representing the type of the token)
   #
-  # A token looks like this:
+  # It looks like this:
   #
-  #   ['# It looks like this', :comment]
-  #   ['3.1415926', :float]
-  #   ['$^', :error]
+  #   ..., '# It looks like this', :comment, ...
+  #   ..., '3.1415926', :float, ...
+  #   ..., '$^', :error, ...
   #
   # Some scanners also yield sub-tokens, represented by special
-  # token actions, namely begin_group and end_group.
+  # token actions, for example :begin_group and :end_group.
   #
   # The Ruby scanner, for example, splits "a string" into:
   #
   #  [
-  #   [:begin_group, :string],
-  #   ['"', :delimiter],
-  #   ['a string', :content],
-  #   ['"', :delimiter],
-  #   [:end_group, :string]
+  #   :begin_group, :string,
+  #   '"',          :delimiter,
+  #   'a string',   :content,
+  #   '"',          :delimiter,
+  #   :end_group,   :string
   #  ]
   #
-  # Tokens is the interface between Scanners and Encoders:
-  # The input is split and saved into a Tokens object. The Encoder
-  # then builds the output from this object.
-  #
-  # Thus, the syntax below becomes clear:
+  # Tokens can be used to save the output of a Scanner in a simple
+  # Ruby object that can be sent to an Encoder later:
   #
-  #   CodeRay.scan('price = 2.59', :ruby).html
-  #   # the Tokens object is here -------^
-  #
-  # See how small it is? ;)
+  #   tokens = CodeRay.scan('price = 2.59', :ruby).tokens
+  #   tokens.encode(:html)
+  #   tokens.html
+  #   CodeRay.encoder(:html).encode_tokens(tokens)
   #
   # Tokens gives you the power to handle pre-scanned code very easily:
-  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
-  # that you put in your DB.
-  # 
-  # It also allows you to generate tokens directly (without using a scanner),
-  # to load them from a file, and still use any Encoder that CodeRay provides.
+  # You can serialize it to a JSON string and store it in a database, pass it
+  # around to encode it more than once, send it to other algorithms...
   class Tokens < Array
     
     # The Scanner instance that created the tokens.
@@ -58,8 +46,7 @@ module CodeRay
     # Encode the tokens using encoder.
     #
     # encoder can be
-    # * a symbol like :html oder :statistic
-    # * an Encoder class
+    # * a plugin name like :html or 'statistic'
     # * an Encoder object
     #
     # options are passed to the encoder.
@@ -93,6 +80,7 @@ module CodeRay
     # This method is used by @Scanner#tokenize@ when called with an Array
     # of source strings. The Diff encoder uses it for inline highlighting.
     def split_into_parts *sizes
+      return Array.new(sizes.size) { Tokens.new } if size == 2 && first == ''
       parts = []
       opened = []
       content = nil
@@ -156,53 +144,11 @@ module CodeRay
       parts
     end
     
-    # Dumps the object into a String that can be saved
-    # in files or databases.
-    #
-    # The dump is created with Marshal.dump;
-    # In addition, it is gzipped using GZip.gzip.
-    #
-    # The returned String object includes Undumping
-    # so it has an #undump method. See Tokens.load.
-    #
-    # You can configure the level of compression,
-    # but the default value 7 should be what you want
-    # in most cases as it is a good compromise between
-    # speed and compression rate.
-    #
-    # See GZip module.
-    def dump gzip_level = 7
-      dump = Marshal.dump self
-      dump = GZip.gzip dump, gzip_level
-      dump.extend Undumping
-    end
-    
     # Return the actual number of tokens.
     def count
       size / 2
     end
     
-    # Include this module to give an object an #undump
-    # method.
-    #
-    # The string returned by Tokens.dump includes Undumping.
-    module Undumping
-      # Calls Tokens.load with itself.
-      def undump
-        Tokens.load self
-      end
-    end
-    
-    # Undump the object using Marshal.load, then
-    # unzip it using GZip.gunzip.
-    #
-    # The result is commonly a Tokens object, but
-    # this is not guaranteed.
-    def Tokens.load dump
-      dump = GZip.gunzip dump
-      @dump = Marshal.load dump
-    end
-    
     alias text_token push
     def begin_group kind; push :begin_group, kind end
     def end_group kind; push :end_group, kind end
diff --git a/lib/coderay/version.rb b/lib/coderay/version.rb
index bfb5f24..4b4f085 100644
--- a/lib/coderay/version.rb
+++ b/lib/coderay/version.rb
@@ -1,3 +1,3 @@
 module CodeRay
-  VERSION = '1.0.9'
+  VERSION = '1.1.0'
 end