34 files changed, 1384 insertions, 659 deletions
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index c03d3fb..f4db330 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -9,7 +9,6 @@ module Encoders
   #
   # You cannot fully restore the tokens information from the
   # output, because consecutive :space tokens are merged.
-  # Use Tokens#dump for caching purposes.
   # 
   # See also: Scanners::Debug
   class Debug < Encoder
@@ -18,38 +17,26 @@ module Encoders
     
     FILE_EXTENSION = 'raydebug'
     
-    def initialize options = {}
-      super
-      @opened = []
-    end
-    
     def text_token text, kind
-      raise 'empty token' if $CODERAY_DEBUG && text.empty?
       if kind == :space
         @out << text
       else
-        # TODO: Escape (
-        text = text.gsub(/[)\\]/, '\\\\\0') if text.index(/[)\\]/)
-        @out << kind.to_s << '(' << text << ')'
+        text = text.gsub('\\', '\\\\\\\\') if text.index('\\')
+        text = text.gsub(')',  '\\\\)')    if text.index(')')
+        @out << "#{kind}(#{text})"
       end
     end
     
     def begin_group kind
-      @opened << kind
-      @out << kind.to_s << '<'
+      @out << "#{kind}<"
     end
     
     def end_group kind
-      if @opened.last != kind
-        puts @out
-        raise "we are inside #{@opened.inspect}, not #{kind}"
-      end
-      @opened.pop
       @out << '>'
     end
     
     def begin_line kind
-      @out << kind.to_s << '['
+      @out << "#{kind}["
     end
     
     def end_line kind
diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb
new file mode 100644
index 0000000..a4eba2c
--- /dev/null
+++ b/lib/coderay/encoders/debug_lint.rb
@@ -0,0 +1,63 @@
+module CodeRay
+module Encoders
+  
+  load :lint
+  
+  # = Debug Lint Encoder
+  #
+  # Debug encoder with additional checks for:
+  # 
+  # - empty tokens and unknown token kinds
+  # - incorrect nesting
+  # 
+  # It will raise a Lint::InvalidTokenStream exception when any of the above occurs (groups still open at the end of the stream raise a RuntimeError instead).
+  # 
+  # See also: Encoders::Debug
+  class DebugLint < Debug
+    
+    register_for :debug_lint
+    
+    def text_token text, kind
+      raise Lint::EmptyToken,       'empty token for %p' % [kind] if text.empty?
+      raise Lint::UnknownTokenKind, 'unknown token kind %p (text was %p)' % [kind, text] unless TokenKinds.has_key? kind
+      super
+    end
+    
+    def begin_group kind
+      @opened << kind
+      super
+    end
+    
+    def end_group kind
+      raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+      @opened.pop
+      super
+    end
+    
+    def begin_line kind
+      @opened << kind
+      super
+    end
+    
+    def end_line kind
+      raise Lint::IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+      @opened.pop
+      super
+    end
+    
+    protected
+    
+    def setup options
+      super
+      @opened = []
+    end
+    
+    def finish options
+      raise 'Some tokens still open at end of token stream: %p' % [@opened] unless @opened.empty?
+      super
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
index 0fd1317..d2ebb5a 100644
--- a/lib/coderay/encoders/html.rb
+++ b/lib/coderay/encoders/html.rb
@@ -126,22 +126,21 @@ module Encoders
     
   protected
     
-    HTML_ESCAPE = {  #:nodoc:
-      '&' => '&amp;',
-      '"' => '&quot;',
-      '>' => '&gt;',
-      '<' => '&lt;',
-    }
+    def self.make_html_escape_hash
+      {
+        '&' => '&amp;',
+        '"' => '&quot;',
+        '>' => '&gt;',
+        '<' => '&lt;',
+        # "\t" => will be set to ' ' * options[:tab_width] during setup
+      }.tap do |hash|
+        # Replace ASCII control codes with a space, except \x9 == \t and \xA == \n.
+        (Array(0x00..0x8) + Array(0xB..0x1F)).each { |invalid| hash[invalid.chr] = ' ' }
+      end
+    end
     
-    # This was to prevent illegal HTML.
-    # Strange chars should still be avoided in codes.
-    evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s]
-    evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
-    #ansi_chars = Array(0x7f..0xff)
-    #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
-    # \x9 (\t) and \xA (\n) not included
-    #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/
-    HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/
+    HTML_ESCAPE = make_html_escape_hash
+    HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1F]/
     
     TOKEN_KIND_TO_INFO = Hash.new do |h, kind|
       h[kind] = kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize }
@@ -172,77 +171,41 @@ module Encoders
     def setup options
       super
       
+      check_options! options
+      
       if options[:wrap] || options[:line_numbers]
         @real_out = @out
         @out = ''
       end
       
-      options[:break_lines] = true if options[:line_numbers] == :inline
-      
       @break_lines = (options[:break_lines] == true)
       
-      @HTML_ESCAPE = HTML_ESCAPE.dup
-      @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
+      @HTML_ESCAPE = HTML_ESCAPE.merge("\t" => ' ' * options[:tab_width])
       
       @opened = []
       @last_opened = nil
       @css = CSS.new options[:style]
       
-      hint = options[:hint]
-      if hint && ![:debug, :info, :info_long].include?(hint)
-        raise ArgumentError, "Unknown value %p for :hint; \
-          expected :info, :info_long, :debug, false, or nil." % hint
-      end
-      
-      css_classes = TokenKinds
-      case options[:css]
-      when :class
-        @span_for_kind = Hash.new do |h, k|
-          if k.is_a? ::Symbol
-            kind = k_dup = k
-          else
-            kind = k.first
-            k_dup = k.dup
-          end
-          if kind != :space && (hint || css_class = css_classes[kind])
-            title = HTML.token_path_to_hint hint, k if hint
-            css_class ||= css_classes[kind]
-            h[k_dup] = "<span#{title}#{" class=\"#{css_class}\"" if css_class}>"
-          else
-            h[k_dup] = nil
-          end
-        end
-      when :style
-        @span_for_kind = Hash.new do |h, k|
-          kind = k.is_a?(Symbol) ? k : k.first
-          h[k.is_a?(Symbol) ? k : k.dup] =
-            if kind != :space && (hint || css_classes[kind])
-              title = HTML.token_path_to_hint hint, k if hint
-              style = @css.get_style Array(k).map { |c| css_classes[c] }
-              "<span#{title}#{" style=\"#{style}\"" if style}>"
-            end
-        end
-      else
-        raise ArgumentError, "Unknown value %p for :css." % options[:css]
-      end
+      @span_for_kinds = make_span_for_kinds(options[:css], options[:hint])
       
       @set_last_opened = options[:hint] || options[:css] == :style
     end
     
     def finish options
       unless @opened.empty?
-        warn '%d tokens still open: %p' % [@opened.size, @opened] if $CODERAY_DEBUG
         @out << '</span>' while @opened.pop
         @last_opened = nil
       end
       
-      @out.extend Output
-      @out.css = @css
-      if options[:line_numbers]
-        Numbering.number! @out, options[:line_numbers], options
+      if @out.respond_to? :to_str
+        @out.extend Output
+        @out.css = @css
+        if options[:line_numbers]
+          Numbering.number! @out, options[:line_numbers], options
+        end
+        @out.wrap! options[:wrap]
+        @out.apply_title! options[:title]
       end
-      @out.wrap! options[:wrap]
-      @out.apply_title! options[:title]
       
       if defined?(@real_out) && @real_out
         @real_out << @out
@@ -255,20 +218,10 @@ module Encoders
   public
     
     def text_token text, kind
-      if text =~ /#{HTML_ESCAPE_PATTERN}/o
-        text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
-      end
+      style = @span_for_kinds[@last_opened ? [kind, *@opened] : kind]
       
-      style = @span_for_kind[@last_opened ? [kind, *@opened] : kind]
-      
-      if @break_lines && (i = text.index("\n")) && (c = @opened.size + (style ? 1 : 0)) > 0
-        close = '</span>' * c
-        reopen = ''
-        @opened.each_with_index do |k, index|
-          reopen << (@span_for_kind[index > 0 ? [k, *@opened[0 ... index ]] : k] || '<span>')
-        end
-        text[i .. -1] = text[i .. -1].gsub("\n", "#{close}\n#{reopen}#{style}")
-      end
+      text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } if text =~ /#{HTML_ESCAPE_PATTERN}/o
+      text = break_lines(text, style) if @break_lines && (style || @opened.size > 0) && text.index("\n")
       
       if style
         @out << style << text << '</span>'
@@ -279,24 +232,19 @@ module Encoders
     
     # token groups, eg. strings
     def begin_group kind
-      @out << (@span_for_kind[@last_opened ? [kind, *@opened] : kind] || '<span>')
+      @out << (@span_for_kinds[@last_opened ? [kind, *@opened] : kind] || '<span>')
       @opened << kind
       @last_opened = kind if @set_last_opened
     end
     
     def end_group kind
-      if $CODERAY_DEBUG && (@opened.empty? || @opened.last != kind)
-        warn 'Malformed token stream: Trying to close a token group (%p) that is not open. Open are: %p.' % [kind, @opened[1..-1]]
-      end
-      if @opened.pop
-        @out << '</span>'
-        @last_opened = @opened.last if @last_opened
-      end
+      check_group_nesting 'token group', kind if $CODERAY_DEBUG
+      close_span
     end
     
     # whole lines to be highlighted, eg. a deleted line in a diff
     def begin_line kind
-      if style = @span_for_kind[@last_opened ? [kind, *@opened] : kind]
+      if style = @span_for_kinds[@last_opened ? [kind, *@opened] : kind]
         if style['class="']
           @out << style.sub('class="', 'class="line ')
         else
@@ -310,15 +258,74 @@ module Encoders
     end
     
     def end_line kind
-      if $CODERAY_DEBUG && (@opened.empty? || @opened.last != kind)
-        warn 'Malformed token stream: Trying to close a line (%p) that is not open. Open are: %p.' % [kind, @opened[1..-1]]
+      check_group_nesting 'line', kind if $CODERAY_DEBUG
+      close_span
+    end
+    
+  protected
+    
+    def check_options! options
+      unless [false, nil, :debug, :info, :info_long].include? options[:hint]
+        raise ArgumentError, "Unknown value %p for :hint; expected :info, :info_long, :debug, false, or nil." % [options[:hint]]
       end
+      
+      unless [:class, :style].include? options[:css]
+        raise ArgumentError, 'Unknown value %p for :css.' % [options[:css]]
+      end
+      
+      options[:break_lines] = true if options[:line_numbers] == :inline
+    end
+    
+    def css_class_for_kinds kinds
+      TokenKinds[kinds.is_a?(Symbol) ? kinds : kinds.first]
+    end
+    
+    def style_for_kinds kinds
+      css_classes = kinds.is_a?(Array) ? kinds.map { |c| TokenKinds[c] } : [TokenKinds[kinds]]
+      @css.get_style_for_css_classes css_classes
+    end
+    
+    def make_span_for_kinds method, hint
+      Hash.new do |h, kinds|
+        begin
+          css_class = css_class_for_kinds(kinds)
+          title     = HTML.token_path_to_hint hint, kinds if hint
+          
+          if css_class || title
+            if method == :style
+              style = style_for_kinds(kinds)
+              "<span#{title}#{" style=\"#{style}\"" if style}>"
+            else
+              "<span#{title}#{" class=\"#{css_class}\"" if css_class}>"
+            end
+          end
+        end.tap do |span|
+          h.clear if h.size >= 100
+          h[kinds] = span
+        end
+      end
+    end
+    
+    def check_group_nesting name, kind
+      if @opened.empty? || @opened.last != kind
+        warn "Malformed token stream: Trying to close a #{name} (%p) that is not open. Open are: %p." % [kind, @opened[1..-1]]
+      end
+    end
+    
+    def break_lines text, style
+      reopen = ''
+      @opened.each_with_index do |kind, index|
+        reopen << (@span_for_kinds[index > 0 ? [kind, *@opened[0...index]] : kind] || '<span>')
+      end
+      text.gsub("\n", "#{'</span>' * @opened.size}#{'</span>' if style}\n#{reopen}#{style}")
+    end
+    
+    def close_span
       if @opened.pop
         @out << '</span>'
         @last_opened = @opened.last if @last_opened
       end
     end
-    
   end
   
 end
diff --git a/lib/coderay/encoders/html/css.rb b/lib/coderay/encoders/html/css.rb
index 6de4b46..164d7f8 100644
--- a/lib/coderay/encoders/html/css.rb
+++ b/lib/coderay/encoders/html/css.rb
@@ -11,7 +11,7 @@ module Encoders
       end
 
       def initialize style = :default
-        @classes = Hash.new
+        @styles = Hash.new
         style = CSS.load_stylesheet style
         @stylesheet = [
           style::CSS_MAIN_STYLES,
@@ -20,12 +20,12 @@ module Encoders
         parse style::TOKEN_COLORS
       end
 
-      def get_style styles
-        cl = @classes[styles.first]
+      def get_style_for_css_classes css_classes
+        cl = @styles[css_classes.first]
         return '' unless cl
         style = ''
-        1.upto styles.size do |offset|
-          break if style = cl[styles[offset .. -1]]
+        1.upto css_classes.size do |offset|
+          break if style = cl[css_classes[offset .. -1]]
         end
         # warn 'Style not found: %p' % [styles] if style.empty?
         return style
@@ -52,8 +52,8 @@ module Encoders
           for selector in selectors.split(',')
             classes = selector.scan(/[-\w]+/)
             cl = classes.pop
-            @classes[cl] ||= Hash.new
-            @classes[cl][classes] = style.to_s.strip.delete(' ').chomp(';')
+            @styles[cl] ||= Hash.new
+            @styles[cl][classes] = style.to_s.strip.delete(' ').chomp(';')
           end
         end
       end
diff --git a/lib/coderay/encoders/html/numbering.rb b/lib/coderay/encoders/html/numbering.rb
index 332145b..a1b9c04 100644
--- a/lib/coderay/encoders/html/numbering.rb
+++ b/lib/coderay/encoders/html/numbering.rb
@@ -26,7 +26,7 @@ module Encoders
               "<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>"
             end
           else
-            proc { |line| line.to_s }  # :to_s.to_proc in Ruby 1.8.7+
+            :to_s.to_proc
           end
         
         bold_every = options[:bold_every]
@@ -75,7 +75,7 @@ module Encoders
           line_number = start
           output.gsub!(/^.*$\n?/) do |line|
             line_number_text = bolding.call line_number
-            indent = ' ' * (max_width - line_number.to_s.size)  # TODO: Optimize (10^x)
+            indent = ' ' * (max_width - line_number.to_s.size)
             line_number += 1
             "<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{line}"
           end
diff --git a/lib/coderay/encoders/lint.rb b/lib/coderay/encoders/lint.rb
new file mode 100644
index 0000000..88c8bd1
--- /dev/null
+++ b/lib/coderay/encoders/lint.rb
@@ -0,0 +1,59 @@
+module CodeRay
+module Encoders
+  
+  # = Lint Encoder
+  #
+  # Checks for:
+  # 
+  # - empty tokens and unknown token kinds
+  # - incorrect nesting
+  # 
+  # It will raise a Lint::InvalidTokenStream exception when any of the above occurs (groups still open at the end of the stream raise a RuntimeError instead).
+  # 
+  # See also: Encoders::DebugLint
+  class Lint < Debug
+    
+    register_for :lint
+    
+    InvalidTokenStream         = Class.new StandardError
+    EmptyToken                 = Class.new InvalidTokenStream
+    UnknownTokenKind           = Class.new InvalidTokenStream
+    IncorrectTokenGroupNesting = Class.new InvalidTokenStream
+    
+    def text_token text, kind
+      raise EmptyToken,       'empty token for %p' % [kind] if text.empty?
+      raise UnknownTokenKind, 'unknown token kind %p (text was %p)' % [kind, text] unless TokenKinds.has_key? kind
+    end
+    
+    def begin_group kind
+      @opened << kind
+    end
+    
+    def end_group kind
+      raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+      @opened.pop
+    end
+    
+    def begin_line kind
+      @opened << kind
+    end
+    
+    def end_line kind
+      raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+      @opened.pop
+    end
+    
+    protected
+    
+    def setup options
+      @opened = []
+    end
+    
+    def finish options
+      raise 'Some tokens still open at end of token stream: %p' % [@opened] unless @opened.empty?
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index 2315d9e..b2f8b83 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -67,7 +67,6 @@ Token Types (%d):
       @type_stats['TOTAL'].count += 1
     end
     
-    # TODO Hierarchy handling
     def begin_group kind
       block_token ':begin_group', kind
     end
diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb
index a0ceb3c..c7ae014 100644
--- a/lib/coderay/encoders/terminal.rb
+++ b/lib/coderay/encoders/terminal.rb
@@ -19,105 +19,135 @@ module CodeRay
       register_for :terminal
       
       TOKEN_COLORS = {
-        :annotation => '35',
-        :attribute_name => '33',
-        :attribute_value => '31',
-        :binary => '1;35',
+        :debug => "\e[1;37;44m",
+        
+        :annotation => "\e[34m",
+        :attribute_name => "\e[35m",
+        :attribute_value => "\e[31m",
+        :binary => {
+          :self => "\e[31m",
+          :char => "\e[1;31m",
+          :delimiter => "\e[1;31m",
+        },
         :char => {
-          :self => '36', :delimiter => '1;34'
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m"
+        },
+        :class => "\e[1;35;4m",
+        :class_variable => "\e[36m",
+        :color => "\e[32m",
+        :comment => {
+          :self => "\e[1;30m",
+          :char => "\e[37m",
+          :delimiter => "\e[37m",
         },
-        :class => '1;35',
-        :class_variable => '36',
-        :color => '32',
-        :comment => '37',
-        :complex => '1;34',
-        :constant => ['1;34', '4'],
-        :decoration => '35',
-        :definition => '1;32',
-        :directive => ['32', '4'],
-        :doc => '46',
-        :doctype => '1;30',
-        :doc_string => ['31', '4'],
-        :entity => '33',
-        :error => ['1;33', '41'],
-        :exception => '1;31',
-        :float => '1;35',
-        :function => '1;34',
-        :global_variable => '42',
-        :hex => '1;36',
-        :include => '33',
-        :integer => '1;34',
-        :key => '35',
-        :label => '1;15',
-        :local_variable => '33',
-        :octal => '1;35',
-        :operator_name => '1;29',
-        :predefined_constant => '1;36',
-        :predefined_type => '1;30',
-        :predefined => ['4', '1;34'],
-        :preprocessor => '36',
-        :pseudo_class => '1;34',
+        :constant => "\e[1;34;4m",
+        :decorator => "\e[35m",
+        :definition => "\e[1;33m",
+        :directive => "\e[33m",
+        :docstring => "\e[31m",
+        :doctype => "\e[1;34m",
+        :done => "\e[1;30;2m",
+        :entity => "\e[31m",
+        :error => "\e[1;37;41m",
+        :exception => "\e[1;31m",
+        :float => "\e[1;35m",
+        :function => "\e[1;34m",
+        :global_variable => "\e[1;32m",
+        :hex => "\e[1;36m",
+        :id => "\e[1;34m",
+        :include => "\e[31m",
+        :integer => "\e[1;34m",
+        :imaginary => "\e[1;34m",
+        :important => "\e[1;31m",
+        :key => {
+          :self => "\e[35m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;35m",
+        },
+        :keyword => "\e[32m",
+        :label => "\e[1;33m",
+        :local_variable => "\e[33m",
+        :namespace => "\e[1;35m",
+        :octal => "\e[1;34m",
+        :predefined => "\e[36m",
+        :predefined_constant => "\e[1;36m",
+        :predefined_type => "\e[1;32m",
+        :preprocessor => "\e[1;36m",
+        :pseudo_class => "\e[1;34m",
         :regexp => {
-          :self => '31',
-          :content => '31',
-          :delimiter => '1;29',
-          :modifier => '35',
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m",
+          :modifier => "\e[35m",
+          :char => "\e[1;35m",
         },
-        :reserved => '1;31',
+        :reserved => "\e[32m",
         :shell => {
-          :self => '42',
-          :content => '1;29',
-          :delimiter => '37',
+          :self => "\e[33m",
+          :char => "\e[1;33m",
+          :delimiter => "\e[1;33m",
+          :escape => "\e[1;33m",
         },
         :string => {
-          :self => '32',
-          :modifier => '1;32',
-          :escape => '1;36',
-          :delimiter => '1;32',
-          :char => '1;36',
+          :self => "\e[31m",
+          :modifier => "\e[1;31m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;31m",
+          :escape => "\e[1;31m",
+        },
+        :symbol => {
+          :self => "\e[33m",
+          :delimiter => "\e[1;33m",
         },
-        :symbol => '1;32',
-        :tag => '1;34',
-        :type => '1;34',
-        :value => '36',
-        :variable => '1;34',
+        :tag => "\e[32m",
+        :type => "\e[1;34m",
+        :value => "\e[36m",
+        :variable => "\e[34m",
         
-        :insert => '42',
-        :delete => '41',
-        :change => '44',
-        :head => '45'
+        :insert => {
+          :self => "\e[42m",
+          :insert => "\e[1;32;42m",
+          :eyecatcher => "\e[102m",
+        },
+        :delete => {
+          :self => "\e[41m",
+          :delete => "\e[1;31;41m",
+          :eyecatcher => "\e[101m",
+        },
+        :change => {
+          :self => "\e[44m",
+          :change => "\e[37;44m",
+        },
+        :head => {
+          :self => "\e[45m",
+          :filename => "\e[37;45m"
+        },
       }
+      
       TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved]
       TOKEN_COLORS[:method] = TOKEN_COLORS[:function]
-      TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex]
-      TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] =
-        TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
+      TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
       
     protected
       
       def setup(options)
         super
         @opened = []
-        @subcolors = nil
+        @color_scopes = [TOKEN_COLORS]
       end
       
     public
       
       def text_token text, kind
-        if color = (@subcolors || TOKEN_COLORS)[kind]
-          if Hash === color
-            if color[:self]
-              color = color[:self]
-            else
-              @out << text
-              return
-            end
-          end
+        if color = @color_scopes.last[kind]
+          color = color[:self] if color.is_a? Hash
           
-          @out << ansi_colorize(color)
-          @out << text.gsub("\n", ansi_clear + "\n" + ansi_colorize(color))
-          @out << ansi_clear
-          @out << ansi_colorize(@subcolors[:self]) if @subcolors && @subcolors[:self]
+          @out << color
+          @out << (text.index("\n") ? text.gsub("\n", "\e[0m\n" + color) : text)
+          @out << "\e[0m"
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         else
           @out << text
         end
@@ -130,50 +160,36 @@ module CodeRay
       alias begin_line begin_group
       
       def end_group kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
-          @out << ansi_clear
-          @out << open_token(@opened.last)
+        if @opened.pop
+          @color_scopes.pop
+          @out << "\e[0m"
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         end
       end
       
       def end_line kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
-          # whole lines to be highlighted,
-          # eg. added/modified/deleted lines in a diff
-          @out << "\t" * 100 + ansi_clear
-          @out << open_token(@opened.last)
-        end
+        @out << (@line_filler ||= "\t" * 100)
+        end_group kind
       end
       
     private
       
       def open_token kind
-        if color = TOKEN_COLORS[kind]
-          if Hash === color
-            @subcolors = color
-            ansi_colorize(color[:self]) if color[:self]
+        if color = @color_scopes.last[kind]
+          if color.is_a? Hash
+            @color_scopes << color
+            color[:self]
           else
-            @subcolors = {}
-            ansi_colorize(color)
+            @color_scopes << @color_scopes.last
+            color
           end
         else
-          @subcolors = nil
+          @color_scopes << @color_scopes.last
           ''
         end
       end
-      
-      def ansi_colorize(color)
-        Array(color).map { |c| "\e[#{c}m" }.join
-      end
-      def ansi_clear
-        ansi_colorize(0)
-      end
     end
   end
-end
-\ No newline at end of file
+end
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index 2f8cc8e..a9d7534 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -38,7 +38,7 @@ module CodeRay
           (TypeFromExt[ext2.downcase] if ext2) ||
           TypeFromName[name] ||
           TypeFromName[name.downcase]
-        type ||= shebang(filename) if read_shebang
+        type ||= type_from_shebang(filename) if read_shebang
         
         type
       end
@@ -63,7 +63,7 @@ module CodeRay
       
     protected
       
-      def shebang filename
+      def type_from_shebang filename
         return unless File.exist? filename
         File.open filename, 'r' do |f|
           if first_line = f.gets
@@ -77,55 +77,59 @@ module CodeRay
     end
     
     TypeFromExt = {
-      'c'        => :c,
-      'cfc'      => :xml,
-      'cfm'      => :xml,
-      'clj'      => :clojure,
-      'css'      => :css,
-      'diff'     => :diff,
-      'dpr'      => :delphi,
-      'erb'      => :erb,
-      'gemspec'  => :ruby,
-      'groovy'   => :groovy,
-      'gvy'      => :groovy,
-      'h'        => :c,
-      'haml'     => :haml,
-      'htm'      => :html,
-      'html'     => :html,
-      'html.erb' => :erb,
-      'java'     => :java,
-      'js'       => :java_script,
-      'json'     => :json,
-      'mab'      => :ruby,
-      'pas'      => :delphi,
-      'patch'    => :diff,
-      'phtml'    => :php,
-      'php'      => :php,
-      'php3'     => :php,
-      'php4'     => :php,
-      'php5'     => :php,
-      'prawn'    => :ruby,
-      'py'       => :python,
-      'py3'      => :python,
-      'pyw'      => :python,
-      'rake'     => :ruby,
-      'raydebug' => :raydebug,
-      'rb'       => :ruby,
-      'rbw'      => :ruby,
-      'rhtml'    => :erb,
-      'rjs'      => :ruby,
-      'rpdf'     => :ruby,
-      'ru'       => :ruby,
-      'rxml'     => :ruby,
-      'sass'     => :sass,
-      'sh'       => :bash,
-      'sql'      => :sql,
-      'tmproj'   => :xml,
-      'xaml'     => :xml,
-      'xhtml'    => :html,
-      'xml'      => :xml,
-      'yaml'     => :yaml,
-      'yml'      => :yaml,
+      'c'         => :c,
+      'cfc'       => :xml,
+      'cfm'       => :xml,
+      'clj'       => :clojure,
+      'css'       => :css,
+      'diff'      => :diff,
+      'dpr'       => :delphi,
+      'erb'       => :erb,
+      'gemspec'   => :ruby,
+      'go'        => :go, 
+      'groovy'    => :groovy,
+      'gvy'       => :groovy,
+      'h'         => :c,
+      'haml'      => :haml,
+      'htm'       => :html,
+      'html'      => :html,
+      'html.erb'  => :erb,
+      'java'      => :java,
+      'js'        => :java_script,
+      'json'      => :json,
+      'lua'       => :lua,
+      'mab'       => :ruby,
+      'pas'       => :delphi,
+      'patch'     => :diff,
+      'phtml'     => :php,
+      'php'       => :php,
+      'php3'      => :php,
+      'php4'      => :php,
+      'php5'      => :php,
+      'prawn'     => :ruby,
+      'py'        => :python,
+      'py3'       => :python,
+      'pyw'       => :python,
+      'rake'      => :ruby,
+      'raydebug'  => :raydebug,
+      'rb'        => :ruby,
+      'rbw'       => :ruby,
+      'rhtml'     => :erb,
+      'rjs'       => :ruby,
+      'rpdf'      => :ruby,
+      'ru'        => :ruby,  # config.ru
+      'rxml'      => :ruby,
+      'sass'      => :sass,
+      'sh'        => :bash,
+      'sql'       => :sql,
+      'taskpaper' => :taskpaper,
+      'template'  => :json,  # AWS CloudFormation template
+      'tmproj'    => :xml,
+      'xaml'      => :xml,
+      'xhtml'     => :html,
+      'xml'       => :xml,
+      'yaml'      => :yaml,
+      'yml'       => :yaml,
     }
     for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
       TypeFromExt[cpp_alias] = :cpp
@@ -138,6 +142,9 @@ module CodeRay
       'Rakefile' => :ruby,
       'Rantfile' => :ruby,
       'Gemfile'  => :ruby,
+      'Guardfile' => :ruby,
+      'Vagrantfile' => :ruby,
+      'Appraisals' => :ruby
     }
     
   end
diff --git a/lib/coderay/helpers/gzip.rb b/lib/coderay/helpers/gzip.rb
deleted file mode 100644
index 245014a..0000000
--- a/lib/coderay/helpers/gzip.rb
+++ /dev/null
@@ -1,41 +0,0 @@
-module CodeRay
-  
-  # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
-  module GZip
-    
-    require 'zlib'
-    
-    # The default zipping level. 7 zips good and fast.
-    DEFAULT_GZIP_LEVEL = 7
-    
-    # Unzips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   print GZip.gunzip(File.read('adresses.gz'))
-    def GZip.gunzip s
-      Zlib::Inflate.inflate s
-    end
-    
-    # Zips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   File.open('adresses.gz', 'w') do |file
-    #     file.write GZip.gzip('Mum: 0123 456 789', 9)
-    #   end
-    #
-    # If you provide a +level+, you can control how strong
-    # the string is compressed:
-    # - 0: no compression, only convert to gzip format
-    # - 1: compress fast
-    # - 7: compress more, but still fast (default)
-    # - 8: compress more, slower
-    # - 9: compress best, very slow
-    def GZip.gzip s, level = DEFAULT_GZIP_LEVEL
-      Zlib::Deflate.new(level).deflate s, Zlib::FINISH
-    end
-    
-  end
-  
-end
diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb
index dd4e830..9a724ff 100644
--- a/lib/coderay/helpers/plugin.rb
+++ b/lib/coderay/helpers/plugin.rb
@@ -30,7 +30,7 @@ module CodeRay
     # * a file could not be found
     # * the requested Plugin is not registered
     PluginNotFound = Class.new LoadError
-    HostNotFound = Class.new LoadError
+    HostNotFound   = Class.new LoadError
     
     PLUGIN_HOSTS = []
     PLUGIN_HOSTS_BY_ID = {}  # dummy hash
@@ -49,8 +49,8 @@ module CodeRay
     def [] id, *args, &blk
       plugin = validate_id(id)
       begin
-        plugin = plugin_hash.[] plugin, *args, &blk
-      end while plugin.is_a? Symbol
+        plugin = plugin_hash.[](plugin, *args, &blk)
+      end while plugin.is_a? String
       plugin
     end
     
@@ -95,7 +95,7 @@ module CodeRay
     def map hash
       for from, to in hash
         from = validate_id from
-        to = validate_id to
+        to   = validate_id to
         plugin_hash[from] = to unless plugin_hash.has_key? from
       end
     end
@@ -131,7 +131,7 @@ module CodeRay
     
     # A Hash of plugion_id => Plugin pairs.
     def plugin_hash
-      @plugin_hash ||= make_plugin_hash
+      @plugin_hash ||= (@plugin_hash = make_plugin_hash).tap { load_plugin_map }
     end
     
     # Returns an array of all .rb files in the plugin path.
@@ -158,7 +158,6 @@ module CodeRay
     # This is done automatically when plugin_path is called.
     def load_plugin_map
       mapfile = path_to '_map'
-      @plugin_map_loaded = true
       if File.exist? mapfile
         require mapfile
         true
@@ -171,22 +170,16 @@ module CodeRay
     
     # Return a plugin hash that automatically loads plugins.
     def make_plugin_hash
-      @plugin_map_loaded ||= false
       Hash.new do |h, plugin_id|
         id = validate_id(plugin_id)
         path = path_to id
         begin
           require path
         rescue LoadError => boom
-          if @plugin_map_loaded
-            if h.has_key?(:default)
-              h[:default]
-            else
-              raise PluginNotFound, '%p could not load plugin %p: %s' % [self, id, boom]
-            end
+          if h.has_key?(:default)
+            h[:default]
           else
-            load_plugin_map
-            h[plugin_id]
+            raise PluginNotFound, '%p could not load plugin %p: %s' % [self, id, boom]
           end
         else
           # Plugin should have registered by now
@@ -204,22 +197,22 @@ module CodeRay
       File.join plugin_path, "#{plugin_id}.rb"
     end
     
-    # Converts +id+ to a Symbol if it is a String,
-    # or returns +id+ if it already is a Symbol.
+    # Converts +id+ (a Symbol or String) to a valid plugin ID String.
     #
     # Raises +ArgumentError+ for all other objects, or if the
     # given String includes non-alphanumeric characters (\W).
     def validate_id id
-      if id.is_a? Symbol or id.nil?
-        id
-      elsif id.is_a? String
+      case id
+      when Symbol
+        id.to_s
+      when String
         if id[/\w+/] == id
-          id.downcase.to_sym
+          id.downcase
         else
           raise ArgumentError, "Invalid id given: #{id}"
         end
       else
-        raise ArgumentError, "String or Symbol expected, but #{id.class} given."
+        raise ArgumentError, "Symbol or String expected, but #{id.class} given."
       end
     end
     
@@ -270,7 +263,6 @@ module CodeRay
     end
     
     def aliases
-      plugin_host.load_plugin_map
       plugin_host.plugin_hash.inject [] do |aliases, (key, _)|
         aliases << key if plugin_host[key] == self
         aliases
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index 907cf00..b3f7e17 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -182,16 +182,9 @@ module CodeRay
       # Scan the code and returns all tokens in a Tokens object.
       def tokenize source = nil, options = {}
         options = @options.merge(options)
-        @tokens = options[:tokens] || @tokens || Tokens.new
-        @tokens.scanner = self if @tokens.respond_to? :scanner=
-        case source
-        when Array
-          self.string = self.class.normalize(source.join)
-        when nil
-          reset
-        else
-          self.string = self.class.normalize(source)
-        end
+        
+        set_tokens_from_options options
+        set_string_from_source source
         
         begin
           scan_tokens @tokens, options
@@ -261,6 +254,22 @@ module CodeRay
       def setup  # :doc:
       end
       
+      def set_string_from_source source
+        case source
+        when Array
+          self.string = self.class.normalize(source.join)
+        when nil
+          reset
+        else
+          self.string = self.class.normalize(source)
+        end
+      end
+      
+      def set_tokens_from_options options
+        @tokens = options[:tokens] || @tokens || Tokens.new
+        @tokens.scanner = self if @tokens.respond_to? :scanner=
+      end
+      
       # This is the central method, and commonly the only one a
       # subclass implements.
       #
@@ -277,19 +286,15 @@ module CodeRay
         @binary_string = nil if defined? @binary_string
       end
       
-      # Scanner error with additional status information
-      def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
-        raise ScanError, <<-EOE % [
+      SCAN_ERROR_MESSAGE = <<-MESSAGE
 
 
-***ERROR in %s: %s (after %d tokens)
+***ERROR in %s: %s (after %s tokens)
 
 tokens:
 %s
 
-current line: %d  column: %d  pos: %d
-matched: %p  state: %p
-bol? = %p,  eos? = %p
+%s
 
 surrounding code:
 %p  ~~  %p
@@ -297,16 +302,43 @@ surrounding code:
 
 ***ERROR***
 
-        EOE
-          File.basename(caller[0]),
-          msg,
-          tokens.respond_to?(:size) ? tokens.size : 0,
-          tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
-          line, column, pos,
-          matched, state, bol?, eos?,
+      MESSAGE
+      
+      def raise_inspect_arguments message, tokens, state, ambit  # values for SCAN_ERROR_MESSAGE, in placeholder order
+        return File.basename(caller[1]),  # caller[0] is raise_inspect itself; caller[1] is the code that reported the error
+          message,
+          tokens_size(tokens),
+          tokens_last(tokens, 10).map(&:inspect).join("\n"),
+          scanner_state_info(state),
           binary_string[pos - ambit, ambit],
-          binary_string[pos, ambit],
-        ], backtrace
+          binary_string[pos, ambit]
+      end
+      
+      SCANNER_STATE_INFO = <<-INFO
+current line: %d  column: %d  pos: %d
+matched: %p  state: %p
+bol?: %p,  eos?: %p
+      INFO
+      
+      def scanner_state_info state
+        SCANNER_STATE_INFO % [
+          line, column, pos,
+          matched, state || 'No state given!',
+          bol?, eos?,
+        ]
+      end
+      
+      # Scanner error with additional status information
+      def raise_inspect message, tokens, state = self.state, ambit = 30, backtrace = caller
+        raise ScanError, SCAN_ERROR_MESSAGE % raise_inspect_arguments(message, tokens, state, ambit), backtrace
+      end
+      
+      def tokens_size tokens
+        tokens.size if tokens.respond_to?(:size)
+      end
+      
+      def tokens_last tokens, n
+        tokens.respond_to?(:last) ? tokens.last(n) : []
       end
       
       # Shorthand for scan_until(/\z/).
diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb
index 003eed6..55d5239 100644
--- a/lib/coderay/scanners/css.rb
+++ b/lib/coderay/scanners/css.rb
@@ -7,10 +7,10 @@ module Scanners
     
     KINDS_NOT_LOC = [
       :comment,
-      :class, :pseudo_class, :type,
-      :constant, :directive,
+      :class, :pseudo_class, :tag,
+      :id, :directive,
       :key, :value, :operator, :color, :float, :string,
-      :error, :important,
+      :error, :important, :type,
     ]  # :nodoc:
     
     module RE  # :nodoc:
@@ -25,7 +25,7 @@ module Scanners
       
       HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/
       
-      Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)/
+      Num = /-?(?:[0-9]*\.[0-9]+|[0-9]+)n?/
       Name = /#{NMChar}+/
       Ident = /-?#{NMStart}#{NMChar}*/
       AtKeyword = /@#{Ident}/
@@ -53,7 +53,7 @@ module Scanners
     end
     
     def scan_tokens encoder, options
-      states = Array(options[:state] || @state)
+      states = Array(options[:state] || @state).dup
       value_expected = @value_expected
       
       until eos?
@@ -64,13 +64,13 @@ module Scanners
         elsif case states.last
           when :initial, :media
             if match = scan(/(?>#{RE::Ident})(?!\()|\*/ox)
-              encoder.text_token match, :type
+              encoder.text_token match, :tag
               next
             elsif match = scan(RE::Class)
               encoder.text_token match, :class
               next
             elsif match = scan(RE::Id)
-              encoder.text_token match, :constant
+              encoder.text_token match, :id
               next
             elsif match = scan(RE::PseudoClass)
               encoder.text_token match, :pseudo_class
@@ -145,10 +145,10 @@ module Scanners
           start = match[/^\w+\(/]
           encoder.text_token start, :delimiter
           if match[-1] == ?)
-            encoder.text_token match[start.size..-2], :content
+            encoder.text_token match[start.size..-2], :content if match.size > start.size + 1
             encoder.text_token ')', :delimiter
           else
-            encoder.text_token match[start.size..-1], :content
+            encoder.text_token match[start.size..-1], :content if match.size > start.size
           end
           encoder.end_group :function
           
diff --git a/lib/coderay/scanners/debug.rb b/lib/coderay/scanners/debug.rb
index 566bfa7..83ede9a 100644
--- a/lib/coderay/scanners/debug.rb
+++ b/lib/coderay/scanners/debug.rb
@@ -1,9 +1,11 @@
+require 'set'
+
 module CodeRay
 module Scanners
   
   # = Debug Scanner
   # 
-  # Interprets the output of the Encoders::Debug encoder.
+  # Interprets the output of the Encoders::Debug encoder (basically the inverse function).
   class Debug < Scanner
     
     register_for :debug
@@ -11,6 +13,11 @@ module Scanners
     
   protected
     
+    def setup
+      super
+      @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set
+    end
+    
     def scan_tokens encoder, options
       
       opened_tokens = []
@@ -21,16 +28,19 @@ module Scanners
           encoder.text_token match, :space
           
         elsif match = scan(/ (\w+) \( ( [^\)\\]* ( \\. [^\)\\]* )* ) \)? /x)
-          kind = self[1].to_sym
-          match = self[2].gsub(/\\(.)/m, '\1')
-          unless TokenKinds.has_key? kind
-            kind = :error
-            match = matched
+          if @known_token_kinds.include? self[1]
+            encoder.text_token self[2].gsub(/\\(.)/m, '\1'), self[1].to_sym
+          else
+            encoder.text_token matched, :unknown
           end
-          encoder.text_token match, kind
           
         elsif match = scan(/ (\w+) ([<\[]) /x)
-          kind = self[1].to_sym
+          if @known_token_kinds.include? self[1]
+            kind = self[1].to_sym
+          else
+            kind = :unknown
+          end
+          
           opened_tokens << kind
           case self[2]
           when '<'
diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb
index af0f755..fd1aed6 100644
--- a/lib/coderay/scanners/diff.rb
+++ b/lib/coderay/scanners/diff.rb
@@ -69,7 +69,7 @@ module Scanners
             state = :added
           elsif match = scan(/\\ .*/)
             encoder.text_token match, :comment
-          elsif match = scan(/@@(?>[^@\n]*)@@/)
+          elsif match = scan(/@@(?>[^@\n]+)@@/)
             content_scanner.state = :initial unless match?(/\n\+/)
             content_scanner_entry_state = nil
             if check(/\n|$/)
diff --git a/lib/coderay/scanners/go.rb b/lib/coderay/scanners/go.rb
new file mode 100644
index 0000000..99fdd63
--- /dev/null
+++ b/lib/coderay/scanners/go.rb
@@ -0,0 +1,208 @@
+module CodeRay
+module Scanners
+  
+  class Go < Scanner
+    
+    register_for :go
+    file_extension 'go'
+    
+    # http://golang.org/ref/spec#Keywords
+    KEYWORDS = [
+      'break', 'default', 'func', 'interface', 'select',
+      'case', 'defer', 'go', 'map', 'struct',
+      'chan', 'else', 'goto', 'package', 'switch',
+      'const', 'fallthrough', 'if', 'range', 'type',
+      'continue', 'for', 'import', 'return', 'var',
+    ]  # :nodoc:
+    
+    # http://golang.org/ref/spec#Types
+    PREDEFINED_TYPES = [
+      'bool',
+      'uint8', 'uint16', 'uint32', 'uint64',
+      'int8', 'int16', 'int32', 'int64',
+      'float32', 'float64',
+      'complex64', 'complex128',
+      'byte', 'rune', 'string', 'error',
+      'uint', 'int', 'uintptr',
+    ]  # :nodoc:
+    
+    PREDEFINED_CONSTANTS = [
+      'nil', 'iota',
+      'true', 'false',
+    ]  # :nodoc:
+    
+    PREDEFINED_FUNCTIONS = %w[
+      append cap close complex copy delete imag len
+      make new panic print println real recover
+    ] # :nodoc:
+    
+    IDENT_KIND = WordList.new(:ident).
+      add(KEYWORDS, :keyword).
+      add(PREDEFINED_TYPES, :predefined_type).
+      add(PREDEFINED_CONSTANTS, :predefined_constant).
+      add(PREDEFINED_FUNCTIONS, :predefined)  # :nodoc:
+    
+    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
+    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
+    
+    protected
+    
+    def scan_tokens encoder, options
+      
+      state = :initial
+      label_expected = true
+      case_expected = false
+      label_expected_before_preproc_line = nil
+      in_preproc_line = false
+      
+      until eos?
+        
+        case state
+        
+        when :initial
+          
+          if match = scan(/ \s+ | \\\n /x)
+            if in_preproc_line && match != "\\\n" && match.index(?\n)
+              in_preproc_line = false
+              case_expected = false
+              label_expected = label_expected_before_preproc_line
+            end
+            encoder.text_token match, :space
+          
+          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+            encoder.text_token match, :comment
+          
+          elsif match = scan(/ <?- (?![\d.]) | [+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
+            if case_expected
+              label_expected = true if match == ':'
+              case_expected = false
+            end
+            encoder.text_token match, :operator
+          
+          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+            kind = IDENT_KIND[match]
+            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
+              kind = :label
+              label_expected = false
+              match << matched
+            else
+              label_expected = false
+              if kind == :keyword
+                case match
+                when 'case', 'default'
+                  case_expected = true
+                end
+              end
+            end
+            encoder.text_token match, kind
+          
+          elsif match = scan(/L?"/)
+            encoder.begin_group :string
+            if match[0] == ?L
+              encoder.text_token 'L', :modifier
+              match = '"'
+            end
+            encoder.text_token match, :delimiter
+            state = :string
+          
+          elsif match = scan(/ ` ([^`]+)? (`)? /x)
+            encoder.begin_group :shell
+            encoder.text_token '`', :delimiter
+            encoder.text_token self[1], :content if self[1]
+            encoder.text_token self[2], :delimiter if self[2]
+            encoder.end_group :shell
+          
+          elsif match = scan(/ \# \s* if \s* 0 /x)
+            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
+            encoder.text_token match, :comment
+          
+          elsif match = scan(/#[ \t]*(\w*)/)
+            encoder.text_token match, :preprocessor
+            in_preproc_line = true
+            label_expected_before_preproc_line = label_expected
+            state = :include_expected if self[1] == 'include'
+          
+          elsif match = scan(/ L?' (?: [^\'\n\\] | \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) )? '? /ox)
+            label_expected = false
+            encoder.text_token match, :char
+          
+          elsif match = scan(/\$/)
+            encoder.text_token match, :ident
+          
+          elsif match = scan(/-?\d*(\.\d*)?([eE][+-]?\d+)?i/)
+            label_expected = false
+            encoder.text_token match, :imaginary
+          
+          elsif match = scan(/-?0[xX][0-9A-Fa-f]+/)
+            label_expected = false
+            encoder.text_token match, :hex
+          
+          elsif match = scan(/-?(?:0[0-7]+)(?![89.eEfF])/)
+            label_expected = false
+            encoder.text_token match, :octal
+          
+          elsif match = scan(/-?(?:\d*\.\d+|\d+\.)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
+            label_expected = false
+            encoder.text_token match, :float
+          
+          elsif match = scan(/-?(?:\d+)(?![.eEfF])L?L?/)
+            label_expected = false
+            encoder.text_token match, :integer
+          
+          else
+            encoder.text_token getch, :error
+          
+          end
+        
+        when :string
+          if match = scan(/[^\\\n"]+/)
+            encoder.text_token match, :content
+          elsif match = scan(/"/)
+            encoder.text_token match, :delimiter
+            encoder.end_group :string
+            state = :initial
+            label_expected = false
+          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+            encoder.text_token match, :char
+          elsif match = scan(/ \\ /x)
+            encoder.text_token match, :error
+          elsif match = scan(/$/)
+            encoder.end_group :string
+            state = :initial
+            label_expected = false
+          else
+            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
+          end
+        
+        when :include_expected
+          if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
+            encoder.text_token match, :include
+            state = :initial
+          
+          elsif match = scan(/\s+/)
+            encoder.text_token match, :space
+            state = :initial if match.index ?\n
+          
+          else
+            state = :initial
+          
+          end
+        
+        else
+          raise_inspect 'Unknown state', encoder
+        
+        end
+        
+      end
+      
+      if state == :string
+        encoder.end_group :string
+      end
+      
+      encoder
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/scanners/groovy.rb b/lib/coderay/scanners/groovy.rb
index cf55daf..c64454f 100644
--- a/lib/coderay/scanners/groovy.rb
+++ b/lib/coderay/scanners/groovy.rb
@@ -36,9 +36,12 @@ module Scanners
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     def scan_tokens encoder, options
-      
-      state = :initial
+      state = options[:state] || @state
       inline_block_stack = []
       inline_block_paren_depth = nil
       string_delimiter = nil
@@ -223,7 +226,7 @@ module Scanners
             encoder.text_token match, :content  # TODO: Shouldn't this be :error?
             
           elsif match = scan(/ \\ | \n /x)
-            encoder.end_group state
+            encoder.end_group state == :regexp ? :regexp : :string
             encoder.text_token match, :error
             after_def = value_expected = false
             state = :initial
@@ -243,7 +246,17 @@ module Scanners
       end
       
       if [:multiline_string, :string, :regexp].include? state
-        encoder.end_group state
+        encoder.end_group state == :regexp ? :regexp : :string
+      end
+      
+      if options[:keep_state]
+        @state = state
+      end
+      
+      until inline_block_stack.empty?
+        state, = *inline_block_stack.pop
+        encoder.end_group :inline
+        encoder.end_group state == :regexp ? :regexp : :string
       end
       
       encoder
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 3ba3b79..ebe7b01 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -1,13 +1,13 @@
 module CodeRay
 module Scanners
-
+  
   # HTML Scanner
   # 
   # Alias: +xhtml+
   # 
   # See also: Scanners::XML
   class HTML < Scanner
-
+    
     register_for :html
     
     KINDS_NOT_LOC = [
@@ -33,7 +33,8 @@ module Scanners
     )
     
     IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
-      add(EVENT_ATTRIBUTES, :script)
+      add(EVENT_ATTRIBUTES, :script).
+      add(['style'], :style)
     
     ATTR_NAME = /[\w.:-]+/  # :nodoc:
     TAG_END = /\/?>/  # :nodoc:
@@ -75,9 +76,14 @@ module Scanners
     def scan_java_script encoder, code
       if code && !code.empty?
         @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
-        # encoder.begin_group :inline
         @java_script_scanner.tokenize code, :tokens => encoder
-        # encoder.end_group :inline
+      end
+    end
+    
+    def scan_css encoder, code, state = [:initial]
+      if code && !code.empty?
+        @css_scanner ||= Scanners::CSS.new '', :keep_tokens => true
+        @css_scanner.tokenize code, :tokens => encoder, :state => state
       end
     end
     
@@ -99,7 +105,15 @@ module Scanners
           case state
           
           when :initial
-            if match = scan(/<!--(?:.*?-->|.*)/m)
+            if match = scan(/<!\[CDATA\[/)
+              encoder.text_token match, :inline_delimiter
+              if match = scan(/.*?\]\]>/m)
+                encoder.text_token match[0..-4], :plain
+                encoder.text_token ']]>', :inline_delimiter
+              elsif match = scan(/.+/)
+                encoder.text_token match, :error
+              end
+            elsif match = scan(/<!--(?:.*?-->|.*)/m)
               encoder.text_token match, :comment
             elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
               encoder.text_token match, :doctype
@@ -110,7 +124,7 @@ module Scanners
             elsif match = scan(/<\/[-\w.:]*>?/m)
               in_tag = nil
               encoder.text_token match, :tag
-            elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
+            elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m)
               encoder.text_token match, :tag
               in_tag = self[1]
               if self[2]
@@ -161,17 +175,21 @@ module Scanners
               encoder.text_token match, :attribute_value
               state = :attribute
             elsif match = scan(/["']/)
-              if in_attribute == :script
-                encoder.begin_group :inline
-                encoder.text_token match, :inline_delimiter
+              if in_attribute == :script || in_attribute == :style
+                encoder.begin_group :string
+                encoder.text_token match, :delimiter
                 if scan(/javascript:[ \t]*/)
                   encoder.text_token matched, :comment
                 end
                 code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
-                scan_java_script encoder, code
+                if in_attribute == :script
+                  scan_java_script encoder, code
+                else
+                  scan_css encoder, code, [:block]
+                end
                 match = scan(/["']/)
-                encoder.text_token match, :inline_delimiter if match
-                encoder.end_group :inline
+                encoder.text_token match, :delimiter if match
+                encoder.end_group :string
                 state = :attribute
                 in_attribute = nil
               else
@@ -206,19 +224,23 @@ module Scanners
             
           when :in_special_tag
             case in_tag
-            when 'script'
+            when 'script', 'style'
               encoder.text_token match, :space if match = scan(/[ \t]*\n/)
               if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
                 code = self[2] || self[4]
                 closing = self[3]
                 encoder.text_token self[1], :comment
               else
-                code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
+                code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/)
                 closing = false
               end
               unless code.empty?
                 encoder.begin_group :inline
-                scan_java_script encoder, code
+                if in_tag == 'script'
+                  scan_java_script encoder, code
+                else
+                  scan_css encoder, code
+                end
                 encoder.end_group :inline
               end
               encoder.text_token closing, :comment if closing
diff --git a/lib/coderay/scanners/java_script.rb b/lib/coderay/scanners/java_script.rb
index 92e3dfa..9eb0a0a 100644
--- a/lib/coderay/scanners/java_script.rb
+++ b/lib/coderay/scanners/java_script.rb
@@ -54,10 +54,17 @@ module Scanners
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     def scan_tokens encoder, options
       
-      state = :initial
-      string_delimiter = nil
+      state, string_delimiter = options[:state] || @state
+      if string_delimiter
+        encoder.begin_group state
+      end
+      
       value_expected = true
       key_expected = false
       function_expected = false
@@ -72,9 +79,10 @@ module Scanners
             value_expected = true if !value_expected && match.index(?\n)
             encoder.text_token match, :space
             
-          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .*() ) !mx)
             value_expected = true
             encoder.text_token match, :comment
+            state = :open_multi_line_comment if self[1]
             
           elsif check(/\.?\d/)
             key_expected = value_expected = false
@@ -176,19 +184,35 @@ module Scanners
           elsif match = scan(/ \\ | $ /x)
             encoder.end_group state
             encoder.text_token match, :error unless match.empty?
+            string_delimiter = nil
             key_expected = value_expected = false
             state = :initial
           else
             raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
           end
           
+        when :open_multi_line_comment
+          if match = scan(%r! .*? \*/ !mx)
+            state = :initial
+          else
+            match = scan(%r! .+ !mx)
+          end
+          value_expected = true
+          encoder.text_token match, :comment if match
+          
         else
-          raise_inspect 'Unknown state', encoder
+          #:nocov:
+          raise_inspect 'Unknown state: %p' % [state], encoder
+          #:nocov:
           
         end
         
       end
       
+      if options[:keep_state]
+        @state = state, string_delimiter
+      end
+      
       if [:string, :regexp].include? state
         encoder.end_group state
       end
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb
index 4e0f462..b09970c 100644
--- a/lib/coderay/scanners/json.rb
+++ b/lib/coderay/scanners/json.rb
@@ -14,15 +14,21 @@ module Scanners
     
     ESCAPE = / [bfnrt\\"\/] /x  # :nodoc:
     UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc:
+    KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     # See http://json.org/ for a definition of the JSON lexic/grammar.
     def scan_tokens encoder, options
+      state = options[:state] || @state
       
-      state = :initial
-      stack = []
-      key_expected = false
+      if [:string, :key].include? state
+        encoder.begin_group state
+      end
       
       until eos?
         
@@ -32,18 +38,11 @@ module Scanners
           if match = scan(/ \s+ /x)
             encoder.text_token match, :space
           elsif match = scan(/"/)
-            state = key_expected ? :key : :string
+            state = check(/#{KEY}/o) ? :key : :string
             encoder.begin_group state
             encoder.text_token match, :delimiter
           elsif match = scan(/ [:,\[{\]}] /x)
             encoder.text_token match, :operator
-            case match
-            when ':' then key_expected = false
-            when ',' then key_expected = true if stack.last == :object
-            when '{' then stack << :object; key_expected = true
-            when '[' then stack << :array
-            when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
-            end
           elsif match = scan(/ true | false | null /x)
             encoder.text_token match, :value
           elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
@@ -82,6 +81,10 @@ module Scanners
         end
       end
       
+      if options[:keep_state]
+        @state = state
+      end
+      
       if [:string, :key].include? state
         encoder.end_group state
       end
diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb
new file mode 100644
index 0000000..fb1e45a
--- /dev/null
+++ b/lib/coderay/scanners/lua.rb
@@ -0,0 +1,280 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+  # Scanner for the Lua[http://lua.org] programming language.
+  #
+  # The language’s complete syntax is defined in
+  # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+  # which is what this scanner tries to conform to.
+  class Lua < Scanner
+    
+    register_for :lua
+    file_extension 'lua'
+    title 'Lua'
+    
+    # Keywords used in Lua.
+    KEYWORDS = %w[and break do else elseif end
+      for function goto if in
+      local not or repeat return
+      then until while
+    ]
+    
+    # Constants set by the Lua core.
+    PREDEFINED_CONSTANTS = %w[false true nil]
+    
+    # The expressions contained in this array are parts of Lua’s `basic'
+    # library. Although it’s not strictly necessary to load that library,
+    # it is highly recommended, and one would have to provide one’s own
+    # implementations of some of these expressions otherwise. They are
+    # neither keywords nor constants, but nearly predefined, so they
+    # get tagged as `predefined' rather than anything else.
+    #
+    # This list excludes values of form `_UPPERCASE' because the Lua manual
+    # requires such identifiers to be reserved by Lua anyway and they are
+    # highlighted directly accordingly, without the need for specific
+    # identifiers to be listed here.
+    PREDEFINED_EXPRESSIONS = %w[
+      assert collectgarbage dofile error getmetatable
+      ipairs load loadfile next pairs pcall print
+      rawequal rawget rawlen rawset select setmetatable
+      tonumber tostring type xpcall
+    ]
+    
+    # Automatic token kind selection for normal words (anything not listed maps to :ident).
+    IDENT_KIND = CodeRay::WordList.new(:ident).
+      add(KEYWORDS, :keyword).
+      add(PREDEFINED_CONSTANTS, :predefined_constant).
+      add(PREDEFINED_EXPRESSIONS, :predefined)
+    
+    protected
+    
+    # Scanner initialization.
+    def setup
+      @state = :initial  # Default scanner state, used by scan_tokens when options[:state] is not given
+      @brace_depth = 0   # Initial nesting depth of open table braces, read at the start of scan_tokens
+    end
+    
+    # CodeRay entry hook. Starts parsing.
+    def scan_tokens(encoder, options) # Tokenizes the input into encoder and returns it; honours options[:state] and options[:keep_state]
+      state = options[:state] || @state
+      brace_depth = @brace_depth # Number of currently open table braces
+      num_equals = nil # Number of "=" in the long bracket that opened the current long comment/string
+      
+      until eos?
+        case state
+        
+        when :initial
+          if match = scan(/\-\-\[\=*\[/)   #--[[ long (possibly multiline) comment ]]
+            num_equals = match.count("=") # Number must match for comment end
+            encoder.begin_group(:comment)
+            encoder.text_token(match, :delimiter)
+            state = :long_comment
+          
+          elsif match = scan(/--.*$/) # --Lua comment
+            encoder.text_token(match, :comment)
+          
+          elsif match = scan(/\[=*\[/)     # [[ long (possibly multiline) string ]]
+            num_equals = match.count("=") # Number must match for string end
+            encoder.begin_group(:string)
+            encoder.text_token(match, :delimiter)
+            state = :long_string
+          
+          elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/) # ::goto_label:: (single-character names such as ::a:: are valid, too)
+            encoder.text_token(match, :label)
+          
+          elsif match = scan(/_[A-Z]+(?![a-zA-Z0-9_])/) # _UPPERCASE are names reserved for Lua; the lookahead keeps `_Foo' from being split into `_F' + `oo'
+            encoder.text_token(match, :predefined)
+          
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
+            kind = IDENT_KIND[match]
+            
+            # Extra highlighting for entities following certain keywords
+            if kind == :keyword and match == "function"
+              state = :function_expected
+            elsif kind == :keyword and match == "goto"
+              state = :goto_label_expected
+            elsif kind == :keyword and match == "local"
+              state = :local_var_expected
+            end
+            
+            encoder.text_token(match, kind)
+          
+          elsif match = scan(/\{/) # Opening table brace {
+            encoder.begin_group(:map)
+            encoder.text_token(match, brace_depth >= 1 ? :inline_delimiter : :delimiter)
+            brace_depth += 1
+            state        = :map
+          
+          elsif match = scan(/\}/) # Closing table brace }
+            if brace_depth == 1
+              brace_depth = 0
+              encoder.text_token(match, :delimiter)
+              encoder.end_group(:map)
+            elsif brace_depth == 0 # Mismatched brace
+              encoder.text_token(match, :error)
+            else
+              brace_depth -= 1
+              encoder.text_token(match, :inline_delimiter)
+              encoder.end_group(:map)
+              state = :map
+            end
+          
+          elsif match = scan(/["']/) # String delimiters " and '
+            encoder.begin_group(:string)
+            encoder.text_token(match, :delimiter)
+            start_delim = match
+            state       = :string
+          
+                            # ↓Prefix                hex number ←|→ decimal number
+          elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+            encoder.text_token(match, :float)
+          
+                            # ↓Prefix         hex number ←|→ decimal number
+          elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+            encoder.text_token(match, :integer)
+          
+          elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
+            encoder.text_token(match, :operator)
+          
+          elsif match = scan(/\s+/) # Space
+            encoder.text_token(match, :space)
+          
+          else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors.
+            encoder.text_token(getch, :error)
+          end
+          
+          # It may be that we’re scanning a full-blown subexpression of a table
+          # (tables can contain full expressions in parts).
+          # If this is the case, return to :map scanning state.
+          state = :map if state == :initial && brace_depth >= 1
+        
+        when :function_expected
+          if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
+            encoder.text_token(match, :operator)
+            state = :initial
+          elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+            encoder.text_token(match, :ident)
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
+            encoder.text_token(match, :function)
+            state = :initial
+          elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+            state = :initial
+          end
+        
+        when :goto_label_expected
+          if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+            encoder.text_token(match, :label)
+            state = :initial
+          elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :local_var_expected
+          if match = scan(/function/) # local function ...
+            encoder.text_token(match, :keyword)
+            state = :function_expected
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+            encoder.text_token(match, :local_variable)
+          elsif match = scan(/,/)
+            encoder.text_token(match, :operator)
+          elsif match = scan(/\=/)
+            encoder.text_token(match, :operator)
+            # After encountering the equal sign, arbitrary expressions are
+            # allowed again, so just return to the main state for further
+            # parsing.
+            state = :initial
+          elsif match = scan(/\n/)
+            encoder.text_token(match, :space)
+            state = :initial
+          elsif match = scan(/\s+/)
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :long_comment
+          if match = scan(/.*?(?=\]={#{num_equals}}\])/m)
+            encoder.text_token(match, :content) unless match.empty? # `--[[]]' has no content; never emit an empty token
+            
+            delim = scan(/\]={#{num_equals}}\]/)
+            encoder.text_token(delim, :delimiter)
+          else # No terminator found till EOF
+            encoder.text_token(rest, :error)
+            terminate
+          end
+          encoder.end_group(:comment)
+          state = :initial
+        
+        when :long_string
+          if match = scan(/.*?(?=\]={#{num_equals}}\])/m) # Long strings do not interpret any escape sequences
+            encoder.text_token(match, :content) unless match.empty? # `[[]]' has no content; never emit an empty token
+            
+            delim = scan(/\]={#{num_equals}}\]/)
+            encoder.text_token(delim, :delimiter)
+          else # No terminator found till EOF
+            encoder.text_token(rest, :error)
+            terminate
+          end
+          encoder.end_group(:string)
+          state = :initial
+        
+        when :string
+          if match = scan(/[^\\#{start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+            encoder.text_token(match, :content)
+          elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
+            encoder.text_token(match, :char)
+          elsif match = scan(Regexp.compile(start_delim))
+            encoder.text_token(match, :delimiter)
+            encoder.end_group(:string)
+            state = :initial
+          elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
+            encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+            encoder.end_group(:string)
+            state = :initial
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :map
+          if match = scan(/[,;]/)
+            encoder.text_token(match, :operator)
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
+            encoder.text_token(match, :key)
+            encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
+            encoder.text_token(scan(/\=/), :operator)
+            state = :initial
+          elsif match = scan(/\s+/m)
+            encoder.text_token(match, :space)
+          else
+            # Note this clause doesn’t advance the scan pointer, it’s a kind of
+            # "retry with other options" (the :initial state then of course
+            # advances the pointer).
+            state = :initial
+          end
+        else
+          raise "Unknown state: #{state.inspect}" # Only reachable with a bogus options[:state]
+        end
+        
+      end
+      
+      if options[:keep_state]
+        @state = state
+      end
+      
+      encoder.end_group :string if [:string].include? state
+      brace_depth.times { encoder.end_group :map }
+      
+      encoder
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
index 6c68834..7a8d75d 100644
--- a/lib/coderay/scanners/php.rb
+++ b/lib/coderay/scanners/php.rb
@@ -265,7 +265,7 @@ module Scanners
             @html_scanner.tokenize match unless match.empty?
           end
         
-        when :php
+        when :php, :php_inline
           if match = scan(/\s+/)
             encoder.text_token match, :space
           
@@ -332,7 +332,7 @@ module Scanners
             if states.size == 1
               encoder.text_token match, :error
             else
-              states.pop
+              state = states.pop
               if states.last.is_a?(::Array)
                 delimiter = states.last[1]
                 states[-1] = states.last[0]
@@ -340,6 +340,7 @@ module Scanners
                 encoder.end_group :inline
               else
                 encoder.text_token match, :operator
+                encoder.end_group :inline if state == :php_inline
                 label_expected = true
               end
             end
@@ -350,7 +351,14 @@ module Scanners
           
           elsif match = scan(RE::PHP_END)
             encoder.text_token match, :inline_delimiter
-            states = [:initial]
+            while state = states.pop
+              encoder.end_group :string if [:sqstring, :dqstring].include? state
+              if state.is_a? Array
+                encoder.end_group :inline
+                encoder.end_group :string if [:sqstring, :dqstring].include? state.first
+              end
+            end
+            states << :initial
           
           elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
             encoder.begin_group :string
@@ -400,6 +408,7 @@ module Scanners
           elsif match = scan(/\\/)
             encoder.text_token match, :error
           else
+            encoder.end_group :string
             states.pop
           end
         
@@ -459,7 +468,7 @@ module Scanners
               encoder.begin_group :inline
               states[-1] = [states.last, delimiter]
               delimiter = nil
-              states.push :php
+              states.push :php_inline
               encoder.text_token match, :delimiter
             else
               encoder.text_token match, :content
@@ -469,6 +478,7 @@ module Scanners
           elsif match = scan(/\$/)
             encoder.text_token match, :content
           else
+            encoder.end_group :string
             states.pop
           end
         
@@ -500,6 +510,14 @@ module Scanners
         
       end
       
+      while state = states.pop
+        encoder.end_group :string if [:sqstring, :dqstring].include? state
+        if state.is_a? Array
+          encoder.end_group :inline
+          encoder.end_group :string if [:sqstring, :dqstring].include? state.first
+        end
+      end
+      
       encoder
     end
     
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index a9492ab..09c8b6e 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -157,12 +157,12 @@ module Scanners
             encoder.text_token match, :operator
           
           elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
+            modifiers = self[1]
             string_delimiter = self[2]
-            string_type = docstring_coming ? :docstring : :string
+            string_type = docstring_coming ? :docstring : (modifiers == 'b' ? :binary : :string)
             docstring_coming = false if docstring_coming
             encoder.begin_group string_type
             string_raw = false
-            modifiers = self[1]
             unless modifiers.empty?
               string_raw = !!modifiers.index(?r)
               encoder.text_token modifiers, :modifier
diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb
index 7a21354..1effdc8 100644
--- a/lib/coderay/scanners/raydebug.rb
+++ b/lib/coderay/scanners/raydebug.rb
@@ -1,23 +1,30 @@
+require 'set'
+
 module CodeRay
 module Scanners
-
-  # = Debug Scanner
+  
+  # = Raydebug Scanner
   # 
-  # Parses the output of the Encoders::Debug encoder.
+  # Highlights the output of the Encoders::Debug encoder.
   class Raydebug < Scanner
-
+    
     register_for :raydebug
     file_extension 'raydebug'
     title 'CodeRay Token Dump'
     
   protected
     
+    def setup
+      super
+      @known_token_kinds = TokenKinds.keys.map(&:to_s).to_set  # TokenKinds keys as strings; scan_tokens only calls to_sym on kind names found in this set
+    end
+    
     def scan_tokens encoder, options
-
+      
       opened_tokens = []
-
+      
       until eos?
-
+        
         if match = scan(/\s+/)
           encoder.text_token match, :space
           
@@ -26,20 +33,22 @@ module Scanners
           encoder.text_token kind, :class
           encoder.text_token '(', :operator
           match = self[2]
-          encoder.text_token match, kind.to_sym
+          unless match.empty?
+            if @known_token_kinds.include? kind
+              encoder.text_token match, kind.to_sym
+            else
+              encoder.text_token match, :plain
+            end
+          end
           encoder.text_token match, :operator if match = scan(/\)/)
           
         elsif match = scan(/ (\w+) ([<\[]) /x)
-          kind = self[1]
-          case self[2]
-          when '<'
-            encoder.text_token kind, :class
-          when '['
-            encoder.text_token kind, :class
+          encoder.text_token self[1], :class
+          if @known_token_kinds.include? self[1]
+            kind = self[1].to_sym
           else
-            raise 'CodeRay bug: This case should not be reached.'
+            kind = :unknown
           end
-          kind = kind.to_sym
           opened_tokens << kind
           encoder.begin_group kind
           encoder.text_token self[2], :operator
@@ -59,8 +68,8 @@ module Scanners
       
       encoder
     end
-
+    
   end
-
+  
 end
 end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index c5cf1e2..80165ca 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -96,7 +96,7 @@ module Scanners
                                       /#{patterns::METHOD_NAME}/o)
               
               kind = patterns::IDENT_KIND[match]
-              if kind == :ident && value_expected != :colon_expected && scan(/:(?!:)/)
+              if value_expected != :colon_expected && scan(/:(?!:)/)
                 value_expected = true
                 encoder.text_token match, :key
                 encoder.text_token ':',   :operator
@@ -269,7 +269,7 @@ module Scanners
             end
             
             if last_state
-              state = last_state
+              state = last_state unless state.is_a?(StringState)  # otherwise, a simple 'def"' results in unclosed tokens
               last_state = nil
             end
             
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index ed071d2..0b36e13 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -157,13 +157,16 @@ module Scanners
       yield
     ])
     
-    FANCY_STRING_START = / % ( [QqrsWwx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x
+    FANCY_STRING_START = / % ( [iIqQrswWx] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /x
     FANCY_STRING_KIND = Hash.new(:string).merge({
+      'i' => :symbol,
+      'I' => :symbol,
       'r' => :regexp,
       's' => :symbol,
       'x' => :shell,
     })
     FANCY_STRING_INTERPRETED = Hash.new(true).merge({
+      'i' => false,
       'q' => false,
       's' => false,
       'w' => false,
diff --git a/lib/coderay/scanners/ruby/string_state.rb b/lib/coderay/scanners/ruby/string_state.rb
index 2f398d1..28ddd6c 100644
--- a/lib/coderay/scanners/ruby/string_state.rb
+++ b/lib/coderay/scanners/ruby/string_state.rb
@@ -16,7 +16,6 @@ module Scanners
       
       STRING_PATTERN = Hash.new do |h, k|
         delim, interpreted = *k
-        # delim = delim.dup  # workaround for old Ruby
         delim_pattern = Regexp.escape(delim)
         if closing_paren = CLOSING_PAREN[delim]
           delim_pattern << Regexp.escape(closing_paren)
@@ -29,12 +28,13 @@ module Scanners
         #     '| [|?*+(){}\[\].^$]'
         #   end
         
-        h[k] =
-          if interpreted && delim != '#'
-            / (?= [#{delim_pattern}] | \# [{$@] ) /mx
-          else
-            / (?= [#{delim_pattern}] ) /mx
-          end
+        if interpreted && delim != '#'
+          / (?= [#{delim_pattern}] | \# [{$@] ) /mx
+        else
+          / (?= [#{delim_pattern}] ) /mx
+        end.tap do |pattern|
+          h[k] = pattern if (delim.respond_to?(:ord) ? delim.ord : delim[0]) < 256
+        end
       end
       
       def initialize kind, interpreted, delim, heredoc = false
diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb
index 0eb2caa..e3296b9 100644
--- a/lib/coderay/scanners/sass.rb
+++ b/lib/coderay/scanners/sass.rb
@@ -7,13 +7,6 @@ module Scanners
     register_for :sass
     file_extension 'sass'
     
-    SASS_FUNCTION = /(?:inline-image|linear-gradient|color-stops|mix|lighten|darken|rotate|image-url|image-width|image-height|sprite-url|sprite-path|sprite-file|sprite-map|sprite-position|sprite|unquote|join|round|ceil|floor|nth)/
-    
-    STRING_CONTENT_PATTERN = {
-      "'" => /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/,
-      '"' => /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/,
-    }
-    
   protected
     
     def setup
@@ -21,15 +14,28 @@ module Scanners
     end
     
     def scan_tokens encoder, options
-      states = Array(options[:state] || @state)
-      string_delimiter = nil
+      states = Array(options[:state] || @state).dup
+      
+      encoder.begin_group :string if states.last == :sqstring || states.last == :dqstring
       
       until eos?
         
-        if match = scan(/\s+/)
+        if bol? && (match = scan(/(?>( +)?(\/[\*\/])(.+)?)(?=\n)/))
+          encoder.text_token self[1], :space if self[1]
+          encoder.begin_group :comment
+          encoder.text_token self[2], :delimiter
+          encoder.text_token self[3], :content if self[3]
+          if match = scan(/(?:\n+#{self[1]} .*)+/)
+            encoder.text_token match, :content
+          end
+          encoder.end_group :comment
+        elsif match = scan(/\n|[^\n\S]+\n?/)
           encoder.text_token match, :space
-          value_expected = false if match.index(/\n/)
-          
+          if match.index(/\n/)
+            value_expected = false
+            states.pop if states.last == :include
+          end
+        
         elsif states.last == :sass_inline && (match = scan(/\}/))
           encoder.text_token match, :inline_delimiter
           encoder.end_group :inline
@@ -38,16 +44,16 @@ module Scanners
         elsif case states.last
           when :initial, :media, :sass_inline
             if match = scan(/(?>#{RE::Ident})(?!\()/ox)
-              encoder.text_token match, value_expected ? :value : (check(/.*:/) ? :key : :type)
+              encoder.text_token match, value_expected ? :value : (check(/.*:(?![a-z])/) ? :key : :tag)
               next
             elsif !value_expected && (match = scan(/\*/))
-              encoder.text_token match, :type
+              encoder.text_token match, :tag
               next
             elsif match = scan(RE::Class)
               encoder.text_token match, :class
               next
             elsif match = scan(RE::Id)
-              encoder.text_token match, :constant
+              encoder.text_token match, :id
               next
             elsif match = scan(RE::PseudoClass)
               encoder.text_token match, :pseudo_class
@@ -61,7 +67,11 @@ module Scanners
             elsif match = scan(/(\=|@mixin +)#{RE::Ident}/o)
               encoder.text_token match, :function
               next
-            elsif match = scan(/@media/)
+            elsif match = scan(/@import\b/)
+              encoder.text_token match, :directive
+              states << :include
+              next
+            elsif match = scan(/@media\b/)
               encoder.text_token match, :directive
               # states.push :media_before_name
               next
@@ -77,29 +87,34 @@ module Scanners
               next
             end
             
-          when :string
-            if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
+          when :sqstring, :dqstring
+            if match = scan(states.last == :sqstring ? /(?:[^\n\'\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o : /(?:[^\n\"\#]+|\\\n|#{RE::Escape}|#(?!\{))+/o)
               encoder.text_token match, :content
             elsif match = scan(/['"]/)
               encoder.text_token match, :delimiter
               encoder.end_group :string
-              string_delimiter = nil
               states.pop
             elsif match = scan(/#\{/)
               encoder.begin_group :inline
               encoder.text_token match, :inline_delimiter
               states.push :sass_inline
             elsif match = scan(/ \\ | $ /x)
-              encoder.end_group state
+              encoder.end_group states.last
               encoder.text_token match, :error unless match.empty?
               states.pop
             else
-              raise_inspect "else case #{string_delimiter} reached; %p not handled." % peek(1), encoder
+              raise_inspect "else case #{states.last} reached; %p not handled." % peek(1), encoder
+            end
+          
+          when :include
+            if match = scan(/[^\s'",]+/)
+              encoder.text_token match, :include
+              next
             end
           
           else
             #:nocov:
-            raise_inspect 'Unknown state', encoder
+            raise_inspect 'Unknown state: %p' % [states.last], encoder
             #:nocov:
             
           end
@@ -137,20 +152,17 @@ module Scanners
           
         elsif match = scan(/['"]/)
           encoder.begin_group :string
-          string_delimiter = match
           encoder.text_token match, :delimiter
           if states.include? :sass_inline
-            content = scan_until(/(?=#{string_delimiter}|\}|\z)/)
+            # no nesting, just scan the string until delimiter
+            content = scan_until(/(?=#{match}|\}|\z)/)
             encoder.text_token content, :content unless content.empty?
-            encoder.text_token string_delimiter, :delimiter if scan(/#{string_delimiter}/)
+            encoder.text_token match, :delimiter if scan(/#{match}/)
             encoder.end_group :string
           else
-            states.push :string
+            states.push match == "'" ? :sqstring : :dqstring
           end
           
-        elsif match = scan(/#{SASS_FUNCTION}/o)
-          encoder.text_token match, :predefined
-          
         elsif match = scan(/#{RE::Function}/o)
           encoder.begin_group :function
           start = match[/^[-\w]+\(/]
@@ -159,10 +171,13 @@ module Scanners
             encoder.text_token match[start.size..-2], :content
             encoder.text_token ')', :delimiter
           else
-            encoder.text_token match[start.size..-1], :content
+            encoder.text_token match[start.size..-1], :content if start.size < match.size
           end
           encoder.end_group :function
           
+        elsif match = scan(/[a-z][-a-z_]*(?=\()/o)
+          encoder.text_token match, :predefined
+          
         elsif match = scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
           encoder.text_token match, :float
           
@@ -175,7 +190,7 @@ module Scanners
         elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
           encoder.text_token match, :color
           
-        elsif match = scan(/@else if\b|#{RE::AtKeyword}/)
+        elsif match = scan(/@else if\b|#{RE::AtKeyword}/o)
           encoder.text_token match, :directive
           value_expected = true
           
@@ -194,8 +209,18 @@ module Scanners
         
       end
       
+      states.pop if states.last == :include
+      
       if options[:keep_state]
-        @state = states
+        @state = states.dup
+      end
+      
+      while state = states.pop
+        if state == :sass_inline
+          encoder.end_group :inline
+        elsif state == :sqstring || state == :dqstring
+          encoder.end_group :string
+        end
       end
       
       encoder
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
index b757278..93aeaf3 100644
--- a/lib/coderay/scanners/sql.rb
+++ b/lib/coderay/scanners/sql.rb
@@ -1,8 +1,9 @@
-module CodeRay module Scanners
+module CodeRay
+module Scanners
   
   # by Josh Goebel
   class SQL < Scanner
-
+    
     register_for :sql
     
     KEYWORDS = %w(
@@ -149,6 +150,7 @@ module CodeRay module Scanners
               string_content = ''
             end
             encoder.text_token match, :error unless match.empty?
+            encoder.end_group :string
             state = :initial
           else
             raise "else case \" reached; %p not handled." % peek(1), encoder
@@ -171,4 +173,5 @@ module CodeRay module Scanners
     
   end
   
-end end
-\ No newline at end of file
+end
+end
diff --git a/lib/coderay/scanners/taskpaper.rb b/lib/coderay/scanners/taskpaper.rb
new file mode 100644
index 0000000..42670bc
--- /dev/null
+++ b/lib/coderay/scanners/taskpaper.rb
@@ -0,0 +1,36 @@
+module CodeRay
+module Scanners
+  
+  class Taskpaper < Scanner  # Scanner registered for :taskpaper; tags projects, tasks, completed tasks, @due tags and comments
+    
+    register_for :taskpaper
+    file_extension 'taskpaper'
+    
+  protected
+    
+    def scan_tokens encoder, options  # Emits tokens into encoder until end of input and returns encoder; patterns are tried in the order listed
+      until eos?
+        if match = scan(/\S.*:.*$/)                  # project
+          encoder.text_token(match, :namespace)
+        elsif match = scan(/-.+@done.*/)             # completed task
+          encoder.text_token(match, :done)
+        elsif match = scan(/-(?:[^@\n]+|@(?!due))*/) # task
+          encoder.text_token(match, :plain)
+        elsif match = scan(/@due.*/)                 # due tag
+          encoder.text_token(match, :important)
+        elsif match = scan(/.+/)                     # comment
+          encoder.text_token(match, :comment)
+        elsif match = scan(/\s+/)                    # space
+          encoder.text_token(match, :space)
+        else                                         # other
+          encoder.text_token getch, :error
+        end
+      end
+      
+      encoder
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/scanners/yaml.rb b/lib/coderay/scanners/yaml.rb
index 96f4e93..32c8e2c 100644
--- a/lib/coderay/scanners/yaml.rb
+++ b/lib/coderay/scanners/yaml.rb
@@ -47,7 +47,7 @@ module Scanners
           when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/)
             encoder.begin_group :string
             encoder.text_token match, :delimiter
-            encoder.text_token match, :content if match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
+            encoder.text_token match, :content if (match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)) && !match.empty?
             encoder.text_token match, :delimiter if match = scan(/"/)
             encoder.end_group :string
             next
@@ -84,7 +84,7 @@ module Scanners
           when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?= *:(?: |$))/)
             encoder.begin_group :key
             encoder.text_token match[0,1], :delimiter
-            encoder.text_token match[1..-2], :content
+            encoder.text_token match[1..-2], :content if match.size > 2
             encoder.text_token match[-1,1], :delimiter
             encoder.end_group :key
             key_indent = column(pos - match.size) - 1
diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb
index 1f073b6..d304dc4 100644
--- a/lib/coderay/styles/alpha.rb
+++ b/lib/coderay/styles/alpha.rb
@@ -3,14 +3,14 @@ module Styles
   
   # A colorful theme using CSS 3 colors (with alpha channel).
   class Alpha < Style
-
+    
     register_for :alpha
-
+    
     code_background = 'hsl(0,0%,95%)'
     numbers_background = 'hsl(180,65%,90%)'
     border_color = 'silver'
     normal_color = 'black'
-
+    
     CSS_MAIN_STYLES = <<-MAIN  # :nodoc:
 .CodeRay {
   background-color: #{code_background};
@@ -39,6 +39,9 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
   color: gray !important;
   text-decoration: none !important;
 }
+.CodeRay .line-numbers pre {
+  word-break: normal;
+}
 .CodeRay .line-numbers a:target { color: blue !important; }
 .CodeRay .line-numbers .highlighted { color: red !important; }
 .CodeRay .line-numbers .highlighted a { color: red !important; }
@@ -53,25 +56,26 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .annotation { color:#007 }
 .attribute-name { color:#b48 }
 .attribute-value { color:#700 }
-.binary { color:#509 }
+.binary { color:#549 }
+.binary .char { color:#325 }
+.binary .delimiter { color:#325 }
+.char { color:#D20 }
 .char .content { color:#D20 }
 .char .delimiter { color:#710 }
-.char { color:#D20 }
 .class { color:#B06; font-weight:bold }
 .class-variable { color:#369 }
 .color { color:#0A0 }
 .comment { color:#777 }
 .comment .char { color:#444 }
 .comment .delimiter { color:#444 }
-.complex { color:#A08 }
 .constant { color:#036; font-weight:bold }
 .decorator { color:#B0B }
 .definition { color:#099; font-weight:bold }
 .delimiter { color:black }
 .directive { color:#088; font-weight:bold }
-.doc { color:#970 }
-.doc-string { color:#D42; font-weight:bold }
+.docstring { color:#D42; }
 .doctype { color:#34b }
+.done { text-decoration: line-through; color: gray }
 .entity { color:#800; font-weight:bold }
 .error { color:#F00; background-color:#FAA }
 .escape  { color:#666 }
@@ -81,51 +85,55 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .function .delimiter { color:#024; font-weight:bold }
 .global-variable { color:#d70 }
 .hex { color:#02b }
-.imaginary { color:#f00 }
+.id  { color:#33D; font-weight:bold }
 .include { color:#B44; font-weight:bold }
 .inline { background-color: hsla(0,0%,0%,0.07); color: black }
 .inline-delimiter { font-weight: bold; color: #666 }
 .instance-variable { color:#33B }
 .integer  { color:#00D }
+.imaginary { color:#f00 }
 .important { color:#D00 }
+.key { color: #606 }
 .key .char { color: #60f }
 .key .delimiter { color: #404 }
-.key { color: #606 }
 .keyword { color:#080; font-weight:bold }
 .label { color:#970; font-weight:bold }
-.local-variable { color:#963 }
+.local-variable { color:#950 }
+.map .content { color:#808 }
+.map .delimiter { color:#40A}
+.map { background-color:hsla(200,100%,50%,0.06); }
 .namespace { color:#707; font-weight:bold }
 .octal { color:#40E }
 .operator { }
 .predefined { color:#369; font-weight:bold }
 .predefined-constant { color:#069 }
-.predefined-type { color:#0a5; font-weight:bold }
+.predefined-type { color:#0a8; font-weight:bold }
 .preprocessor { color:#579 }
 .pseudo-class { color:#00C; font-weight:bold }
+.regexp { background-color:hsla(300,100%,50%,0.06); }
 .regexp .content { color:#808 }
 .regexp .delimiter { color:#404 }
 .regexp .modifier { color:#C2C }
-.regexp { background-color:hsla(300,100%,50%,0.06); }
 .reserved { color:#080; font-weight:bold }
+.shell { background-color:hsla(120,100%,50%,0.06); }
 .shell .content { color:#2B2 }
 .shell .delimiter { color:#161 }
-.shell { background-color:hsla(120,100%,50%,0.06); }
+.string { background-color:hsla(0,100%,50%,0.05); }
 .string .char { color: #b0b }
 .string .content { color: #D20 }
 .string .delimiter { color: #710 }
 .string .modifier { color: #E40 }
-.string { background-color:hsla(0,100%,50%,0.05); }
-.symbol .content { color:#A60 }
-.symbol .delimiter { color:#630 }
 .symbol { color:#A60 }
-.tag { color:#070 }
+.symbol .content { color:#A60 }
+.symbol .delimiter { color:#740 }
+.tag { color:#070; font-weight:bold }
 .type { color:#339; font-weight:bold }
-.value { color: #088; }
-.variable  { color:#037 }
+.value { color: #088 }
+.variable { color:#037 }
 
 .insert { background: hsla(120,100%,50%,0.12) }
 .delete { background: hsla(0,100%,50%,0.12) }
-.change { color: #bbf; background: #007; }
+.change { color: #bbf; background: #007 }
 .head { color: #f8f; background: #505 }
 .head .filename { color: white; }
 
@@ -137,8 +145,8 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .change .change { color: #88f }
 .head .head { color: #f4f }
     TOKENS
-
+    
   end
-
+  
 end
 end
diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb
index 8db8530..f911862 100755
--- a/lib/coderay/token_kinds.rb
+++ b/lib/coderay/token_kinds.rb
@@ -1,88 +1,85 @@
 module CodeRay
   
   # A Hash of all known token kinds and their associated CSS classes.
-  TokenKinds = Hash.new do |h, k|
-    warn 'Undefined Token kind: %p' % [k] if $CODERAY_DEBUG
-    false
-  end
+  TokenKinds = Hash.new(false)
   
   # speedup
   TokenKinds.compare_by_identity if TokenKinds.respond_to? :compare_by_identity
   
   TokenKinds.update(  # :nodoc:
-    :annotation          => 'annotation',
-    :attribute_name      => 'attribute-name',
-    :attribute_value     => 'attribute-value',
-    :binary              => 'bin',
-    :char                => 'char',
-    :class               => 'class',
-    :class_variable      => 'class-variable',
-    :color               => 'color',
-    :comment             => 'comment',
-    :complex             => 'complex',
-    :constant            => 'constant',
-    :content             => 'content',
-    :debug               => 'debug',
-    :decorator           => 'decorator',
-    :definition          => 'definition',
-    :delimiter           => 'delimiter',
-    :directive           => 'directive',
-    :doc                 => 'doc',
-    :doctype             => 'doctype',
-    :doc_string          => 'doc-string',
-    :entity              => 'entity',
-    :error               => 'error',
-    :escape              => 'escape',
-    :exception           => 'exception',
-    :filename            => 'filename',
-    :float               => 'float',
-    :function            => 'function',
-    :global_variable     => 'global-variable',
-    :hex                 => 'hex',
-    :imaginary           => 'imaginary',
-    :important           => 'important',
-    :include             => 'include',
-    :inline              => 'inline',
-    :inline_delimiter    => 'inline-delimiter',
-    :instance_variable   => 'instance-variable',
-    :integer             => 'integer',
-    :key                 => 'key',
-    :keyword             => 'keyword',
-    :label               => 'label',
-    :local_variable      => 'local-variable',
-    :modifier            => 'modifier',
-    :namespace           => 'namespace',
-    :octal               => 'octal',
-    :predefined          => 'predefined',
-    :predefined_constant => 'predefined-constant',
-    :predefined_type     => 'predefined-type',
-    :preprocessor        => 'preprocessor',
-    :pseudo_class        => 'pseudo-class',
-    :regexp              => 'regexp',
-    :reserved            => 'reserved',
-    :shell               => 'shell',
-    :string              => 'string',
-    :symbol              => 'symbol',
-    :tag                 => 'tag',
-    :type                => 'type',
-    :value               => 'value',
-    :variable            => 'variable',
+    :debug               => 'debug',              # highlight for debugging (white on blue background)
     
-    :change              => 'change',
-    :delete              => 'delete',
-    :head                => 'head',
-    :insert              => 'insert',
+    :annotation          => 'annotation',         # Groovy, Java
+    :attribute_name      => 'attribute-name',     # HTML, CSS
+    :attribute_value     => 'attribute-value',    # HTML
+    :binary              => 'binary',             # Python, Ruby
+    :char                => 'char',               # most scanners, also inside of strings
+    :class               => 'class',              # lots of scanners, for different purposes also in CSS
+    :class_variable      => 'class-variable',     # Ruby, YAML
+    :color               => 'color',              # CSS
+    :comment             => 'comment',            # most scanners
+    :constant            => 'constant',           # PHP, Ruby
+    :content             => 'content',            # inside of strings, most scanners
+    :decorator           => 'decorator',          # Python
+    :definition          => 'definition',         # CSS
+    :delimiter           => 'delimiter',          # inside strings, comments and other types
+    :directive           => 'directive',          # lots of scanners
+    :doctype             => 'doctype',            # Groovy, HTML, Ruby, YAML
+    :docstring           => 'docstring',          # Python
+    :done                => 'done',               # Taskpaper
+    :entity              => 'entity',             # HTML
+    :error               => 'error',              # invalid token, most scanners
+    :escape              => 'escape',             # Ruby (string inline variables like #$foo, #@bar)
+    :exception           => 'exception',          # Java, PHP, Python
+    :filename            => 'filename',           # Diff
+    :float               => 'float',              # most scanners
+    :function            => 'function',           # CSS, JavaScript, PHP
+    :global_variable     => 'global-variable',    # Ruby, YAML
+    :hex                 => 'hex',                # hexadecimal number; lots of scanners
+    :id                  => 'id',                 # CSS
+    :imaginary           => 'imaginary',          # Python
+    :important           => 'important',          # CSS, Taskpaper
+    :include             => 'include',            # C, Groovy, Java, Python, Sass
+    :inline              => 'inline',             # nested code, eg. inline string evaluation; lots of scanners
+    :inline_delimiter    => 'inline-delimiter',   # used instead of :inline > :delimiter FIXME: Why use inline_delimiter?
+    :instance_variable   => 'instance-variable',  # Ruby
+    :integer             => 'integer',            # most scanners
+    :key                 => 'key',                # lots of scanners, used together with :value
+    :keyword             => 'keyword',            # reserved word that's actually implemented; most scanners
+    :label               => 'label',              # C, PHP
+    :local_variable      => 'local-variable',     # local and magic variables; some scanners
+    :map                 => 'map',                # Lua tables
+    :modifier            => 'modifier',           # used inside of strings; lots of scanners
+    :namespace           => 'namespace',          # Clojure, Java, Taskpaper
+    :octal               => 'octal',              # lots of scanners
+    :predefined          => 'predefined',         # predefined function: lots of scanners
+    :predefined_constant => 'predefined-constant',# lots of scanners
+    :predefined_type     => 'predefined-type',    # C, Java, PHP
+    :preprocessor        => 'preprocessor',       # C, Delphi, HTML
+    :pseudo_class        => 'pseudo-class',       # CSS
+    :regexp              => 'regexp',             # Groovy, JavaScript, Ruby
+    :reserved            => 'reserved',           # most scanners
+    :shell               => 'shell',              # Ruby
+    :string              => 'string',             # most scanners
+    :symbol              => 'symbol',             # Clojure, Ruby, YAML
+    :tag                 => 'tag',                # CSS, HTML
+    :type                => 'type',               # CSS, Java, SQL, YAML
+    :value               => 'value',              # used together with :key; CSS, JSON, YAML
+    :variable            => 'variable',           # Sass, SQL, YAML
     
-    :eyecatcher          => 'eyecatcher',
+    :change              => 'change',             # Diff
+    :delete              => 'delete',             # Diff
+    :head                => 'head',               # Diff, YAML
+    :insert              => 'insert',             # Diff
+    :eyecatcher          => 'eyecatcher',         # Diff
     
-    :ident               => false,
-    :operator            => false,
+    :ident               => false,                # almost all scanners
+    :operator            => false,                # almost all scanners
     
-    :space               => false,
-    :plain               => false
+    :space               => false,                # almost all scanners
+    :plain               => false                 # almost all scanners
   )
   
-  TokenKinds[:method]    = TokenKinds[:function]
-  TokenKinds[:escape]    = TokenKinds[:delimiter]
-  TokenKinds[:docstring] = TokenKinds[:comment]
+  TokenKinds[:method]  = TokenKinds[:function]
+  TokenKinds[:unknown] = TokenKinds[:plain]
 end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 6957d69..e7bffce 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,55 +1,43 @@
 module CodeRay
   
-  # GZip library for writing and reading token dumps.
-  autoload :GZip, coderay_path('helpers', 'gzip')
-  
-  # = Tokens  TODO: Rewrite!
-  #
-  # The Tokens class represents a list of tokens returnd from
-  # a Scanner.
+  # The Tokens class represents a list of tokens returned from
+  # a Scanner. It's actually just an Array with a few helper methods.
   #
-  # A token is not a special object, just a two-element Array
-  # consisting of
+  # A token itself is not a special object, just two elements in an Array:
   # * the _token_ _text_ (the original source of the token in a String) or
   #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
   # * the _token_ _kind_ (a Symbol representing the type of the token)
   #
-  # A token looks like this:
+  # It looks like this:
   #
-  #   ['# It looks like this', :comment]
-  #   ['3.1415926', :float]
-  #   ['$^', :error]
+  #   ..., '# It looks like this', :comment, ...
+  #   ..., '3.1415926', :float, ...
+  #   ..., '$^', :error, ...
   #
   # Some scanners also yield sub-tokens, represented by special
-  # token actions, namely begin_group and end_group.
+  # token actions, for example :begin_group and :end_group.
   #
   # The Ruby scanner, for example, splits "a string" into:
   #
   #  [
-  #   [:begin_group, :string],
-  #   ['"', :delimiter],
-  #   ['a string', :content],
-  #   ['"', :delimiter],
-  #   [:end_group, :string]
+  #   :begin_group, :string,
+  #   '"',          :delimiter,
+  #   'a string',   :content,
+  #   '"',          :delimiter,
+  #   :end_group,   :string
   #  ]
   #
-  # Tokens is the interface between Scanners and Encoders:
-  # The input is split and saved into a Tokens object. The Encoder
-  # then builds the output from this object.
-  #
-  # Thus, the syntax below becomes clear:
+  # Tokens can be used to save the output of a Scanner in a simple
+  # Ruby object that can be sent to an Encoder later:
   #
-  #   CodeRay.scan('price = 2.59', :ruby).html
-  #   # the Tokens object is here -------^
-  #
-  # See how small it is? ;)
+  #   tokens = CodeRay.scan('price = 2.59', :ruby).tokens
+  #   tokens.encode(:html)
+  #   tokens.html
+  #   CodeRay.encoder(:html).encode_tokens(tokens)
   #
   # Tokens gives you the power to handle pre-scanned code very easily:
-  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
-  # that you put in your DB.
-  # 
-  # It also allows you to generate tokens directly (without using a scanner),
-  # to load them from a file, and still use any Encoder that CodeRay provides.
+  # You can serialize it to a JSON string and store it in a database, pass it
+  # around to encode it more than once, send it to other algorithms...
   class Tokens < Array
     
     # The Scanner instance that created the tokens.
@@ -58,8 +46,7 @@ module CodeRay
     # Encode the tokens using encoder.
     #
     # encoder can be
-    # * a symbol like :html oder :statistic
-    # * an Encoder class
+    # * a plugin name like :html or 'statistic'
     # * an Encoder object
     #
     # options are passed to the encoder.
@@ -157,53 +144,11 @@ module CodeRay
       parts
     end
     
-    # Dumps the object into a String that can be saved
-    # in files or databases.
-    #
-    # The dump is created with Marshal.dump;
-    # In addition, it is gzipped using GZip.gzip.
-    #
-    # The returned String object includes Undumping
-    # so it has an #undump method. See Tokens.load.
-    #
-    # You can configure the level of compression,
-    # but the default value 7 should be what you want
-    # in most cases as it is a good compromise between
-    # speed and compression rate.
-    #
-    # See GZip module.
-    def dump gzip_level = 7
-      dump = Marshal.dump self
-      dump = GZip.gzip dump, gzip_level
-      dump.extend Undumping
-    end
-    
     # Return the actual number of tokens.
     def count
       size / 2
     end
     
-    # Include this module to give an object an #undump
-    # method.
-    #
-    # The string returned by Tokens.dump includes Undumping.
-    module Undumping
-      # Calls Tokens.load with itself.
-      def undump
-        Tokens.load self
-      end
-    end
-    
-    # Undump the object using Marshal.load, then
-    # unzip it using GZip.gunzip.
-    #
-    # The result is commonly a Tokens object, but
-    # this is not guaranteed.
-    def Tokens.load dump
-      dump = GZip.gunzip dump
-      @dump = Marshal.load dump
-    end
-    
     alias text_token push
     def begin_group kind; push :begin_group, kind end
     def end_group kind; push :end_group, kind end