Merge branch 'master' into go-scanner

Conflicts: lib/coderay/helpers/file_type.rb
author: Kornelius Kalnbach <murphy@rubychan.de> 2013-06-23 16:06:02 +0200
committer: Kornelius Kalnbach <murphy@rubychan.de> 2013-06-23 16:06:02 +0200
commit: 0013b649f714f23eef0859921fa7804ca7caef76 (patch)
tree: 7c278ee7c420729b4738fe2a195e529ffd2bb6da /lib/coderay
parent: addcbd446066d0da1627112814e3ce1b8d404da0 (diff)
parent: 64ca2ae8ad5130bdcf652aa7aa08298de00f20f4 (diff)
download: coderay-0013b649f714f23eef0859921fa7804ca7caef76.tar.gz
24 files changed, 742 insertions, 415 deletions
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index c03d3fb..f4db330 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -9,7 +9,6 @@ module Encoders
   #
   # You cannot fully restore the tokens information from the
   # output, because consecutive :space tokens are merged.
-  # Use Tokens#dump for caching purposes.
   # 
   # See also: Scanners::Debug
   class Debug < Encoder
@@ -18,38 +17,26 @@ module Encoders
     
     FILE_EXTENSION = 'raydebug'
     
-    def initialize options = {}
-      super
-      @opened = []
-    end
-    
     def text_token text, kind
-      raise 'empty token' if $CODERAY_DEBUG && text.empty?
       if kind == :space
         @out << text
       else
-        # TODO: Escape (
-        text = text.gsub(/[)\\]/, '\\\\\0') if text.index(/[)\\]/)
-        @out << kind.to_s << '(' << text << ')'
+        text = text.gsub('\\', '\\\\\\\\') if text.index('\\')
+        text = text.gsub(')',  '\\\\)')    if text.index(')')
+        @out << "#{kind}(#{text})"
       end
     end
     
     def begin_group kind
-      @opened << kind
-      @out << kind.to_s << '<'
+      @out << "#{kind}<"
     end
     
     def end_group kind
-      if @opened.last != kind
-        puts @out
-        raise "we are inside #{@opened.inspect}, not #{kind}"
-      end
-      @opened.pop
       @out << '>'
     end
     
     def begin_line kind
-      @out << kind.to_s << '['
+      @out << "#{kind}["
     end
     
     def end_line kind
diff --git a/lib/coderay/encoders/debug_lint.rb b/lib/coderay/encoders/debug_lint.rb
new file mode 100644
index 0000000..17a0795
--- /dev/null
+++ b/lib/coderay/encoders/debug_lint.rb
@@ -0,0 +1,64 @@
+module CodeRay
+module Encoders
+  
+  # = Debug Lint Encoder
+  #
+  # Debug encoder with additional checks for:
+  # 
+  # - empty tokens
+  # - incorrect nesting
+  # 
+  # It will raise an InvalidTokenStream exception when any of the above occurs.
+  # 
+  # See also: Encoders::Debug
+  class DebugLint < Debug
+    
+    register_for :debug_lint
+    
+    InvalidTokenStream = Class.new StandardError
+    EmptyToken = Class.new InvalidTokenStream
+    IncorrectTokenGroupNesting = Class.new InvalidTokenStream
+    
+    def text_token text, kind
+      raise EmptyToken, 'empty token' if text.empty?
+      super
+    end
+    
+    def begin_group kind
+      @opened << kind
+      super
+    end
+    
+    def end_group kind
+      raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_group)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+      @opened.pop
+      super
+    end
+    
+    def begin_line kind
+      @opened << kind
+      super
+    end
+    
+    def end_line kind
+      raise IncorrectTokenGroupNesting, 'We are inside %s, not %p (end_line)' % [@opened.reverse.map(&:inspect).join(' < '), kind] if @opened.last != kind
+      @opened.pop
+      super
+    end
+    
+    protected
+    
+    def setup options
+      super
+      @opened = []
+    end
+    
+    def finish options
+      raise 'Some tokens still open at end of token stream: %p' % [@opened] unless @opened.empty?
+      super
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
index b897f5e..20f2409 100644
--- a/lib/coderay/encoders/html.rb
+++ b/lib/coderay/encoders/html.rb
@@ -193,7 +193,6 @@ module Encoders
     
     def finish options
       unless @opened.empty?
-        warn '%d tokens still open: %p' % [@opened.size, @opened] if $CODERAY_DEBUG
         @out << '</span>' while @opened.pop
         @last_opened = nil
       end
diff --git a/lib/coderay/encoders/html/numbering.rb b/lib/coderay/encoders/html/numbering.rb
index 332145b..a1b9c04 100644
--- a/lib/coderay/encoders/html/numbering.rb
+++ b/lib/coderay/encoders/html/numbering.rb
@@ -26,7 +26,7 @@ module Encoders
               "<a href=\"##{anchor}\" name=\"#{anchor}\">#{line}</a>"
             end
           else
-            proc { |line| line.to_s }  # :to_s.to_proc in Ruby 1.8.7+
+            :to_s.to_proc
           end
         
         bold_every = options[:bold_every]
@@ -75,7 +75,7 @@ module Encoders
           line_number = start
           output.gsub!(/^.*$\n?/) do |line|
             line_number_text = bolding.call line_number
-            indent = ' ' * (max_width - line_number.to_s.size)  # TODO: Optimize (10^x)
+            indent = ' ' * (max_width - line_number.to_s.size)
             line_number += 1
             "<span class=\"line-numbers\">#{indent}#{line_number_text}</span>#{line}"
           end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index 2315d9e..b2f8b83 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -67,7 +67,6 @@ Token Types (%d):
       @type_stats['TOTAL'].count += 1
     end
     
-    # TODO Hierarchy handling
     def begin_group kind
       block_token ':begin_group', kind
     end
diff --git a/lib/coderay/encoders/terminal.rb b/lib/coderay/encoders/terminal.rb
index 9894b91..c7ae014 100644
--- a/lib/coderay/encoders/terminal.rb
+++ b/lib/coderay/encoders/terminal.rb
@@ -19,105 +19,135 @@ module CodeRay
       register_for :terminal
       
       TOKEN_COLORS = {
-        :annotation => "\e[35m",
-        :attribute_name => "\e[33m",
+        :debug => "\e[1;37;44m",
+        
+        :annotation => "\e[34m",
+        :attribute_name => "\e[35m",
         :attribute_value => "\e[31m",
-        :binary => "\e[1;35m",
+        :binary => {
+          :self => "\e[31m",
+          :char => "\e[1;31m",
+          :delimiter => "\e[1;31m",
+        },
         :char => {
-          :self => "\e[36m", :delimiter => "\e[1;34m"
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m"
         },
-        :class => "\e[1;35m",
+        :class => "\e[1;35;4m",
         :class_variable => "\e[36m",
         :color => "\e[32m",
-        :comment => "\e[37m",
-        :complex => "\e[1;34m",
-        :constant => "\e[1;34m\e[4m",
-        :decoration => "\e[35m",
-        :definition => "\e[1;32m",
-        :directive => "\e[32m\e[4m",
-        :doc => "\e[46m",
-        :doctype => "\e[1;30m",
-        :docstring => "\e[31m\e[4m",
-        :entity => "\e[33m",
-        :error => "\e[1;33m\e[41m",
+        :comment => {
+          :self => "\e[1;30m",
+          :char => "\e[37m",
+          :delimiter => "\e[37m",
+        },
+        :constant => "\e[1;34;4m",
+        :decorator => "\e[35m",
+        :definition => "\e[1;33m",
+        :directive => "\e[33m",
+        :docstring => "\e[31m",
+        :doctype => "\e[1;34m",
+        :done => "\e[1;30;2m",
+        :entity => "\e[31m",
+        :error => "\e[1;37;41m",
         :exception => "\e[1;31m",
         :float => "\e[1;35m",
         :function => "\e[1;34m",
-        :global_variable => "\e[42m",
+        :global_variable => "\e[1;32m",
         :hex => "\e[1;36m",
-        :include => "\e[33m",
+        :id => "\e[1;34m",
+        :include => "\e[31m",
         :integer => "\e[1;34m",
-        :key => "\e[35m",
-        :label => "\e[1;15m",
+        :imaginary => "\e[1;34m",
+        :important => "\e[1;31m",
+        :key => {
+          :self => "\e[35m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;35m",
+        },
+        :keyword => "\e[32m",
+        :label => "\e[1;33m",
         :local_variable => "\e[33m",
-        :octal => "\e[1;35m",
-        :operator_name => "\e[1;29m",
+        :namespace => "\e[1;35m",
+        :octal => "\e[1;34m",
+        :predefined => "\e[36m",
         :predefined_constant => "\e[1;36m",
-        :predefined_type => "\e[1;30m",
-        :predefined => "\e[4m\e[1;34m",
-        :preprocessor => "\e[36m",
+        :predefined_type => "\e[1;32m",
+        :preprocessor => "\e[1;36m",
         :pseudo_class => "\e[1;34m",
         :regexp => {
-          :self => "\e[31m",
-          :content => "\e[31m",
-          :delimiter => "\e[1;29m",
+          :self => "\e[35m",
+          :delimiter => "\e[1;35m",
           :modifier => "\e[35m",
+          :char => "\e[1;35m",
         },
-        :reserved => "\e[1;31m",
+        :reserved => "\e[32m",
         :shell => {
-          :self => "\e[42m",
-          :content => "\e[1;29m",
-          :delimiter => "\e[37m",
+          :self => "\e[33m",
+          :char => "\e[1;33m",
+          :delimiter => "\e[1;33m",
+          :escape => "\e[1;33m",
         },
         :string => {
-          :self => "\e[32m",
-          :modifier => "\e[1;32m",
-          :escape => "\e[1;36m",
-          :delimiter => "\e[1;32m",
-          :char => "\e[1;36m",
+          :self => "\e[31m",
+          :modifier => "\e[1;31m",
+          :char => "\e[1;35m",
+          :delimiter => "\e[1;31m",
+          :escape => "\e[1;31m",
+        },
+        :symbol => {
+          :self => "\e[33m",
+          :delimiter => "\e[1;33m",
         },
-        :symbol => "\e[1;32m",
-        :tag => "\e[1;34m",
+        :tag => "\e[32m",
         :type => "\e[1;34m",
         :value => "\e[36m",
-        :variable => "\e[1;34m",
+        :variable => "\e[34m",
         
-        :insert => "\e[42m",
-        :delete => "\e[41m",
-        :change => "\e[44m",
-        :head => "\e[45m"
+        :insert => {
+          :self => "\e[42m",
+          :insert => "\e[1;32;42m",
+          :eyecatcher => "\e[102m",
+        },
+        :delete => {
+          :self => "\e[41m",
+          :delete => "\e[1;31;41m",
+          :eyecatcher => "\e[101m",
+        },
+        :change => {
+          :self => "\e[44m",
+          :change => "\e[37;44m",
+        },
+        :head => {
+          :self => "\e[45m",
+          :filename => "\e[37;45m"
+        },
       }
+      
       TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved]
       TOKEN_COLORS[:method] = TOKEN_COLORS[:function]
-      TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex]
-      TOKEN_COLORS[:begin_group] = TOKEN_COLORS[:end_group] =
-        TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
+      TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]
       
     protected
       
       def setup(options)
         super
         @opened = []
-        @subcolors = nil
+        @color_scopes = [TOKEN_COLORS]
       end
       
     public
       
       def text_token text, kind
-        if color = (@subcolors || TOKEN_COLORS)[kind]
-          if Hash === color
-            if color[:self]
-              color = color[:self]
-            else
-              @out << text
-              return
-            end
-          end
+        if color = @color_scopes.last[kind]
+          color = color[:self] if color.is_a? Hash
           
           @out << color
-          @out << text.gsub("\n", "\e[0m\n" + color)
+          @out << (text.index("\n") ? text.gsub("\n", "\e[0m\n" + color) : text)
           @out << "\e[0m"
-          @out << @subcolors[:self] if @subcolors
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         else
           @out << text
         end
@@ -130,40 +160,33 @@ module CodeRay
       alias begin_line begin_group
       
       def end_group kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
+        if @opened.pop
+          @color_scopes.pop
           @out << "\e[0m"
-          @out << open_token(@opened.last)
+          if outer_color = @color_scopes.last[:self]
+            @out << outer_color
+          end
         end
       end
       
       def end_line kind
-        if @opened.empty?
-          # nothing to close
-        else
-          @opened.pop
-          # whole lines to be highlighted,
-          # eg. added/modified/deleted lines in a diff
-          @out << (@line_filler ||= "\t" * 100 + "\e[0m")
-          @out << open_token(@opened.last)
-        end
+        @out << (@line_filler ||= "\t" * 100)
+        end_group kind
       end
       
     private
       
       def open_token kind
-        if color = TOKEN_COLORS[kind]
-          if Hash === color
-            @subcolors = color
+        if color = @color_scopes.last[kind]
+          if color.is_a? Hash
+            @color_scopes << color
             color[:self]
           else
-            @subcolors = {}
+            @color_scopes << @color_scopes.last
             color
           end
         else
-          @subcolors = nil
+          @color_scopes << @color_scopes.last
           ''
         end
       end
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index 1a43924..9c36b62 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -77,55 +77,58 @@ module CodeRay
     end
     
     TypeFromExt = {
-      'c'        => :c,
-      'cfc'      => :xml,
-      'cfm'      => :xml,
-      'clj'      => :clojure,
-      'css'      => :css,
-      'diff'     => :diff,
-      'dpr'      => :delphi,
-      'erb'      => :erb,
-      'gemspec'  => :ruby,
-      'go'       => :go,
-      'groovy'   => :groovy,
-      'gvy'      => :groovy,
-      'h'        => :c,
-      'haml'     => :haml,
-      'htm'      => :html,
-      'html'     => :html,
-      'html.erb' => :erb,
-      'java'     => :java,
-      'js'       => :java_script,
-      'json'     => :json,
-      'mab'      => :ruby,
-      'pas'      => :delphi,
-      'patch'    => :diff,
-      'phtml'    => :php,
-      'php'      => :php,
-      'php3'     => :php,
-      'php4'     => :php,
-      'php5'     => :php,
-      'prawn'    => :ruby,
-      'py'       => :python,
-      'py3'      => :python,
-      'pyw'      => :python,
-      'rake'     => :ruby,
-      'raydebug' => :raydebug,
-      'rb'       => :ruby,
-      'rbw'      => :ruby,
-      'rhtml'    => :erb,
-      'rjs'      => :ruby,
-      'rpdf'     => :ruby,
-      'ru'       => :ruby,
-      'rxml'     => :ruby,
-      'sass'     => :sass,
-      'sql'      => :sql,
-      'tmproj'   => :xml,
-      'xaml'     => :xml,
-      'xhtml'    => :html,
-      'xml'      => :xml,
-      'yaml'     => :yaml,
-      'yml'      => :yaml,
+      'c'         => :c,
+      'cfc'       => :xml,
+      'cfm'       => :xml,
+      'clj'       => :clojure,
+      'css'       => :css,
+      'diff'      => :diff,
+      'dpr'       => :delphi,
+      'erb'       => :erb,
+      'gemspec'   => :ruby,
+      'go'        => :go, 
+      'groovy'    => :groovy,
+      'gvy'       => :groovy,
+      'h'         => :c,
+      'haml'      => :haml,
+      'htm'       => :html,
+      'html'      => :html,
+      'html.erb'  => :erb,
+      'java'      => :java,
+      'js'        => :java_script,
+      'json'      => :json,
+      'lua'       => :lua,
+      'mab'       => :ruby,
+      'pas'       => :delphi,
+      'patch'     => :diff,
+      'phtml'     => :php,
+      'php'       => :php,
+      'php3'      => :php,
+      'php4'      => :php,
+      'php5'      => :php,
+      'prawn'     => :ruby,
+      'py'        => :python,
+      'py3'       => :python,
+      'pyw'       => :python,
+      'rake'      => :ruby,
+      'raydebug'  => :raydebug,
+      'rb'        => :ruby,
+      'rbw'       => :ruby,
+      'rhtml'     => :erb,
+      'rjs'       => :ruby,
+      'rpdf'      => :ruby,
+      'ru'        => :ruby,
+      'rxml'      => :ruby,
+      'sass'      => :sass,
+      'sql'       => :sql,
+      'taskpaper' => :taskpaper,
+      'template'  => :json,  # AWS CloudFormation template
+      'tmproj'    => :xml,
+      'xaml'      => :xml,
+      'xhtml'     => :html,
+      'xml'       => :xml,
+      'yaml'      => :yaml,
+      'yml'       => :yaml,
     }
     for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
       TypeFromExt[cpp_alias] = :cpp
diff --git a/lib/coderay/helpers/gzip.rb b/lib/coderay/helpers/gzip.rb
deleted file mode 100644
index 245014a..0000000
--- a/lib/coderay/helpers/gzip.rb
+++ /dev/null
@@ -1,41 +0,0 @@
-module CodeRay
-  
-  # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
-  module GZip
-    
-    require 'zlib'
-    
-    # The default zipping level. 7 zips good and fast.
-    DEFAULT_GZIP_LEVEL = 7
-    
-    # Unzips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   print GZip.gunzip(File.read('adresses.gz'))
-    def GZip.gunzip s
-      Zlib::Inflate.inflate s
-    end
-    
-    # Zips the given string +s+.
-    #
-    # Example:
-    #   require 'gzip_simple'
-    #   File.open('adresses.gz', 'w') do |file
-    #     file.write GZip.gzip('Mum: 0123 456 789', 9)
-    #   end
-    #
-    # If you provide a +level+, you can control how strong
-    # the string is compressed:
-    # - 0: no compression, only convert to gzip format
-    # - 1: compress fast
-    # - 7: compress more, but still fast (default)
-    # - 8: compress more, slower
-    # - 9: compress best, very slow
-    def GZip.gzip s, level = DEFAULT_GZIP_LEVEL
-      Zlib::Deflate.new(level).deflate s, Zlib::FINISH
-    end
-    
-  end
-  
-end
diff --git a/lib/coderay/scanners/css.rb b/lib/coderay/scanners/css.rb
index 732f9c5..9ed4618 100644
--- a/lib/coderay/scanners/css.rb
+++ b/lib/coderay/scanners/css.rb
@@ -145,10 +145,10 @@ module Scanners
           start = match[/^\w+\(/]
           encoder.text_token start, :delimiter
           if match[-1] == ?)
-            encoder.text_token match[start.size..-2], :content
+            encoder.text_token match[start.size..-2], :content if match.size > start.size + 1
             encoder.text_token ')', :delimiter
           else
-            encoder.text_token match[start.size..-1], :content
+            encoder.text_token match[start.size..-1], :content if match.size > start.size
           end
           encoder.end_group :function
           
diff --git a/lib/coderay/scanners/diff.rb b/lib/coderay/scanners/diff.rb
index af0f755..fd1aed6 100644
--- a/lib/coderay/scanners/diff.rb
+++ b/lib/coderay/scanners/diff.rb
@@ -69,7 +69,7 @@ module Scanners
             state = :added
           elsif match = scan(/\\ .*/)
             encoder.text_token match, :comment
-          elsif match = scan(/@@(?>[^@\n]*)@@/)
+          elsif match = scan(/@@(?>[^@\n]+)@@/)
             content_scanner.state = :initial unless match?(/\n\+/)
             content_scanner_entry_state = nil
             if check(/\n|$/)
diff --git a/lib/coderay/scanners/groovy.rb b/lib/coderay/scanners/groovy.rb
index cf55daf..c64454f 100644
--- a/lib/coderay/scanners/groovy.rb
+++ b/lib/coderay/scanners/groovy.rb
@@ -36,9 +36,12 @@ module Scanners
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     def scan_tokens encoder, options
-      
-      state = :initial
+      state = options[:state] || @state
       inline_block_stack = []
       inline_block_paren_depth = nil
       string_delimiter = nil
@@ -223,7 +226,7 @@ module Scanners
             encoder.text_token match, :content  # TODO: Shouldn't this be :error?
             
           elsif match = scan(/ \\ | \n /x)
-            encoder.end_group state
+            encoder.end_group state == :regexp ? :regexp : :string
             encoder.text_token match, :error
             after_def = value_expected = false
             state = :initial
@@ -243,7 +246,17 @@ module Scanners
       end
       
       if [:multiline_string, :string, :regexp].include? state
-        encoder.end_group state
+        encoder.end_group state == :regexp ? :regexp : :string
+      end
+      
+      if options[:keep_state]
+        @state = state
+      end
+      
+      until inline_block_stack.empty?
+        state, = *inline_block_stack.pop
+        encoder.end_group :inline
+        encoder.end_group state == :regexp ? :regexp : :string
       end
       
       encoder
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 3ba3b79..ebe7b01 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -1,13 +1,13 @@
 module CodeRay
 module Scanners
-
+  
   # HTML Scanner
   # 
   # Alias: +xhtml+
   # 
   # See also: Scanners::XML
   class HTML < Scanner
-
+    
     register_for :html
     
     KINDS_NOT_LOC = [
@@ -33,7 +33,8 @@ module Scanners
     )
     
     IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
-      add(EVENT_ATTRIBUTES, :script)
+      add(EVENT_ATTRIBUTES, :script).
+      add(['style'], :style)
     
     ATTR_NAME = /[\w.:-]+/  # :nodoc:
     TAG_END = /\/?>/  # :nodoc:
@@ -75,9 +76,14 @@ module Scanners
     def scan_java_script encoder, code
       if code && !code.empty?
         @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
-        # encoder.begin_group :inline
         @java_script_scanner.tokenize code, :tokens => encoder
-        # encoder.end_group :inline
+      end
+    end
+    
+    def scan_css encoder, code, state = [:initial]
+      if code && !code.empty?
+        @css_scanner ||= Scanners::CSS.new '', :keep_tokens => true
+        @css_scanner.tokenize code, :tokens => encoder, :state => state
       end
     end
     
@@ -99,7 +105,15 @@ module Scanners
           case state
           
           when :initial
-            if match = scan(/<!--(?:.*?-->|.*)/m)
+            if match = scan(/<!\[CDATA\[/)
+              encoder.text_token match, :inline_delimiter
+              if match = scan(/.*?\]\]>/m)
+                encoder.text_token match[0..-4], :plain
+                encoder.text_token ']]>', :inline_delimiter
+              elsif match = scan(/.+/)
+                encoder.text_token match, :error
+              end
+            elsif match = scan(/<!--(?:.*?-->|.*)/m)
               encoder.text_token match, :comment
             elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
               encoder.text_token match, :doctype
@@ -110,7 +124,7 @@ module Scanners
             elsif match = scan(/<\/[-\w.:]*>?/m)
               in_tag = nil
               encoder.text_token match, :tag
-            elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
+            elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m)
               encoder.text_token match, :tag
               in_tag = self[1]
               if self[2]
@@ -161,17 +175,21 @@ module Scanners
               encoder.text_token match, :attribute_value
               state = :attribute
             elsif match = scan(/["']/)
-              if in_attribute == :script
-                encoder.begin_group :inline
-                encoder.text_token match, :inline_delimiter
+              if in_attribute == :script || in_attribute == :style
+                encoder.begin_group :string
+                encoder.text_token match, :delimiter
                 if scan(/javascript:[ \t]*/)
                   encoder.text_token matched, :comment
                 end
                 code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
-                scan_java_script encoder, code
+                if in_attribute == :script
+                  scan_java_script encoder, code
+                else
+                  scan_css encoder, code, [:block]
+                end
                 match = scan(/["']/)
-                encoder.text_token match, :inline_delimiter if match
-                encoder.end_group :inline
+                encoder.text_token match, :delimiter if match
+                encoder.end_group :string
                 state = :attribute
                 in_attribute = nil
               else
@@ -206,19 +224,23 @@ module Scanners
             
           when :in_special_tag
             case in_tag
-            when 'script'
+            when 'script', 'style'
               encoder.text_token match, :space if match = scan(/[ \t]*\n/)
               if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
                 code = self[2] || self[4]
                 closing = self[3]
                 encoder.text_token self[1], :comment
               else
-                code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
+                code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/)
                 closing = false
               end
               unless code.empty?
                 encoder.begin_group :inline
-                scan_java_script encoder, code
+                if in_tag == 'script'
+                  scan_java_script encoder, code
+                else
+                  scan_css encoder, code
+                end
                 encoder.end_group :inline
               end
               encoder.text_token closing, :comment if closing
diff --git a/lib/coderay/scanners/json.rb b/lib/coderay/scanners/json.rb
index 4e0f462..b09970c 100644
--- a/lib/coderay/scanners/json.rb
+++ b/lib/coderay/scanners/json.rb
@@ -14,15 +14,21 @@ module Scanners
     
     ESCAPE = / [bfnrt\\"\/] /x  # :nodoc:
     UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc:
+    KEY = / (?> (?: [^\\"]+ | \\. )* ) " \s* : /x
     
   protected
     
+    def setup
+      @state = :initial
+    end
+    
     # See http://json.org/ for a definition of the JSON lexic/grammar.
     def scan_tokens encoder, options
+      state = options[:state] || @state
       
-      state = :initial
-      stack = []
-      key_expected = false
+      if [:string, :key].include? state
+        encoder.begin_group state
+      end
       
       until eos?
         
@@ -32,18 +38,11 @@ module Scanners
           if match = scan(/ \s+ /x)
             encoder.text_token match, :space
           elsif match = scan(/"/)
-            state = key_expected ? :key : :string
+            state = check(/#{KEY}/o) ? :key : :string
             encoder.begin_group state
             encoder.text_token match, :delimiter
           elsif match = scan(/ [:,\[{\]}] /x)
             encoder.text_token match, :operator
-            case match
-            when ':' then key_expected = false
-            when ',' then key_expected = true if stack.last == :object
-            when '{' then stack << :object; key_expected = true
-            when '[' then stack << :array
-            when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
-            end
           elsif match = scan(/ true | false | null /x)
             encoder.text_token match, :value
           elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
@@ -82,6 +81,10 @@ module Scanners
         end
       end
       
+      if options[:keep_state]
+        @state = state
+      end
+      
       if [:string, :key].include? state
         encoder.end_group state
       end
diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb
new file mode 100644
index 0000000..fb1e45a
--- /dev/null
+++ b/lib/coderay/scanners/lua.rb
@@ -0,0 +1,280 @@
+# encoding: utf-8
+
+module CodeRay
+module Scanners
+
+  # Scanner for the Lua[http://lua.org] programming language.
+  #
+  # The language’s complete syntax is defined in
+  # {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
+  # which is what this scanner tries to conform to.
+  class Lua < Scanner
+    
+    register_for :lua
+    file_extension 'lua'
+    title 'Lua'
+    
+    # Keywords used in Lua (tagged as :keyword through IDENT_KIND).
+    KEYWORDS = %w[and break do else elseif end
+      for function goto if in
+      local not or repeat return
+      then until while
+    ]
+    
+    # Constants set by the Lua core (tagged as :predefined_constant).
+    PREDEFINED_CONSTANTS = %w[false true nil]
+    
+    # The expressions contained in this array are parts of Lua’s `basic'
+    # library. Although it’s not strictly necessary to load that library,
+    # it is highly recommended, and one would have to provide one’s own
+    # implementations of some of these expressions otherwise. They are
+    # neither keywords nor constants, but nearly predefined, so they
+    # get tagged as `predefined' rather than anything else.
+    #
+    # This list excludes values of the form `_UPPERCASE' because the Lua
+    # manual reserves such identifiers for Lua anyway; the scanner
+    # highlights them directly, without the need for specific
+    # identifiers to be listed here.
+    PREDEFINED_EXPRESSIONS = %w[
+      assert collectgarbage dofile error getmetatable
+      ipairs load loadfile next pairs pcall print
+      rawequal rawget rawlen rawset select setmetatable
+      tonumber tostring type xpcall
+    ]
+    
+    # Automatic token kind selection for normal words (keywords, constants, predefined names).
+    IDENT_KIND = CodeRay::WordList.new(:ident).
+      add(KEYWORDS, :keyword).
+      add(PREDEFINED_CONSTANTS, :predefined_constant).
+      add(PREDEFINED_EXPRESSIONS, :predefined)
+    
+    protected
+    
+    # Scanner initialization: begin in the :initial state with a table brace depth of zero.
+    def setup
+      @state = :initial
+      @brace_depth = 0
+    end
+    
+    # CodeRay entry hook. Tokenizes the input with a state machine and sends the tokens to +encoder+; returns +encoder+.
+    def scan_tokens(encoder, options)
+      state = options[:state] || @state
+      brace_depth = @brace_depth
+      num_equals = nil
+      
+      until eos?
+        case state
+        
+        when :initial
+          if match = scan(/\-\-\[\=*\[/)   #--[[ long (possibly multiline) comment ]]
+            num_equals = match.count("=") # Number must match for comment end
+            encoder.begin_group(:comment)
+            encoder.text_token(match, :delimiter)
+            state = :long_comment
+          
+          elsif match = scan(/--.*$/) # --Lua comment
+            encoder.text_token(match, :comment)
+          
+          elsif match = scan(/\[=*\[/)     # [[ long (possibly multiline) string ]]
+            num_equals = match.count("=") # Number must match for string end
+            encoder.begin_group(:string)
+            encoder.text_token(match, :delimiter)
+            state = :long_string
+          
+          elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/) # ::goto_label:: (single-character names like ::a:: are valid too)
+            encoder.text_token(match, :label)
+          
+          elsif match = scan(/_[A-Z]+\b/) # _UPPERCASE are names reserved for Lua; \b keeps `_Foo' or `_G1' whole for the ident rule below
+            encoder.text_token(match, :predefined)
+          
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
+            kind = IDENT_KIND[match]
+            
+            # Extra highlighting for entities following certain keywords
+            if kind == :keyword and match == "function"
+              state = :function_expected
+            elsif kind == :keyword and match == "goto"
+              state = :goto_label_expected
+            elsif kind == :keyword and match == "local"
+              state = :local_var_expected
+            end
+            
+            encoder.text_token(match, kind)
+          
+          elsif match = scan(/\{/) # Opening table brace {
+            encoder.begin_group(:map)
+            encoder.text_token(match, brace_depth >= 1 ? :inline_delimiter : :delimiter)
+            brace_depth += 1
+            state        = :map
+          
+          elsif match = scan(/\}/) # Closing table brace }
+            if brace_depth == 1
+              brace_depth = 0
+              encoder.text_token(match, :delimiter)
+              encoder.end_group(:map)
+            elsif brace_depth == 0 # Mismatched brace
+              encoder.text_token(match, :error)
+            else
+              brace_depth -= 1
+              encoder.text_token(match, :inline_delimiter)
+              encoder.end_group(:map)
+              state = :map
+            end
+          
+          elsif match = scan(/["']/) # String delimiters " and '
+            encoder.begin_group(:string)
+            encoder.text_token(match, :delimiter)
+            start_delim = match
+            state       = :string
+          
+                            # ↓Prefix                hex number ←|→ decimal number
+          elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+            encoder.text_token(match, :float)
+          
+                            # ↓Prefix         hex number ←|→ decimal number
+          elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
+            encoder.text_token(match, :integer)
+          
+          elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
+            encoder.text_token(match, :operator)
+          
+          elsif match = scan(/\s+/) # Space
+            encoder.text_token(match, :space)
+          
+          else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors.
+            encoder.text_token(getch, :error)
+          end
+          
+          # It may be that we’re scanning a full-blown subexpression of a table
+          # (tables can contain full expressions in parts).
+          # If this is the case, return to :map scanning state.
+          state = :map if state == :initial && brace_depth >= 1
+        
+        when :function_expected
+          if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
+            encoder.text_token(match, :operator)
+            state = :initial
+          elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
+            encoder.text_token(match, :ident)
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
+            encoder.text_token(match, :function)
+            state = :initial
+          elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+            state = :initial
+          end
+        
+        when :goto_label_expected
+          if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+            encoder.text_token(match, :label)
+            state = :initial
+          elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
+            encoder.text_token(match, :space)
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :local_var_expected
+          if match = scan(/function\b/) # local function ... (\b: don't cut names like `functionality' apart)
+            encoder.text_token(match, :keyword)
+            state = :function_expected
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
+            encoder.text_token(match, :local_variable)
+          elsif match = scan(/,/)
+            encoder.text_token(match, :operator)
+          elsif match = scan(/\=/)
+            encoder.text_token(match, :operator)
+            # After encountering the equal sign, arbitrary expressions are
+            # allowed again, so just return to the main state for further
+            # parsing.
+            state = :initial
+          elsif match = scan(/[^\S\n]*\n/) # A line break (even after trailing blanks) ends the declaration
+            encoder.text_token(match, :space)
+            state = :initial
+          elsif match = scan(/\s+/)
+            encoder.text_token(match, :space)
+          else
+            state = :initial # Not part of a name list (e.g. `;' or a comment): rescan in :initial, which always advances the pointer
+          end
+        
+        when :long_comment
+          if match = scan(/.*?(?=\]={#{num_equals}}\])/m)
+            encoder.text_token(match, :content)
+            
+            delim = scan(/\]={#{num_equals}}\]/)
+            encoder.text_token(delim, :delimiter)
+          else # No terminator found till EOF
+            encoder.text_token(rest, :error)
+            terminate
+          end
+          encoder.end_group(:comment)
+          state = :initial
+        
+        when :long_string
+          if match = scan(/.*?(?=\]={#{num_equals}}\])/m) # Long strings do not interpret any escape sequences
+            encoder.text_token(match, :content)
+            
+            delim = scan(/\]={#{num_equals}}\]/)
+            encoder.text_token(delim, :delimiter)
+          else # No terminator found till EOF
+            encoder.text_token(rest, :error)
+            terminate
+          end
+          encoder.end_group(:string)
+          state = :initial
+        
+        when :string
+          if match = scan(/[^\\#{start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
+            encoder.text_token(match, :content)
+          elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
+            encoder.text_token(match, :char)
+          elsif match = scan(Regexp.compile(start_delim))
+            encoder.text_token(match, :delimiter)
+            encoder.end_group(:string)
+            state = :initial
+          elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
+            encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
+            encoder.end_group(:string)
+            state = :initial
+          else
+            encoder.text_token(getch, :error)
+          end
+        
+        when :map
+          if match = scan(/[,;]/)
+            encoder.text_token(match, :operator)
+          elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
+            encoder.text_token(match, :key)
+            encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
+            encoder.text_token(scan(/\=/), :operator)
+            state = :initial
+          elsif match = scan(/\s+/m)
+            encoder.text_token(match, :space)
+          else
+            # Note this clause doesn’t advance the scan pointer, it’s a kind of
+            # "retry with other options" (the :initial state then of course
+            # advances the pointer).
+            state = :initial
+          end
+        else
+          raise "Unknown state: #{state.inspect}"
+        end
+        
+      end
+      
+      if options[:keep_state]
+        @state = state
+      end
+      
+      encoder.end_group :string if [:string].include? state
+      brace_depth.times { encoder.end_group :map }
+      
+      encoder
+    end
+    
+  end
+  
+end
+end
diff --git a/lib/coderay/scanners/php.rb b/lib/coderay/scanners/php.rb
index 6c68834..7a8d75d 100644
--- a/lib/coderay/scanners/php.rb
+++ b/lib/coderay/scanners/php.rb
@@ -265,7 +265,7 @@ module Scanners
             @html_scanner.tokenize match unless match.empty?
           end
         
-        when :php
+        when :php, :php_inline
           if match = scan(/\s+/)
             encoder.text_token match, :space
           
@@ -332,7 +332,7 @@ module Scanners
             if states.size == 1
               encoder.text_token match, :error
             else
-              states.pop
+              state = states.pop
               if states.last.is_a?(::Array)
                 delimiter = states.last[1]
                 states[-1] = states.last[0]
@@ -340,6 +340,7 @@ module Scanners
                 encoder.end_group :inline
               else
                 encoder.text_token match, :operator
+                encoder.end_group :inline if state == :php_inline
                 label_expected = true
               end
             end
@@ -350,7 +351,14 @@ module Scanners
           
           elsif match = scan(RE::PHP_END)
             encoder.text_token match, :inline_delimiter
-            states = [:initial]
+            while state = states.pop
+              encoder.end_group :string if [:sqstring, :dqstring].include? state
+              if state.is_a? Array
+                encoder.end_group :inline
+                encoder.end_group :string if [:sqstring, :dqstring].include? state.first
+              end
+            end
+            states << :initial
           
           elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
             encoder.begin_group :string
@@ -400,6 +408,7 @@ module Scanners
           elsif match = scan(/\\/)
             encoder.text_token match, :error
           else
+            encoder.end_group :string
             states.pop
           end
         
@@ -459,7 +468,7 @@ module Scanners
               encoder.begin_group :inline
               states[-1] = [states.last, delimiter]
               delimiter = nil
-              states.push :php
+              states.push :php_inline
               encoder.text_token match, :delimiter
             else
               encoder.text_token match, :content
@@ -469,6 +478,7 @@ module Scanners
           elsif match = scan(/\$/)
             encoder.text_token match, :content
           else
+            encoder.end_group :string
             states.pop
           end
         
@@ -500,6 +510,14 @@ module Scanners
         
       end
       
+      while state = states.pop
+        encoder.end_group :string if [:sqstring, :dqstring].include? state
+        if state.is_a? Array
+          encoder.end_group :inline
+          encoder.end_group :string if [:sqstring, :dqstring].include? state.first
+        end
+      end
+      
       encoder
     end
     
diff --git a/lib/coderay/scanners/python.rb b/lib/coderay/scanners/python.rb
index a9492ab..09c8b6e 100644
--- a/lib/coderay/scanners/python.rb
+++ b/lib/coderay/scanners/python.rb
@@ -157,12 +157,12 @@ module Scanners
             encoder.text_token match, :operator
           
           elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
+            modifiers = self[1]
             string_delimiter = self[2]
-            string_type = docstring_coming ? :docstring : :string
+            string_type = docstring_coming ? :docstring : (modifiers == 'b' ? :binary : :string)
             docstring_coming = false if docstring_coming
             encoder.begin_group string_type
             string_raw = false
-            modifiers = self[1]
             unless modifiers.empty?
               string_raw = !!modifiers.index(?r)
               encoder.text_token modifiers, :modifier
diff --git a/lib/coderay/scanners/raydebug.rb b/lib/coderay/scanners/raydebug.rb
index 7a21354..d39d962 100644
--- a/lib/coderay/scanners/raydebug.rb
+++ b/lib/coderay/scanners/raydebug.rb
@@ -1,11 +1,11 @@
 module CodeRay
 module Scanners
-
+  
   # = Debug Scanner
   # 
   # Parses the output of the Encoders::Debug encoder.
   class Raydebug < Scanner
-
+    
     register_for :raydebug
     file_extension 'raydebug'
     title 'CodeRay Token Dump'
@@ -13,11 +13,11 @@ module Scanners
   protected
     
     def scan_tokens encoder, options
-
+      
       opened_tokens = []
-
+      
       until eos?
-
+        
         if match = scan(/\s+/)
           encoder.text_token match, :space
           
@@ -26,7 +26,7 @@ module Scanners
           encoder.text_token kind, :class
           encoder.text_token '(', :operator
           match = self[2]
-          encoder.text_token match, kind.to_sym
+          encoder.text_token match, kind.to_sym unless match.empty?
           encoder.text_token match, :operator if match = scan(/\)/)
           
         elsif match = scan(/ (\w+) ([<\[]) /x)
@@ -59,8 +59,8 @@ module Scanners
       
       encoder
     end
-
+    
   end
-
+  
 end
 end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index c282f31..80165ca 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -269,7 +269,7 @@ module Scanners
             end
             
             if last_state
-              state = last_state
+              state = last_state unless state.is_a?(StringState)  # otherwise, a simple 'def"' results in unclosed tokens
               last_state = nil
             end
             
diff --git a/lib/coderay/scanners/sass.rb b/lib/coderay/scanners/sass.rb
index 167051d..e20bebe 100644
--- a/lib/coderay/scanners/sass.rb
+++ b/lib/coderay/scanners/sass.rb
@@ -176,7 +176,7 @@ module Scanners
             encoder.text_token match[start.size..-2], :content
             encoder.text_token ')', :delimiter
           else
-            encoder.text_token match[start.size..-1], :content
+            encoder.text_token match[start.size..-1], :content if start.size < match.size
           end
           encoder.end_group :function
           
@@ -195,7 +195,7 @@ module Scanners
         elsif match = scan(/(?:rgb|hsl)a?\([^()\n]*\)?/)
           encoder.text_token match, :color
           
-        elsif match = scan(/@else if\b|#{RE::AtKeyword}/)
+        elsif match = scan(/@else if\b|#{RE::AtKeyword}/o)
           encoder.text_token match, :directive
           value_expected = true
           
@@ -218,6 +218,14 @@ module Scanners
         @state = states
       end
       
+      while state = states.pop
+        if state == :sass_inline
+          encoder.end_group :inline
+        elsif state == :string
+          encoder.end_group :string
+        end
+      end
+      
       encoder
     end
     
diff --git a/lib/coderay/scanners/sql.rb b/lib/coderay/scanners/sql.rb
index b757278..93aeaf3 100644
--- a/lib/coderay/scanners/sql.rb
+++ b/lib/coderay/scanners/sql.rb
@@ -1,8 +1,9 @@
-module CodeRay module Scanners
+module CodeRay
+module Scanners
   
   # by Josh Goebel
   class SQL < Scanner
-
+    
     register_for :sql
     
     KEYWORDS = %w(
@@ -149,6 +150,7 @@ module CodeRay module Scanners
               string_content = ''
             end
             encoder.text_token match, :error unless match.empty?
+            encoder.end_group :string
             state = :initial
           else
             raise "else case \" reached; %p not handled." % peek(1), encoder
@@ -171,4 +173,5 @@ module CodeRay module Scanners
     
   end
   
-end end
-\ No newline at end of file
+end
+end
diff --git a/lib/coderay/scanners/yaml.rb b/lib/coderay/scanners/yaml.rb
index 96f4e93..32c8e2c 100644
--- a/lib/coderay/scanners/yaml.rb
+++ b/lib/coderay/scanners/yaml.rb
@@ -47,7 +47,7 @@ module Scanners
           when !check(/(?:"[^"]*")(?=: |:$)/) && match = scan(/"/)
             encoder.begin_group :string
             encoder.text_token match, :delimiter
-            encoder.text_token match, :content if match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
+            encoder.text_token match, :content if (match = scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)) && !match.empty?
             encoder.text_token match, :delimiter if match = scan(/"/)
             encoder.end_group :string
             next
@@ -84,7 +84,7 @@ module Scanners
           when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?= *:(?: |$))/)
             encoder.begin_group :key
             encoder.text_token match[0,1], :delimiter
-            encoder.text_token match[1..-2], :content
+            encoder.text_token match[1..-2], :content if match.size > 2
             encoder.text_token match[-1,1], :delimiter
             encoder.end_group :key
             key_indent = column(pos - match.size) - 1
diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb
index f57e4a1..ff85ecc 100644
--- a/lib/coderay/styles/alpha.rb
+++ b/lib/coderay/styles/alpha.rb
@@ -3,14 +3,14 @@ module Styles
   
   # A colorful theme using CSS 3 colors (with alpha channel).
   class Alpha < Style
-
+    
     register_for :alpha
-
+    
     code_background = 'hsl(0,0%,95%)'
     numbers_background = 'hsl(180,65%,90%)'
     border_color = 'silver'
     normal_color = 'black'
-
+    
     CSS_MAIN_STYLES = <<-MAIN  # :nodoc:
 .CodeRay {
   background-color: #{code_background};
@@ -56,25 +56,26 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .annotation { color:#007 }
 .attribute-name { color:#b48 }
 .attribute-value { color:#700 }
-.binary { color:#509 }
+.binary { color:#549 }
+.binary .char { color:#325 }
+.binary .delimiter { color:#325 }
+.char { color:#D20 }
 .char .content { color:#D20 }
 .char .delimiter { color:#710 }
-.char { color:#D20 }
 .class { color:#B06; font-weight:bold }
 .class-variable { color:#369 }
 .color { color:#0A0 }
 .comment { color:#777 }
 .comment .char { color:#444 }
 .comment .delimiter { color:#444 }
-.complex { color:#A08 }
 .constant { color:#036; font-weight:bold }
 .decorator { color:#B0B }
 .definition { color:#099; font-weight:bold }
 .delimiter { color:black }
 .directive { color:#088; font-weight:bold }
-.doc { color:#970 }
-.doc-string { color:#D42; font-weight:bold }
+.docstring { color:#D42; }
 .doctype { color:#34b }
+.done { text-decoration: line-through; color: gray }
 .entity { color:#800; font-weight:bold }
 .error { color:#F00; background-color:#FAA }
 .escape  { color:#666 }
@@ -85,19 +86,22 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .global-variable { color:#d70 }
 .hex { color:#02b }
 .id  { color:#33D; font-weight:bold }
-.imaginary { color:#f00 }
 .include { color:#B44; font-weight:bold }
 .inline { background-color: hsla(0,0%,0%,0.07); color: black }
 .inline-delimiter { font-weight: bold; color: #666 }
 .instance-variable { color:#33B }
 .integer  { color:#00D }
+.imaginary { color:#f00 }
 .important { color:#D00 }
+.key { color: #606 }
 .key .char { color: #60f }
 .key .delimiter { color: #404 }
-.key { color: #606 }
 .keyword { color:#080; font-weight:bold }
 .label { color:#970; font-weight:bold }
-.local-variable { color:#963 }
+.local-variable { color:#950 }
+.map .content { color:#808 }
+.map .delimiter { color:#40A}
+.map { background-color:hsla(200,100%,50%,0.06); }
 .namespace { color:#707; font-weight:bold }
 .octal { color:#40E }
 .operator { }
@@ -106,30 +110,30 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .predefined-type { color:#0a5; font-weight:bold }
 .preprocessor { color:#579 }
 .pseudo-class { color:#00C; font-weight:bold }
+.regexp { background-color:hsla(300,100%,50%,0.06); }
 .regexp .content { color:#808 }
 .regexp .delimiter { color:#404 }
 .regexp .modifier { color:#C2C }
-.regexp { background-color:hsla(300,100%,50%,0.06); }
 .reserved { color:#080; font-weight:bold }
+.shell { background-color:hsla(120,100%,50%,0.06); }
 .shell .content { color:#2B2 }
 .shell .delimiter { color:#161 }
-.shell { background-color:hsla(120,100%,50%,0.06); }
+.string { background-color:hsla(0,100%,50%,0.05); }
 .string .char { color: #b0b }
 .string .content { color: #D20 }
 .string .delimiter { color: #710 }
 .string .modifier { color: #E40 }
-.string { background-color:hsla(0,100%,50%,0.05); }
+.symbol { color:#A60 }
 .symbol .content { color:#A60 }
 .symbol .delimiter { color:#630 }
-.symbol { color:#A60 }
-.tag { color:#070 }
+.tag { color:#070; font-weight:bold }
 .type { color:#339; font-weight:bold }
-.value { color: #088; }
-.variable  { color:#037 }
+.value { color: #088 }
+.variable { color:#037 }
 
 .insert { background: hsla(120,100%,50%,0.12) }
 .delete { background: hsla(0,100%,50%,0.12) }
-.change { color: #bbf; background: #007; }
+.change { color: #bbf; background: #007 }
 .head { color: #f8f; background: #505 }
 .head .filename { color: white; }
 
@@ -140,11 +144,9 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top; }
 .delete .delete { color: #c00; background:transparent; font-weight:bold }
 .change .change { color: #88f }
 .head .head { color: #f4f }
-
-.done { text-decoration: line-through; color: gray }
     TOKENS
-
+    
   end
-
+  
 end
 end
diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb
index de3a0d0..9137a49 100755
--- a/lib/coderay/token_kinds.rb
+++ b/lib/coderay/token_kinds.rb
@@ -10,79 +10,78 @@ module CodeRay
   TokenKinds.compare_by_identity if TokenKinds.respond_to? :compare_by_identity
   
   TokenKinds.update(  # :nodoc:
-    :annotation          => 'annotation',
-    :attribute_name      => 'attribute-name',
-    :attribute_value     => 'attribute-value',
-    :binary              => 'bin',
-    :char                => 'char',
-    :class               => 'class',
-    :class_variable      => 'class-variable',
-    :color               => 'color',
-    :comment             => 'comment',
-    :complex             => 'complex',
-    :constant            => 'constant',
-    :content             => 'content',
-    :debug               => 'debug',
-    :decorator           => 'decorator',
-    :definition          => 'definition',
-    :delimiter           => 'delimiter',
-    :directive           => 'directive',
-    :doc                 => 'doc',
-    :doctype             => 'doctype',
-    :docstring           => 'doc-string',
-    :done                => 'done',
-    :entity              => 'entity',
-    :error               => 'error',
-    :escape              => 'escape',
-    :exception           => 'exception',
-    :filename            => 'filename',
-    :float               => 'float',
-    :function            => 'function',
-    :global_variable     => 'global-variable',
-    :hex                 => 'hex',
-    :id                  => 'id',
-    :imaginary           => 'imaginary',
-    :important           => 'important',
-    :include             => 'include',
-    :inline              => 'inline',
-    :inline_delimiter    => 'inline-delimiter',
-    :instance_variable   => 'instance-variable',
-    :integer             => 'integer',
-    :key                 => 'key',
-    :keyword             => 'keyword',
-    :label               => 'label',
-    :local_variable      => 'local-variable',
-    :modifier            => 'modifier',
-    :namespace           => 'namespace',
-    :octal               => 'octal',
-    :predefined          => 'predefined',
-    :predefined_constant => 'predefined-constant',
-    :predefined_type     => 'predefined-type',
-    :preprocessor        => 'preprocessor',
-    :pseudo_class        => 'pseudo-class',
-    :regexp              => 'regexp',
-    :reserved            => 'reserved',
-    :shell               => 'shell',
-    :string              => 'string',
-    :symbol              => 'symbol',
-    :tag                 => 'tag',
-    :type                => 'type',
-    :value               => 'value',
-    :variable            => 'variable',
+    :debug               => 'debug',              # highlight for debugging (white on blue background)
     
-    :change              => 'change',
-    :delete              => 'delete',
-    :head                => 'head',
-    :insert              => 'insert',
+    :annotation          => 'annotation',         # Groovy, Java
+    :attribute_name      => 'attribute-name',     # HTML, CSS
+    :attribute_value     => 'attribute-value',    # HTML
+    :binary              => 'binary',             # Python, Ruby
+    :char                => 'char',               # most scanners, also inside of strings
+    :class               => 'class',              # lots of scanners, for different purposes also in CSS
+    :class_variable      => 'class-variable',     # Ruby, YAML
+    :color               => 'color',              # CSS
+    :comment             => 'comment',            # most scanners
+    :constant            => 'constant',           # PHP, Ruby
+    :content             => 'content',            # inside of strings, most scanners
+    :decorator           => 'decorator',          # Python
+    :definition          => 'definition',         # CSS
+    :delimiter           => 'delimiter',          # inside strings, comments and other types
+    :directive           => 'directive',          # lots of scanners
+    :doctype             => 'doctype',            # Groovy, HTML, Ruby, YAML
+    :docstring           => 'docstring',          # Python
+    :done                => 'done',               # Taskpaper
+    :entity              => 'entity',             # HTML
+    :error               => 'error',              # invalid token, most scanners
+    :escape              => 'escape',             # Ruby (string inline variables like #$foo, #@bar)
+    :exception           => 'exception',          # Java, PHP, Python
+    :filename            => 'filename',           # Diff
+    :float               => 'float',              # most scanners
+    :function            => 'function',           # CSS, JavaScript, PHP
+    :global_variable     => 'global-variable',    # Ruby, YAML
+    :hex                 => 'hex',                # hexadecimal number; lots of scanners
+    :id                  => 'id',                 # CSS
+    :imaginary           => 'imaginary',          # Python
+    :important           => 'important',          # CSS, Taskpaper
+    :include             => 'include',            # C, Groovy, Java, Python, Sass
+    :inline              => 'inline',             # nested code, eg. inline string evaluation; lots of scanners
+    :inline_delimiter    => 'inline-delimiter',   # used instead of :inline > :delimiter FIXME: Why use inline_delimiter?
+    :instance_variable   => 'instance-variable',  # Ruby
+    :integer             => 'integer',            # most scanners
+    :key                 => 'key',                # lots of scanners, used together with :value
+    :keyword             => 'keyword',            # reserved word that's actually implemented; most scanners
+    :label               => 'label',              # C, PHP
+    :local_variable      => 'local-variable',     # local and magic variables; some scanners
+    :map                 => 'map',                # Lua tables
+    :modifier            => 'modifier',           # used inside of strings; lots of scanners
+    :namespace           => 'namespace',          # Clojure, Java, Taskpaper
+    :octal               => 'octal',              # lots of scanners
+    :predefined          => 'predefined',         # predefined function: lots of scanners
+    :predefined_constant => 'predefined-constant',# lots of scanners
+    :predefined_type     => 'predefined-type',    # C, Java, PHP
+    :preprocessor        => 'preprocessor',       # C, Delphi, HTML
+    :pseudo_class        => 'pseudo-class',       # CSS
+    :regexp              => 'regexp',             # Groovy, JavaScript, Ruby
+    :reserved            => 'reserved',           # most scanners
+    :shell               => 'shell',              # Ruby
+    :string              => 'string',             # most scanners
+    :symbol              => 'symbol',             # Clojure, Ruby, YAML
+    :tag                 => 'tag',                # CSS, HTML
+    :type                => 'type',               # CSS, Java, SQL, YAML
+    :value               => 'value',              # used together with :key; CSS, JSON, YAML
+    :variable            => 'variable',           # Sass, SQL, YAML
     
-    :eyecatcher          => 'eyecatcher',
+    :change              => 'change',             # Diff
+    :delete              => 'delete',             # Diff
+    :head                => 'head',               # Diff, YAML
+    :insert              => 'insert',             # Diff
+    :eyecatcher          => 'eyecatcher',         # Diff
     
-    :ident               => false,
-    :operator            => false,
+    :ident               => false,                # almost all scanners
+    :operator            => false,                # almost all scanners
     
-    :space               => false,
-    :plain               => false
+    :space               => false,                # almost all scanners
+    :plain               => false                 # almost all scanners
   )
   
-  TokenKinds[:method]    = TokenKinds[:function]
+  TokenKinds[:method] = TokenKinds[:function]
 end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index 6957d69..e7bffce 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -1,55 +1,43 @@
 module CodeRay
   
-  # GZip library for writing and reading token dumps.
-  autoload :GZip, coderay_path('helpers', 'gzip')
-  
-  # = Tokens  TODO: Rewrite!
-  #
-  # The Tokens class represents a list of tokens returnd from
-  # a Scanner.
+  # The Tokens class represents a list of tokens returned from
+  # a Scanner. It's actually just an Array with a few helper methods.
   #
-  # A token is not a special object, just a two-element Array
-  # consisting of
+  # A token itself is not a special object, just two elements in an Array:
   # * the _token_ _text_ (the original source of the token in a String) or
   #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
   # * the _token_ _kind_ (a Symbol representing the type of the token)
   #
-  # A token looks like this:
+  # It looks like this:
   #
-  #   ['# It looks like this', :comment]
-  #   ['3.1415926', :float]
-  #   ['$^', :error]
+  #   ..., '# It looks like this', :comment, ...
+  #   ..., '3.1415926', :float, ...
+  #   ..., '$^', :error, ...
   #
   # Some scanners also yield sub-tokens, represented by special
-  # token actions, namely begin_group and end_group.
+  # token actions, for example :begin_group and :end_group.
   #
   # The Ruby scanner, for example, splits "a string" into:
   #
   #  [
-  #   [:begin_group, :string],
-  #   ['"', :delimiter],
-  #   ['a string', :content],
-  #   ['"', :delimiter],
-  #   [:end_group, :string]
+  #   :begin_group, :string,
+  #   '"',          :delimiter,
+  #   'a string',   :content,
+  #   '"',          :delimiter,
+  #   :end_group,   :string
   #  ]
   #
-  # Tokens is the interface between Scanners and Encoders:
-  # The input is split and saved into a Tokens object. The Encoder
-  # then builds the output from this object.
-  #
-  # Thus, the syntax below becomes clear:
+  # Tokens can be used to save the output of a Scanner in a simple
+  # Ruby object that can be sent to an Encoder later:
   #
-  #   CodeRay.scan('price = 2.59', :ruby).html
-  #   # the Tokens object is here -------^
-  #
-  # See how small it is? ;)
+  #   tokens = CodeRay.scan('price = 2.59', :ruby).tokens
+  #   tokens.encode(:html)
+  #   tokens.html
+  #   CodeRay.encoder(:html).encode_tokens(tokens)
   #
   # Tokens gives you the power to handle pre-scanned code very easily:
-  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
-  # that you put in your DB.
-  # 
-  # It also allows you to generate tokens directly (without using a scanner),
-  # to load them from a file, and still use any Encoder that CodeRay provides.
+  # You can serialize it to a JSON string and store it in a database, pass it
+  # around to encode it more than once, send it to other algorithms...
   class Tokens < Array
     
     # The Scanner instance that created the tokens.
@@ -58,8 +46,7 @@ module CodeRay
     # Encode the tokens using encoder.
     #
     # encoder can be
-    # * a symbol like :html oder :statistic
-    # * an Encoder class
+    # * a plugin name like :html or 'statistic'
     # * an Encoder object
     #
     # options are passed to the encoder.
@@ -157,53 +144,11 @@ module CodeRay
       parts
     end
     
-    # Dumps the object into a String that can be saved
-    # in files or databases.
-    #
-    # The dump is created with Marshal.dump;
-    # In addition, it is gzipped using GZip.gzip.
-    #
-    # The returned String object includes Undumping
-    # so it has an #undump method. See Tokens.load.
-    #
-    # You can configure the level of compression,
-    # but the default value 7 should be what you want
-    # in most cases as it is a good compromise between
-    # speed and compression rate.
-    #
-    # See GZip module.
-    def dump gzip_level = 7
-      dump = Marshal.dump self
-      dump = GZip.gzip dump, gzip_level
-      dump.extend Undumping
-    end
-    
     # Return the actual number of tokens.
     def count
       size / 2
     end
     
-    # Include this module to give an object an #undump
-    # method.
-    #
-    # The string returned by Tokens.dump includes Undumping.
-    module Undumping
-      # Calls Tokens.load with itself.
-      def undump
-        Tokens.load self
-      end
-    end
-    
-    # Undump the object using Marshal.load, then
-    # unzip it using GZip.gunzip.
-    #
-    # The result is commonly a Tokens object, but
-    # this is not guaranteed.
-    def Tokens.load dump
-      dump = GZip.gunzip dump
-      @dump = Marshal.load dump
-    end
-    
     alias text_token push
     def begin_group kind; push :begin_group, kind end
     def end_group kind; push :end_group, kind end
author	Kornelius Kalnbach <murphy@rubychan.de>	2013-06-23 16:06:02 +0200
committer	Kornelius Kalnbach <murphy@rubychan.de>	2013-06-23 16:06:02 +0200
commit	0013b649f714f23eef0859921fa7804ca7caef76 (patch)
tree	7c278ee7c420729b4738fe2a195e529ffd2bb6da /lib/coderay
parent	addcbd446066d0da1627112814e3ce1b8d404da0 (diff)
parent	64ca2ae8ad5130bdcf652aa7aa08298de00f20f4 (diff)
download	coderay-0013b649f714f23eef0859921fa7804ca7caef76.tar.gz