21 files changed, 189 insertions, 107 deletions
diff --git a/lib/coderay/duo.rb b/lib/coderay/duo.rb
index 0e5956e..e2d6888 100644
--- a/lib/coderay/duo.rb
+++ b/lib/coderay/duo.rb
@@ -4,26 +4,84 @@ module CodeRay
   #
   # $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $
   #
-  # TODO: Doc.
+  # A Duo is a convenient way to use CodeRay. You just create a Duo,
+  # giving it a lang (language of the input code) and a format (desired
+  # output format), and call Duo#highlight with the code.
+  # 
+  # Duo makes it easy to re-use both scanner and encoder for a repetitive
+  # task. It also provides a very easy interface syntax:
+  # 
+  #   require 'coderay'
+  #   CodeRay::Duo[:python, :div].highlight 'import this'
+  # 
+  # Until you want to do uncommon things with CodeRay, I recommend using
+  # this method, since it takes care of everything.
   class Duo
 
-    attr_accessor :scanner, :encoder
-
-    def initialize lang, format, options = {}
-      @scanner = CodeRay.scanner lang, CodeRay.get_scanner_options(options)
-      @encoder = CodeRay.encoder format, options
+    attr_accessor :lang, :format, :options
+    
+    # Create a new Duo, holding a lang and a format to highlight code.
+    #
+    # simple:
+    #   CodeRay::Duo[:ruby, :page].highlight 'bla 42'
+    #
+    # streaming:
+    #   CodeRay::Duo[:ruby, :page].highlight 'bar 23', :stream => true
+    #
+    # with options:
+    #   CodeRay::Duo[:ruby, :html, :hint => :debug].highlight '????::??'
+    #
+    # alternative syntax without options:
+    #   CodeRay::Duo[:ruby => :statistic].encode 'class << self; end'
+    #
+    # alternative syntax with options (the braces are required):
+    #   CodeRay::Duo[{:ruby => :statistic}, :do => :something].encode 'abc'
+    #
+    # The options are forwarded to scanner and encoder
+    # (see CodeRay.get_scanner_options).
+    def initialize lang = nil, format = nil, options = {}
+      if lang.is_a? Hash and lang.size == 1 and (format.nil? or format.is_a? Hash)
+        # Pair syntax: a Hash in second position holds the options.
+        @lang, @format = lang.to_a.first
+        options = format if format
+      else
+        @lang, @format = lang, format
+      end
+      @options = options
     end
 
     class << self
+      # To allow calls like Duo[:ruby, :html].highlight.
       alias [] new
     end
 
-    def encode code
-      @scanner.string = code
-      @encoder.encode_tokens(scanner.tokenize)
+    # The scanner of the duo. Only created once.
+    def scanner
+      @scanner ||= CodeRay.scanner @lang, CodeRay.get_scanner_options(@options)
+    end
+    
+    # The encoder of the duo. Only created once.
+    def encoder
+      @encoder ||= CodeRay.encoder @format, @options
+    end
+    
+    # Tokenize and highlight the code using +scanner+ and +encoder+.
+    #
+    # If the :stream option is set, the Duo will go into streaming mode,
+    # saving memory at the cost of time. The given hash is not modified.
+    def encode code, options = { :stream => false }
+      stream = options[:stream]
+      options = @options.merge(options.reject { |key, _| key == :stream })
+      if stream
+        encoder.encode_stream(code, @lang, options)
+      else
+        scanner.code = code
+        encoder.encode_tokens(scanner.tokenize, options)
+      end
     end
     alias highlight encode
 
   end
 
 end
+
diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb
index f72e2f1..ce65832 100644
--- a/lib/coderay/encoder.rb
+++ b/lib/coderay/encoder.rb
@@ -1,3 +1,5 @@
+require "stringio"
+
 module CodeRay
 
   # This module holds the Encoder class and its subclasses.
@@ -130,13 +132,14 @@ module CodeRay
       # By default, it calls text_token or block_token, depending on
       # whether +text+ is a String.
       def token text, kind
-        if text.instance_of? ::String  # Ruby 1.9: :open.is_a? String
+        out = if text.instance_of? ::String  # Ruby 1.9: :open.is_a? String
           text_token text, kind
         elsif text.is_a? ::Symbol
           block_token text, kind
         else
           raise 'Unknown token text type: %p' % text
         end
+        @out << out if @out
       end
 
       def text_token text, kind
diff --git a/lib/coderay/encoders/_map.rb b/lib/coderay/encoders/_map.rb
index fdd8ae4..8e9732b 100644
--- a/lib/coderay/encoders/_map.rb
+++ b/lib/coderay/encoders/_map.rb
@@ -2,7 +2,8 @@ module CodeRay
 module Encoders
 
   map :stats => :statistic,
-    :plain => :text
+    :plain => :text,
+    :tex => :latex
 
 end
 end
diff --git a/lib/coderay/encoders/debug.rb b/lib/coderay/encoders/debug.rb
index eb9eaa4..21d4710 100644
--- a/lib/coderay/encoders/debug.rb
+++ b/lib/coderay/encoders/debug.rb
@@ -19,17 +19,12 @@ module Encoders
 
   protected
     def text_token text, kind
-      @out <<
-        if kind == :space
-          text
-        else
-          text = text.gsub(/[)\\]/, '\\\\\0')
-          "#{kind}(#{text})"
-        end
-    end
-
-    def block_token action, kind
-      @out << super
+      if kind == :space
+        text
+      else
+        text = text.gsub(/[)\\]/, '\\\\\0')
+        "#{kind}(#{text})"
+      end
     end
 
     def open_token kind
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
index 3ee677a..32e35f8 100644
--- a/lib/coderay/encoders/html.rb
+++ b/lib/coderay/encoders/html.rb
@@ -86,7 +86,7 @@ module Encoders
       :hint => false,
     }
 
-    helper :classes, :output, :css
+    helper :output, :css
 
     attr_reader :css
 
@@ -119,15 +119,14 @@ module Encoders
         end
     }
 
-    TRANSPARENT_TOKEN_KINDS = Set[
+    TRANSPARENT_TOKEN_KINDS = [
       :delimiter, :modifier, :content, :escape, :inline_delimiter,
-    ]
+    ].to_set
 
     # Generate a hint about the given +classes+ in a +hint+ style.
     #
     # +hint+ may be :info, :info_long or :debug.
     def self.token_path_to_hint hint, classes
-      return '' unless hint
       title =
         case hint
         when :info
@@ -159,29 +158,28 @@ module Encoders
 
       when :class
         @css_style = Hash.new do |h, k|
-          if k.is_a? Array
-            type = k.first
-          else
-            type = k
-          end
-          c = ClassOfKind[type]
+          c = Tokens::ClassOfKind[k.first]
           if c == :NO_HIGHLIGHT and not hint
-            h[k] = false
+            h[k.dup] = false
           else
-            title = HTML.token_path_to_hint hint, (k[1..-1] << k.first)
-            h[k] = '<span%s class="%s">' % [title, c]
+            title = if hint
+              HTML.token_path_to_hint(hint, k[1..-1] << k.first)
+            else
+              ''
+            end
+            h[k.dup] = '<span%s class="%s">' % [title, c]
           end
         end
 
       when :style
         @css_style = Hash.new do |h, k|
-          if k.is_a? Array
+          if k.is_a? ::Array
             styles = k.dup
           else
             styles = [k]
           end
           type = styles.first
-          classes = styles.map { |c| ClassOfKind[c] }
+          classes = styles.map { |c| Tokens::ClassOfKind[c] }
           if classes.first == :NO_HIGHLIGHT and not hint
             h[k] = false
           else
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
index e2a0460..6d0c646 100644
--- a/lib/coderay/encoders/statistic.rb
+++ b/lib/coderay/encoders/statistic.rb
@@ -28,17 +28,13 @@ module Encoders
       @type_stats[kind].count += 1
       @type_stats[kind].size += text.size
       @type_stats['TOTAL'].size += text.size
+      @type_stats['TOTAL'].count += 1
     end
 
     # TODO Hierarchy handling
     def block_token action, kind
-      #@content_type = kind
-      @type_stats['open/close'].count += 1
-    end
-
-    def token text, kind
-      super
       @type_stats['TOTAL'].count += 1
+      @type_stats['open/close'].count += 1
     end
 
     STATS = <<-STATS
diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb
index 17256c6..7493280 100644
--- a/lib/coderay/encoders/text.rb
+++ b/lib/coderay/encoders/text.rb
@@ -18,9 +18,8 @@ module Encoders
       @sep = options[:separator]
     end
 
-    def token text, kind
-      return unless text.respond_to? :to_str
-      @out << text + @sep
+    def text_token text, kind
+      text + @sep
     end
 
     def finish options
diff --git a/lib/coderay/encoders/tokens.rb b/lib/coderay/encoders/tokens.rb
index 2428589..27c7f6d 100644
--- a/lib/coderay/encoders/tokens.rb
+++ b/lib/coderay/encoders/tokens.rb
@@ -33,9 +33,9 @@ module Encoders
 
     FILE_EXTENSION = 'tok'
 
-    protected
-    def token *args
-      @out << CodeRay::Tokens.write_token(*args)
+  protected
+    def token text, kind
+      @out << CodeRay::Tokens.write_token(text, kind)
     end
 
   end
diff --git a/lib/coderay/encoders/xml.rb b/lib/coderay/encoders/xml.rb
index 09e4549..dffa98c 100644
--- a/lib/coderay/encoders/xml.rb
+++ b/lib/coderay/encoders/xml.rb
@@ -22,7 +22,6 @@ module Encoders
   protected
 
     def setup options
-      @out = ''
       @doc = REXML::Document.new
       @doc << REXML::XMLDecl.new
       @tab_width = options[:tab_width]
@@ -33,7 +32,7 @@ module Encoders
       @doc.write @out, options[:pretty], options[:transitive], true
       @out
     end
-
+    
     def text_token text, kind
       if kind == :space
         token = @node
diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb
index 55b63f5..13b669d 100644
--- a/lib/coderay/helpers/file_type.rb
+++ b/lib/coderay/helpers/file_type.rb
@@ -1,3 +1,5 @@
+module CodeRay
+
 # = FileType
 #
 # A simple filetype recognizer.
@@ -100,6 +102,8 @@ module FileType
 
 end
 
+end
+
 if $0 == __FILE__
   $VERBOSE = true
   eval DATA.read, nil, $0, __LINE__+4
diff --git a/lib/coderay/helpers/gzip_simple.rb b/lib/coderay/helpers/gzip_simple.rb
index df4bcba..4d44711 100644
--- a/lib/coderay/helpers/gzip_simple.rb
+++ b/lib/coderay/helpers/gzip_simple.rb
@@ -1,3 +1,5 @@
+module CodeRay
+
 # =GZip Simple
 #
 # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
@@ -91,6 +93,8 @@ if $0 == __FILE__
   eval DATA.read, nil, $0, __LINE__+4
 end
 
+end
+
 __END__
 #CODE
 
diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb
index 445d500..e6017d5 100644
--- a/lib/coderay/helpers/plugin.rb
+++ b/lib/coderay/helpers/plugin.rb
@@ -1,3 +1,5 @@
+module CodeRay
+  
 # = PluginHost
 #
 # $Id$
@@ -310,11 +312,10 @@ module Plugin
 
 end
 
-
 # Convenience method for plugin loading.
 # The syntax used is:
 #
-#  require_plugin '<Host ID>/<Plugin ID>'
+#  CodeRay.require_plugin '<Host ID>/<Plugin ID>'
 #
 # Returns the loaded plugin.
 def require_plugin path
@@ -324,3 +325,5 @@ def require_plugin path
     "No host for #{host_id.inspect} found." unless host
   host.load plugin_id
 end
+
+end
+\ No newline at end of file
diff --git a/lib/coderay/helpers/word_list.rb b/lib/coderay/helpers/word_list.rb
index 99f6029..5196a5d 100644
--- a/lib/coderay/helpers/word_list.rb
+++ b/lib/coderay/helpers/word_list.rb
@@ -1,3 +1,5 @@
+module CodeRay
+
 # = WordList
 # 
 # <b>A Hash subclass designed for mapping word lists to token types.</b>
@@ -117,3 +119,5 @@ class CaseIgnoringWordList < WordList
   end
 
 end
+
+end
+\ No newline at end of file
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index 62327c7..5993b4c 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -117,9 +117,6 @@ module CodeRay
         setup
       end
 
-      # More mnemonic accessor name for the input string.
-      alias code string
-
       def reset
         super
         reset_instance
@@ -131,6 +128,10 @@ module CodeRay
         reset_instance
       end
 
+      # More mnemonic accessor name for the input string.
+      alias code string
+      alias code= string=
+
       # Scans the code and returns all tokens in a Tokens object.
       def tokenize new_string=nil, options = {}
         options = @options.merge(options)
@@ -148,6 +149,11 @@ module CodeRay
       def tokens
         @cached_tokens ||= tokenize
       end
+      
+      # Whether the scanner is in streaming mode.
+      def streaming?
+        !!@options[:stream]
+      end
 
       # Traverses the tokens.
       def each &block
@@ -195,7 +201,7 @@ module CodeRay
         raise ScanError, <<-EOE % [
 
 
-***ERROR in %s: %s
+***ERROR in %s: %s (after %d tokens)
 
 tokens:
 %s
@@ -211,13 +217,14 @@ surrounding code:
 ***ERROR***
 
         EOE
-        File.basename(caller[0]),
-        msg,
-        tokens.last(10).map { |t| t.inspect }.join("\n"),
-        line, pos,
-        matched, state, bol?, eos?,
-        string[pos-ambit,ambit],
-        string[pos,ambit],
+          File.basename(caller[0]),
+          msg,
+          tokens.size,
+          tokens.last(10).map { |t| t.inspect }.join("\n"),
+          line, pos,
+          matched, state, bol?, eos?,
+          string[pos-ambit,ambit],
+          string[pos,ambit],
         ]
       end
 
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb
index be113d0..2b63a81 100644
--- a/lib/coderay/scanners/c.rb
+++ b/lib/coderay/scanners/c.rb
@@ -4,6 +4,8 @@ module Scanners
   class C < Scanner
 
     register_for :c
+    
+    include Streamable
 
     RESERVED_WORDS = [
       'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb
index 1cd7a8a..7a08c3a 100644
--- a/lib/coderay/scanners/plaintext.rb
+++ b/lib/coderay/scanners/plaintext.rb
@@ -4,6 +4,8 @@ module Scanners
   class Plaintext < Scanner
 
     register_for :plaintext, :plain
+    
+    include Streamable
 
     def scan_tokens tokens, options
       text = (scan_until(/\z/) || '')
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index def5960..b373a2b 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -90,15 +90,15 @@ module Scanners
             end
 
           when '#'
-            case peek(1)[0]
-            when ?{
+            case peek(1)
+            when '{'
               inline_block_stack << [state, depth, heredocs]
               value_expected = true
               state = :initial
               depth = 1
               tokens << [:open, :inline]
               tokens << [match + getch, :inline_delimiter]
-            when ?$, ?@
+            when '$', '@'
               tokens << [match, :escape]
               last_state = state  # scan one token as normal code, then return here
               state = :initial
@@ -121,36 +121,36 @@ module Scanners
 # }}}
         else
 # {{{
-          if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
-            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
-            case m = match[0]
-            when ?\s, ?\t, ?\f
-              match << scan(/\s*/) unless eos? or heredocs
-              kind = :space
-            when ?\n, ?\\
-              kind = :space
-              if m == ?\n
-                value_expected = true  # FIXME not quite true
-                state = :initial if state == :undef_comma_expected
-              end
-              if heredocs
-                unscan  # heredoc scanning needs \n at start
-                state = heredocs.shift
-                tokens << [:open, state.type]
-                heredocs = nil if heredocs.empty?
-                next
-              else
-                match << scan(/\s*/) unless eos?
-              end
-            when ?#, ?=, ?_
-              kind = :comment
-              value_expected = true
+          if match = scan(/[ \t\f]+/)
+            kind = :space
+            match << scan(/\s*/) unless eos? or heredocs
+            tokens << [match, kind]
+            next
+            
+          elsif match = scan(/\\?\n/)
+            kind = :space
+            if match == "\n"
+              value_expected = true  # FIXME not quite true
+              state = :initial if state == :undef_comma_expected
+            end
+            if heredocs
+              unscan  # heredoc scanning needs \n at start
+              state = heredocs.shift
+              tokens << [:open, state.type]
+              heredocs = nil if heredocs.empty?
+              next
             else
-              raise_inspect 'else-case _ reached, because case %p was
-                not handled' % [matched[0].chr], tokens
+              match << scan(/\s*/) unless eos?
             end
             tokens << [match, kind]
             next
+          
+          elsif match = scan(/\#.*/) or
+            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
+              kind = :comment
+              value_expected = true
+              tokens << [match, kind]
+              next
 
           elsif state == :initial
 
@@ -175,11 +175,11 @@ module Scanners
               value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
 
             # OPERATORS #
-            elsif not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)
+            elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
               if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                 value_expected = :set
               end
-              last_token_dot = :set if match == '.' or match == '::'
+              last_token_dot = :set if self[1]
               kind = :operator
               unless inline_block_stack.empty?
                 case match
diff --git a/lib/coderay/scanners/ruby/patterns.rb b/lib/coderay/scanners/ruby/patterns.rb
index 51cdb95..39962ec 100644
--- a/lib/coderay/scanners/ruby/patterns.rb
+++ b/lib/coderay/scanners/ruby/patterns.rb
@@ -111,7 +111,7 @@ module Scanners
       (?:
         ( [A-Za-z_0-9]+ )  # $2 = delim
       |
-        ( ["'`] )          # $3 = quote, type
+        ( ["'`\/] )        # $3 = quote, type
         ( [^\n]*? ) \3     # $4 = delim
       )
     /mx
diff --git a/lib/coderay/styles/cycnus.rb b/lib/coderay/styles/cycnus.rb
index df982e1..7430e9e 100644
--- a/lib/coderay/styles/cycnus.rb
+++ b/lib/coderay/styles/cycnus.rb
@@ -42,6 +42,8 @@ ol.CodeRay li { white-space: pre }
     MAIN
 
     TOKEN_COLORS = <<-'TOKENS'
+.debug { color:white ! important; background:blue ! important; }
+
 .af { color:#00C }
 .an { color:#007 }
 .av { color:#700 }
diff --git a/lib/coderay/encoders/html/classes.rb b/lib/coderay/token_classes.rb
index 0bac742..b19e512 100644..100755
--- a/lib/coderay/encoders/html/classes.rb
+++ b/lib/coderay/token_classes.rb
@@ -1,8 +1,5 @@
 module CodeRay
-module Encoders
-
-  class HTML
-
+  class Tokens
     ClassOfKind = Hash.new do |h, k|
       h[k] = k.to_s
     end
@@ -70,8 +67,5 @@ module Encoders
     ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
     ClassOfKind[:escape] = ClassOfKind[:delimiter]
     #ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
-
   end
-
-end
-end
+end
+\ No newline at end of file
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
index b0ce70e..d05177a 100644
--- a/lib/coderay/tokens.rb
+++ b/lib/coderay/tokens.rb
@@ -115,7 +115,7 @@ module CodeRay
     #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
     def each_text_token
       each do |text, kind|
-        next unless text.respond_to? :to_str
+        next unless text.is_a? ::String
         yield text, kind
       end
     end
@@ -252,7 +252,7 @@ module CodeRay
     #
     # You can configure the level of compression,
     # but the default value 7 should be what you want
-    # in most cases as it is a good comprimise between
+    # in most cases as it is a good compromise between
     # speed and compression rate.
     #
     # See GZip module.
@@ -267,7 +267,14 @@ module CodeRay
     # Should be equal to the input size before
     # scanning.
     def text_size
-      map { |t, k| t }.join.size
+      inject(0) { |size, (t, k)| t.is_a?(::String) ? size + t.size : size }
+    end
+
+    # The text of all tokens, joined into a single String.
+    # Should be equal to the input code before
+    # scanning. Non-String tokens contribute nothing.
+    def text
+      map { |t, k| t if t.is_a? ::String }.join
     end
 
     # Include this module to give an object an #undump
@@ -365,4 +372,8 @@ module CodeRay
 
   end
 
+  
+  # Token name abbreviations
+  require 'coderay/token_classes'
+
 end