From 26a8e5a0388199ac686db28d631b05a5b5aa02e1 Mon Sep 17 00:00:00 2001
From: murphy <murphy@rubychan.de>
Date: Tue, 11 Jul 2006 05:37:50 +0000
Subject: Changed error handling of all scanners: :error tokens are OK now,
 even in debug mode, but token kind is nil unless assigned. Small fixes for C
 and Ruby scanners. Renamed local variable type to kind in Ruby scanner.
 Improved RHTML scanner to recognize -%> as delimiter.

HTML encoder: improved handling of malformed token strings.
Fixed PluginHost#inspect including docu.
Scanner#raise_inspect also shows state if given.
---
 lib/coderay/encoders/html.rb        | 494 +++++++++++-----------
 lib/coderay/helpers/plugin.rb       |  18 +-
 lib/coderay/scanner.rb              | 476 ++++++++++-----------
 lib/coderay/scanners/_map.rb        |  29 +-
 lib/coderay/scanners/c.rb           | 318 +++++++-------
 lib/coderay/scanners/delphi.rb      | 260 ++++++------
 lib/coderay/scanners/html.rb        | 341 +++++++--------
 lib/coderay/scanners/nitro_html.rb  | 125 ------
 lib/coderay/scanners/nitro_xhtml.rb | 130 ++++++
 lib/coderay/scanners/rhtml.rb       | 138 ++++---
 lib/coderay/scanners/ruby.rb        | 804 ++++++++++++++++++------------------
 11 files changed, 1586 insertions(+), 1547 deletions(-)
 delete mode 100644 lib/coderay/scanners/nitro_html.rb
 create mode 100644 lib/coderay/scanners/nitro_xhtml.rb

(limited to 'lib/coderay')
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
index 60c56c1..bd7583a 100644
--- a/lib/coderay/encoders/html.rb
+++ b/lib/coderay/encoders/html.rb
@@ -1,245 +1,249 @@
-module CodeRay
-module Encoders
-
-  # = HTML Encoder
-  #
-  # This is CodeRay's most important highlighter:
-  # It provides save, fast XHTML generation and CSS support.
-  #
-  # == Usage
-  #
-  #  require 'coderay'
-  #  puts CodeRay.scan('Some /code/', :ruby).html  #-> a HTML page
-  #  puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> <span class="CodeRay"><span class="co">Some</span> /code/</span>
-  #  puts CodeRay.scan('Some /code/', :ruby).span  #-> the same
-  #  
-  #  puts CodeRay.scan('Some code', :ruby).html(
-  #    :wrap => nil,
-  #    :line_numbers => :inline,
-  #    :css => :style
-  #  )
-  #  #-> <span class="no">1</span>  <span style="color:#036; font-weight:bold;">Some</span> code
-  #
-  # == Options
-  #
-  # === :tab_width
-  # Convert \t characters to +n+ spaces (a number.)
-  # Default: 8
-  #
-  # === :css
-  # How to include the styles; can be :class or :style.
-  #
-  # Default: :class
-  #
-  # === :wrap
-  # Wrap in :page, :div, :span or nil.
-  #
-  # You can also use Encoders::Div and Encoders::Span.
-  #
-  # Default: nil
-  #
-  # === :line_numbers
-  # Include line numbers in :table, :inline, :list or nil (no line numbers)
-  #
-  # Default: nil
-  #
-  # === :line_number_start
-  # Where to start with line number counting.
-  #
-  # Default: 1
-  #
-  # === :bold_every
-  # Make every +n+-th number appear bold.
-  #
-  # Default: 10
-  #
-  # === :hint
-  # Include some information into the output using the title attribute.
-  # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect).
-  #
-  # Default: false
-  class HTML < Encoder
-
-    include Streamable
-    register_for :html
-
-    FILE_EXTENSION = 'html'
-
-    DEFAULT_OPTIONS = {
-      :tab_width => 8,
-
-      :level => :xhtml,
-      :css => :class,
-
-      :style => :cycnus,
-
-      :wrap => nil,
-
-      :line_numbers => nil,
-      :line_number_start => 1,
-      :bold_every => 10,
-
-      :hint => false,
-    }
-
-    helper :classes, :output, :css
-
-    attr_reader :css
-
-  protected
-
-    HTML_ESCAPE = {  #:nodoc:
-      '&' => '&amp;',
-      '"' => '&quot;',
-      '>' => '&gt;',
-      '<' => '&lt;',
-    }
-
-    # This was to prevent illegal HTML.
-    # Strange chars should still be avoided in codes.
-    evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s]
-    evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
-    #ansi_chars = Array(0x7f..0xff)
-    #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
-    # \x9 (\t) and \xA (\n) not included
-    #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/
-    HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/
-
-    TOKEN_KIND_TO_INFO = Hash.new { |h, kind|
-      h[kind] =
-        case kind
-        when :pre_constant
-          'Predefined constant'
-        else
-          kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize }
-        end
-    }
-
-    # Generate a hint about the given +classes+ in a +hint+ style.
-    #
-    # +hint+ may be :info, :info_long or :debug.
-    def self.token_path_to_hint hint, classes
-      return '' unless hint
-      title =
-        case hint
-        when :info
-          TOKEN_KIND_TO_INFO[classes.first]
-        when :info_long
-          classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/')
-        when :debug
-          classes.inspect
-        end
-      " title=\"#{title}\""
-    end
-
-    def setup options
-      super
-
-      @HTML_ESCAPE = HTML_ESCAPE.dup
-      @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
-
-      @opened = [nil]
-      @css = CSS.new options[:style]
-
-      hint = options[:hint]
-      if hint and not [:debug, :info, :info_long].include? hint
-        raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint
-      end
-
-      case options[:css]
-
-      when :class
-        @css_style = Hash.new do |h, k|
-          if k.is_a? Array
-            type = k.first
-          else
-            type = k
-          end
-          c = ClassOfKind[type]
-          if c == :NO_HIGHLIGHT and not hint
-            h[k] = false
-          else
-            title = HTML.token_path_to_hint hint, (k[1..-1] << k.first)
-            h[k] = '<span%s class="%s">' % [title, c]
-          end
-        end
-
-      when :style
-        @css_style = Hash.new do |h, k|
-          if k.is_a? Array
-            styles = k.dup
-          else
-            styles = [k]
-          end
-          type = styles.first
-          classes = styles.map { |c| ClassOfKind[c] }
-          if classes.first == :NO_HIGHLIGHT and not hint
-            h[k] = false
-          else
-            styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first
-            title = HTML.token_path_to_hint hint, styles
-            classes.delete 'il'
-            style = @css[*classes]
-            h[k] =
-              if style
-                '<span%s style="%s">' % [title, style]
-              else
-                false
-              end
-          end
-        end
-
-      else
-        raise ArgumentError, "Unknown value %p for :css." % options[:css]
-
-      end
-    end
-
-    def finish options
-      not_needed = @opened.shift
-      @out << '</span>' * @opened.size
-      warn '%d tokens still open' % @opened.size unless @opened.empty?
-
-      @out.extend Output
-      @out.css = @css
-      @out.numerize! options[:line_numbers], options
-      @out.wrap! options[:wrap]
-
-      super
-    end
-
-    def token text, type
-      if text.is_a? ::String
-        if text =~ /#{HTML_ESCAPE_PATTERN}/o
-          text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
-        end
-        @opened[0] = type
-        if style = @css_style[@opened]
-          @out << style << text << '</span>'
-        else
-          @out << text
-        end
-      else
-        case text
-        when :open
-          @opened[0] = type
-          @out << (@css_style[@opened] || '<span>')
-          @opened << type
-        when :close
-          unless @opened.empty?
-            raise 'Malformed token stream: Trying to close a token that was never opened.' unless @opened.size > 1
-            @out << '</span>'
-            @opened.pop
-          end
-        when nil
-          raise 'Token with nil as text was given: %p' % [[text, type]]
-        else
-          raise 'unknown token kind: %p' % text
-        end
-      end
-    end
-
-  end
-
-end
-end
+module CodeRay
+module Encoders
+
+  # = HTML Encoder
+  #
+  # This is CodeRay's most important highlighter:
+  # It provides safe, fast XHTML generation and CSS support.
+  #
+  # == Usage
+  #
+  #  require 'coderay'
+  #  puts CodeRay.scan('Some /code/', :ruby).html  #-> a HTML page
+  #  puts CodeRay.scan('Some /code/', :ruby).html(:wrap => :span) #-> <span class="CodeRay"><span class="co">Some</span> /code/</span>
+  #  puts CodeRay.scan('Some /code/', :ruby).span  #-> the same
+  #  
+  #  puts CodeRay.scan('Some code', :ruby).html(
+  #    :wrap => nil,
+  #    :line_numbers => :inline,
+  #    :css => :style
+  #  )
+  #  #-> <span class="no">1</span>  <span style="color:#036; font-weight:bold;">Some</span> code
+  #
+  # == Options
+  #
+  # === :tab_width
+  # Convert \t characters to +n+ spaces (a number.)
+  # Default: 8
+  #
+  # === :css
+  # How to include the styles; can be :class or :style.
+  #
+  # Default: :class
+  #
+  # === :wrap
+  # Wrap in :page, :div, :span or nil.
+  #
+  # You can also use Encoders::Div and Encoders::Span.
+  #
+  # Default: nil
+  #
+  # === :line_numbers
+  # Include line numbers in :table, :inline, :list or nil (no line numbers)
+  #
+  # Default: nil
+  #
+  # === :line_number_start
+  # Where to start with line number counting.
+  #
+  # Default: 1
+  #
+  # === :bold_every
+  # Make every +n+-th number appear bold.
+  #
+  # Default: 10
+  #
+  # === :hint
+  # Include some information into the output using the title attribute.
+  # Can be :info (show token type on mouse-over), :info_long (with full path) or :debug (via inspect).
+  #
+  # Default: false
+  class HTML < Encoder
+
+    include Streamable
+    register_for :html
+
+    FILE_EXTENSION = 'html'
+
+    DEFAULT_OPTIONS = {
+      :tab_width => 8,
+
+      :level => :xhtml,
+      :css => :class,
+
+      :style => :cycnus,
+
+      :wrap => nil,
+
+      :line_numbers => nil,
+      :line_number_start => 1,
+      :bold_every => 10,
+
+      :hint => false,
+    }
+
+    helper :classes, :output, :css
+
+    attr_reader :css
+
+  protected
+
+    HTML_ESCAPE = {  #:nodoc:
+      '&' => '&amp;',
+      '"' => '&quot;',
+      '>' => '&gt;',
+      '<' => '&lt;',
+    }
+
+    # This was to prevent illegal HTML.
+    # Strange chars should still be avoided in codes.
+    evil_chars = Array(0x00...0x20) - [?\n, ?\t, ?\s]
+    evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
+    #ansi_chars = Array(0x7f..0xff)
+    #ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
+    # \x9 (\t) and \xA (\n) not included
+    #HTML_ESCAPE_PATTERN = /[\t&"><\0-\x8\xB-\x1f\x7f-\xff]/
+    HTML_ESCAPE_PATTERN = /[\t"&><\0-\x8\xB-\x1f]/
+
+    TOKEN_KIND_TO_INFO = Hash.new { |h, kind|
+      h[kind] =
+        case kind
+        when :pre_constant
+          'Predefined constant'
+        else
+          kind.to_s.gsub(/_/, ' ').gsub(/\b\w/) { $&.capitalize }
+        end
+    }
+
+    # Generate a hint about the given +classes+ in a +hint+ style.
+    #
+    # +hint+ may be :info, :info_long or :debug.
+    def self.token_path_to_hint hint, classes
+      return '' unless hint
+      title =
+        case hint
+        when :info
+          TOKEN_KIND_TO_INFO[classes.first]
+        when :info_long
+          classes.reverse.map { |kind| TOKEN_KIND_TO_INFO[kind] }.join('/')
+        when :debug
+          classes.inspect
+        end
+      " title=\"#{title}\""
+    end
+
+    def setup options
+      super
+
+      @HTML_ESCAPE = HTML_ESCAPE.dup
+      @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
+
+      @opened = [nil]
+      @css = CSS.new options[:style]
+
+      hint = options[:hint]
+      if hint and not [:debug, :info, :info_long].include? hint
+        raise ArgumentError, "Unknown value %p for :hint; expected :info, :debug, false or nil." % hint
+      end
+
+      case options[:css]
+
+      when :class
+        @css_style = Hash.new do |h, k|
+          if k.is_a? Array
+            type = k.first
+          else
+            type = k
+          end
+          c = ClassOfKind[type]
+          if c == :NO_HIGHLIGHT and not hint
+            h[k] = false
+          else
+            title = HTML.token_path_to_hint hint, (k[1..-1] << k.first)
+            h[k] = '<span%s class="%s">' % [title, c]
+          end
+        end
+
+      when :style
+        @css_style = Hash.new do |h, k|
+          if k.is_a? Array
+            styles = k.dup
+          else
+            styles = [k]
+          end
+          type = styles.first
+          classes = styles.map { |c| ClassOfKind[c] }
+          if classes.first == :NO_HIGHLIGHT and not hint
+            h[k] = false
+          else
+            styles.shift if [:delimiter, :modifier, :content, :escape].include? styles.first
+            title = HTML.token_path_to_hint hint, styles
+            classes.delete 'il'
+            style = @css[*classes]
+            h[k] =
+              if style
+                '<span%s style="%s">' % [title, style]
+              else
+                false
+              end
+          end
+        end
+
+      else
+        raise ArgumentError, "Unknown value %p for :css." % options[:css]
+
+      end
+    end
+
+    def finish options
+      not_needed = @opened.shift
+      @out << '</span>' * @opened.size
+      warn '%d tokens still open: %p' % [@opened.size, @opened] unless @opened.empty?
+
+      @out.extend Output
+      @out.css = @css
+      @out.numerize! options[:line_numbers], options
+      @out.wrap! options[:wrap]
+
+      super
+    end
+
+    def token text, type
+      if text.is_a? ::String
+        if text =~ /#{HTML_ESCAPE_PATTERN}/o
+          text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
+        end
+        @opened[0] = type
+        if style = @css_style[@opened]
+          @out << style << text << '</span>'
+        else
+          @out << text
+        end
+      else
+        case text
+        when :open
+          @opened[0] = type
+          @out << (@css_style[@opened] || '<span>')
+          @opened << type
+        when :close
+          if @opened.empty?
+            # nothing to close
+          else
+            if @opened.size == 1 or @opened.last != type
+              raise 'Malformed token stream: Trying to close a token (%p) that is not open. Open are: %p.' % [type, @opened[1..-1]] if $DEBUG
+            end
+            @out << '</span>'
+            @opened.pop
+          end
+        when nil
+          raise 'Token with nil as text was given: %p' % [[text, type]]
+        else
+          raise 'unknown token kind: %p' % text
+        end
+      end
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/helpers/plugin.rb b/lib/coderay/helpers/plugin.rb
index 7e90279..742717d 100644
--- a/lib/coderay/helpers/plugin.rb
+++ b/lib/coderay/helpers/plugin.rb
@@ -168,6 +168,15 @@ module PluginHost
     end
   end
 
+  # Makes a map of all loaded plugins.
+  def inspect
+    map = plugin_hash.dup
+    map.each do |id, plugin|
+      map[id] = plugin.to_s[/(?>[\w_]+)$/]
+    end
+    "#{name}[#{host_id}]#{map.inspect}"
+  end
+
 protected
   # Created a new plugin list and stores it to @plugin_hash.
   def create_plugin_hash
@@ -194,15 +203,6 @@ protected
       end
   end
 
-  # Makes a map of all loaded scanners.
-  def inspect
-    map = plugin_hash.dup
-    map.each do |id, plugin|
-      map[id] = plugin.name[/(?>[\w_]+)$/]
-    end
-    map.inspect
-  end
-
   # Loads the map file (see map).
   #
   # This is done automatically when plugin_path is called.
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
index 6d20211..83f73f2 100644
--- a/lib/coderay/scanner.rb
+++ b/lib/coderay/scanner.rb
@@ -1,238 +1,238 @@
-module CodeRay
-
-  require 'coderay/helpers/plugin'
-
-  # = Scanners
-  #
-  # $Id$
-  #
-  # This module holds the Scanner class and its subclasses.
-  # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
-  # can be found in coderay/scanners/ruby.
-  #
-  # Scanner also provides methods and constants for the register
-  # mechanism and the [] method that returns the Scanner class
-  # belonging to the given lang.
-  #
-  # See PluginHost.
-  module Scanners
-    extend PluginHost
-    plugin_path File.dirname(__FILE__), 'scanners'
-
-    require 'strscan'
-
-    # = Scanner
-    #
-    # The base class for all Scanners.
-    #
-    # It is a subclass of Ruby's great +StringScanner+, which
-    # makes it easy to access the scanning methods inside.
-    #
-    # It is also +Enumerable+, so you can use it like an Array of
-    # Tokens:
-    #
-    #   require 'coderay'
-    #   
-    #   c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
-    #   
-    #   for text, kind in c_scanner
-    #     puts text if kind == :operator
-    #   end
-    #   
-    #   # prints: (*==)++;
-    #
-    # OK, this is a very simple example :)
-    # You can also use +map+, +any?+, +find+ and even +sort_by+,
-    # if you want.
-    class Scanner < StringScanner
-      extend Plugin
-      plugin_host Scanners
-
-      # Raised if a Scanner fails while scanning
-      ScanError = Class.new(Exception)
-
-      require 'coderay/helpers/word_list'
-
-      # The default options for all scanner classes.
-      #
-      # Define @default_options for subclasses.
-      DEFAULT_OPTIONS = { :stream => false }
-
-      class << self
-
-        # Returns if the Scanner can be used in streaming mode.
-        def streamable?
-          is_a? Streamable
-        end
-
-        def normify code
-          code = code.to_s.to_unix
-        end
-
-      end
-
-=begin
-## Excluded for speed reasons; protected seems to make methods slow.
-
-  # Save the StringScanner methods from being called.
-  # This would not be useful for highlighting.
-  strscan_public_methods =
-    StringScanner.instance_methods -
-    StringScanner.ancestors[1].instance_methods
-  protected(*strscan_public_methods)
-=end
-
-      # Create a new Scanner.
-      #
-      # * +code+ is the input String and is handled by the superclass
-      #   StringScanner.
-      # * +options+ is a Hash with Symbols as keys.
-      #   It is merged with the default options of the class (you can
-      #   overwrite default options here.)
-      # * +block+ is the callback for streamed highlighting.
-      #
-      # If you set :stream to +true+ in the options, the Scanner uses a
-      # TokenStream with the +block+ as callback to handle the tokens.
-      #
-      # Else, a Tokens object is used.
-      def initialize code='', options = {}, &block
-        @options = self.class::DEFAULT_OPTIONS.merge options
-        raise "I am only the basic Scanner class. I can't scan "\
-          "anything. :( Use my subclasses." if self.class == Scanner
-
-        super Scanner.normify(code)
-
-        @tokens = options[:tokens]
-        if @options[:stream]
-          warn "warning in CodeRay::Scanner.new: :stream is set, "\
-            "but no block was given" unless block_given?
-          raise NotStreamableError, self unless kind_of? Streamable
-          @tokens ||= TokenStream.new(&block)
-        else
-          warn "warning in CodeRay::Scanner.new: Block given, "\
-            "but :stream is #{@options[:stream]}" if block_given?
-          @tokens ||= Tokens.new
-        end
-
-        setup
-      end
-
-      # More mnemonic accessor name for the input string.
-      alias code string
-
-      def reset
-        super
-        reset_instance
-      end
-
-      def string= code
-        code = Scanner.normify(code)
-        super code
-        reset_instance
-      end
-
-      # Scans the code and returns all tokens in a Tokens object.
-      def tokenize new_string=nil, options = {}
-        options = @options.merge(options)
-        self.string = new_string if new_string
-        @cached_tokens =
-          if @options[:stream]  # :stream must have been set already
-            reset unless new_string
-            scan_tokens @tokens, options
-            @tokens
-          else
-            scan_tokens @tokens, options
-          end
-      end
-
-      def tokens
-        @cached_tokens ||= tokenize
-      end
-
-      # Traverses the tokens.
-      def each &block
-        raise ArgumentError,
-          'Cannot traverse TokenStream.' if @options[:stream]
-        tokens.each(&block)
-      end
-      include Enumerable
-
-      # The current line position of the scanner.
-      #
-      # Beware, this is implemented inefficiently. It should be used
-      # for debugging only.
-      def line
-        string[0..pos].count("\n") + 1
-      end
-
-    protected
-
-      # Can be implemented by subclasses to do some initialization
-      # that has to be done once per instance.
-      #
-      # Use reset for initialization that has to be done once per
-      # scan.
-      def setup
-      end
-
-      # This is the central method, and commonly the only one a
-      # subclass implements.
-      #
-      # Subclasses must implement this method; it must return +tokens+
-      # and must only use Tokens#<< for storing scanned tokens!
-      def scan_tokens tokens, options
-        raise NotImplementedError,
-          "#{self.class}#scan_tokens not implemented."
-      end
-
-      def reset_instance
-        @tokens.clear unless @options[:keep_tokens]
-        @cached_tokens = nil
-      end
-
-      # Scanner error with additional status information
-      def raise_inspect msg, tokens, ambit = 30
-        raise ScanError, <<-EOE % [
-
-
-***ERROR in %s: %s
-
-tokens:
-%s
-
-current line: %d  pos = %d
-matched: %p
-bol? = %p,  eos? = %p
-
-surrounding code:
-%p  ~~  %p
-
-
-***ERROR***
-
-        EOE
-        File.basename(caller[0]),
-        msg,
-        tokens.last(10).map { |t| t.inspect }.join("\n"),
-        line, pos,
-        matched, bol?, eos?,
-        string[pos-ambit,ambit],
-        string[pos,ambit],
-        ]
-      end
-
-    end
-
-  end
-end
-
-class String
-  # I love this hack. It seems to silence all dos/unix/mac newline problems.
-  def to_unix
-    if index ?\r
-      gsub(/\r\n?/, "\n")
-    else
-      self
-    end
-  end
-end
+module CodeRay
+
+  require 'coderay/helpers/plugin'
+
+  # = Scanners
+  #
+  # $Id$
+  #
+  # This module holds the Scanner class and its subclasses.
+  # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
+  # and can be found in coderay/scanners/ruby.
+  #
+  # Scanner also provides methods and constants for the register
+  # mechanism and the [] method that returns the Scanner class
+  # belonging to the given lang.
+  #
+  # See PluginHost.
+  module Scanners
+    extend PluginHost
+    plugin_path File.dirname(__FILE__), 'scanners'
+
+    require 'strscan'
+
+    # = Scanner
+    #
+    # The base class for all Scanners.
+    #
+    # It is a subclass of Ruby's great +StringScanner+, which
+    # makes it easy to access the scanning methods inside.
+    #
+    # It is also +Enumerable+, so you can use it like an Array of
+    # Tokens:
+    #
+    #   require 'coderay'
+    #   
+    #   c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
+    #   
+    #   for text, kind in c_scanner
+    #     puts text if kind == :operator
+    #   end
+    #   
+    #   # prints: (*==)++;
+    #
+    # OK, this is a very simple example :)
+    # You can also use +map+, +any?+, +find+ and even +sort_by+,
+    # if you want.
+    class Scanner < StringScanner
+      extend Plugin
+      plugin_host Scanners
+
+      # Raised if a Scanner fails while scanning
+      ScanError = Class.new(Exception)
+
+      require 'coderay/helpers/word_list'
+
+      # The default options for all scanner classes.
+      #
+      # Subclasses override these by defining their own DEFAULT_OPTIONS constant.
+      DEFAULT_OPTIONS = { :stream => false }
+
+      class << self
+
+        # Returns whether the Scanner can be used in streaming mode.
+        def streamable?
+          is_a? Streamable
+        end
+
+        def normify code
+          code = code.to_s.to_unix
+        end
+
+      end
+
+=begin
+## Excluded for speed reasons; protected seems to make methods slow.
+
+  # Save the StringScanner methods from being called.
+  # This would not be useful for highlighting.
+  strscan_public_methods =
+    StringScanner.instance_methods -
+    StringScanner.ancestors[1].instance_methods
+  protected(*strscan_public_methods)
+=end
+
+      # Create a new Scanner.
+      #
+      # * +code+ is the input String and is handled by the superclass
+      #   StringScanner.
+      # * +options+ is a Hash with Symbols as keys.
+      #   It is merged with the default options of the class (you can
+      #   overwrite default options here.)
+      # * +block+ is the callback for streamed highlighting.
+      #
+      # If you set :stream to +true+ in the options, the Scanner uses a
+      # TokenStream with the +block+ as callback to handle the tokens.
+      #
+      # Else, a Tokens object is used.
+      def initialize code='', options = {}, &block
+        @options = self.class::DEFAULT_OPTIONS.merge options
+        raise "I am only the basic Scanner class. I can't scan "\
+          "anything. :( Use my subclasses." if self.class == Scanner
+
+        super Scanner.normify(code)
+
+        @tokens = options[:tokens]
+        if @options[:stream]
+          warn "warning in CodeRay::Scanner.new: :stream is set, "\
+            "but no block was given" unless block_given?
+          raise NotStreamableError, self unless kind_of? Streamable
+          @tokens ||= TokenStream.new(&block)
+        else
+          warn "warning in CodeRay::Scanner.new: Block given, "\
+            "but :stream is #{@options[:stream]}" if block_given?
+          @tokens ||= Tokens.new
+        end
+
+        setup
+      end
+
+      # More mnemonic accessor name for the input string.
+      alias code string
+
+      def reset
+        super
+        reset_instance
+      end
+
+      def string= code
+        code = Scanner.normify(code)
+        super code
+        reset_instance
+      end
+
+      # Scans the code and returns all tokens in a Tokens object.
+      def tokenize new_string=nil, options = {}
+        options = @options.merge(options)
+        self.string = new_string if new_string
+        @cached_tokens =
+          if @options[:stream]  # :stream must have been set already
+            reset unless new_string
+            scan_tokens @tokens, options
+            @tokens
+          else
+            scan_tokens @tokens, options
+          end
+      end
+
+      def tokens
+        @cached_tokens ||= tokenize
+      end
+
+      # Traverses the tokens.
+      def each &block
+        raise ArgumentError,
+          'Cannot traverse TokenStream.' if @options[:stream]
+        tokens.each(&block)
+      end
+      include Enumerable
+
+      # The current line position of the scanner.
+      #
+      # Beware, this is implemented inefficiently. It should be used
+      # for debugging only.
+      def line
+        string[0..pos].count("\n") + 1
+      end
+
+    protected
+
+      # Can be implemented by subclasses to do some initialization
+      # that has to be done once per instance.
+      #
+      # Use reset for initialization that has to be done once per
+      # scan.
+      def setup
+      end
+
+      # This is the central method, and commonly the only one a
+      # subclass implements.
+      #
+      # Subclasses must implement this method; it must return +tokens+
+      # and must only use Tokens#<< for storing scanned tokens!
+      def scan_tokens tokens, options
+        raise NotImplementedError,
+          "#{self.class}#scan_tokens not implemented."
+      end
+
+      def reset_instance
+        @tokens.clear unless @options[:keep_tokens]
+        @cached_tokens = nil
+      end
+
+      # Scanner error with additional status information
+      def raise_inspect msg, tokens, state = nil, ambit = 30
+        raise ScanError, <<-EOE % [
+
+
+***ERROR in %s: %s
+
+tokens:
+%s
+
+current line: %d  pos = %d
+matched: %p  state: %p
+bol? = %p,  eos? = %p
+
+surrounding code:
+%p  ~~  %p
+
+
+***ERROR***
+
+        EOE
+        File.basename(caller[0]),
+        msg,
+        tokens.last(10).map { |t| t.inspect }.join("\n"),
+        line, pos,
+        matched, state, bol?, eos?,
+        string[pos-ambit,ambit],
+        string[pos,ambit],
+        ]
+      end
+
+    end
+
+  end
+end
+
+class String
+  # I love this hack. It seems to silence all dos/unix/mac newline problems.
+  def to_unix
+    if index ?\r
+      gsub(/\r\n?/, "\n")
+    else
+      self
+    end
+  end
+end
diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb
index 6268a6c..1c5fc89 100644
--- a/lib/coderay/scanners/_map.rb
+++ b/lib/coderay/scanners/_map.rb
@@ -1,14 +1,15 @@
-module CodeRay
-module Scanners
-
-  map :cpp => :c,
-    :plain => :plaintext,
-    :pascal => :delphi,
-    :irb => :ruby,
-    :xml => :html,
-    :xhtml => :nitro_html
-
-  default :plain
-
-end
-end
+module CodeRay
+module Scanners
+
+  map :cpp => :c,
+    :plain => :plaintext,
+    :pascal => :delphi,
+    :irb => :ruby,
+    :xml => :html,
+    :xhtml => :nitro_xhtml,
+    :nitro => :nitro_xhtml
+
+  default :plain
+
+end
+end
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb
index 66b8de1..be113d0 100644
--- a/lib/coderay/scanners/c.rb
+++ b/lib/coderay/scanners/c.rb
@@ -1,155 +1,163 @@
-module CodeRay
-module Scanners
-
-  class C < Scanner
-
-    register_for :c
-
-    RESERVED_WORDS = [
-      'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
-      'for', 'goto', 'if', 'return', 'switch', 'while',
-      'struct', 'union', 'enum', 'typedef',
-      'static', 'register', 'auto', 'extern',
-      'sizeof',
-      'volatile', 'const',  # C89
-      'inline', 'restrict', # C99
-    ]
-
-    PREDEFINED_TYPES = [
-      'int', 'long', 'short', 'char', 'void',
-      'signed', 'unsigned', 'float', 'double',
-      'bool', 'complex',  # C99
-    ]
-
-    PREDEFINED_CONSTANTS = [
-      'EOF', 'NULL',
-      'true', 'false',  # C99
-    ]
-
-    IDENT_KIND = WordList.new(:ident).
-      add(RESERVED_WORDS, :reserved).
-      add(PREDEFINED_TYPES, :pre_type).
-      add(PREDEFINED_CONSTANTS, :pre_constant)
-
-    ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
-    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
-
-    def scan_tokens tokens, options
-
-      state = :initial
-
-      until eos?
-
-        kind = :error
-        match = nil
-
-        case state
-
-        when :initial
-
-          if scan(/ \s+ | \\\n /x)
-            kind = :space
-
-          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
-            kind = :comment
-
-          elsif match = scan(/ \# \s* if \s* 0 /x)
-            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
-            kind = :comment
-
-          elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
-            kind = :operator
-
-          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
-            kind = IDENT_KIND[match]
-            if kind == :ident and check(/:(?!:)/)
-              match << scan(/:/)
-              kind = :label
-            end
-
-          elsif match = scan(/L?"/)
-            tokens << [:open, :string]
-            if match[0] == ?L
-              tokens << ['L', :modifier]
-              match = '"'
-            end
-            state = :string
-            kind = :delimiter
-
-          elsif scan(/#\s*(\w*)/)
-            kind = :preprocessor  # FIXME multiline preprocs
-            state = :include_expected if self[1] == 'include'
-
-          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
-            kind = :char
-
-          elsif scan(/0[xX][0-9A-Fa-f]+/)
-            kind = :hex
-
-          elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
-            kind = :oct
-
-          elsif scan(/(?:\d+)(?![.eEfF])/)
-            kind = :integer
-
-          elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
-            kind = :float
-
-          else
-            getch
-          end
-
-        when :string
-          if scan(/[^\\"]+/)
-            kind = :content
-          elsif scan(/"/)
-            tokens << ['"', :delimiter]
-            tokens << [:close, :string]
-            state = :initial
-            next
-          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
-            kind = :char
-          elsif scan(/ \\ | $ /x)
-            kind = :error
-            state = :initial
-          else
-            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
-          end
-
-        when :include_expected
-          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
-            kind = :include
-            state = :initial
-
-          elsif match = scan(/\s+/)
-            kind = :space
-            state = :initial if match.index ?\n
-
-          else
-            getch
-
-          end
-
-        else
-          raise_inspect 'Unknown state', tokens
-
-        end
-
-        match ||= matched
-        if $DEBUG and (not kind or kind == :error)
-          raise_inspect 'Error token %p in line %d' %
-            [[match, kind], line], tokens
-        end
-        raise_inspect 'Empty token', tokens unless match
-
-        tokens << [match, kind]
-
-      end
-
-      tokens
-    end
-
-  end
-
-end
-end
+module CodeRay
+module Scanners
+
+  class C < Scanner
+
+    register_for :c
+
+    RESERVED_WORDS = [
+      'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
+      'for', 'goto', 'if', 'return', 'switch', 'while',
+      'struct', 'union', 'enum', 'typedef',
+      'static', 'register', 'auto', 'extern',
+      'sizeof',
+      'volatile', 'const',  # C89
+      'inline', 'restrict', # C99
+    ]
+
+    PREDEFINED_TYPES = [
+      'int', 'long', 'short', 'char', 'void',
+      'signed', 'unsigned', 'float', 'double',
+      'bool', 'complex',  # C99
+    ]
+
+    PREDEFINED_CONSTANTS = [
+      'EOF', 'NULL',
+      'true', 'false',  # C99
+    ]
+
+    IDENT_KIND = WordList.new(:ident).
+      add(RESERVED_WORDS, :reserved).
+      add(PREDEFINED_TYPES, :pre_type).
+      add(PREDEFINED_CONSTANTS, :pre_constant)
+
+    ESCAPE = / [abfnrtv?\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # text allowed after a backslash; now includes \a and \? (the old class listed 'r' twice)
+    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # uXXXX or UXXXXXXXX after a backslash
+
+    def scan_tokens tokens, options  # tokenizes C source into +tokens+ and returns +tokens+
+
+      state = :initial  # states handled below: :initial, :string, :include_expected
+
+      until eos?
+
+        kind = nil  # stays nil unless a branch assigns it; a nil kind is reported below when $DEBUG
+        match = nil
+
+        case state
+
+        when :initial
+
+          if scan(/ \s+ | \\\n /x)  # whitespace or a backslash-newline
+            kind = :space
+
+          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)  # // comment or /* */ comment (an unterminated one runs to the end)
+            kind = :comment
+
+          elsif match = scan(/ \# \s* if \s* 0 /x)
+            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?  # "#if 0" section up to the next #elif, #else or #endif line, or the end of input
+            kind = :comment
+
+          elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
+            kind = :operator
+
+          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+            kind = IDENT_KIND[match]
+            if kind == :ident and check(/:(?!:)/)  # plain identifier followed by a single colon
+              match << scan(/:/)
+              kind = :label
+            end
+
+          elsif match = scan(/L?"/)
+            tokens << [:open, :string]
+            if match[0] == ?L
+              tokens << ['L', :modifier]  # the L prefix becomes its own token
+              match = '"'
+            end
+            state = :string
+            kind = :delimiter
+
+          elsif scan(/#\s*(\w*)/)
+            kind = :preprocessor  # FIXME multiline preprocs
+            state = :include_expected if self[1] == 'include'
+
+          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
+            kind = :char
+
+          elsif scan(/0[xX][0-9A-Fa-f]+/)
+            kind = :hex
+
+          elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+            kind = :oct
+
+          elsif scan(/(?:\d+)(?![.eEfF])/)
+            kind = :integer
+
+          elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+            kind = :float
+
+          else  # nothing matched: consume one character as an :error token
+            getch
+            kind = :error
+
+          end
+
+        when :string
+          if scan(/[^\\\n"]+/)
+            kind = :content
+          elsif scan(/"/)
+            tokens << ['"', :delimiter]
+            tokens << [:close, :string]
+            state = :initial
+            next  # both tokens are already stored; skip the generic push at the end of the loop
+          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+            kind = :char
+          elsif scan(/ \\ | $ /x)  # lone backslash or end of line inside the string
+            tokens << [:close, :string]
+            kind = :error
+            state = :initial
+          else
+            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
+          end
+
+        when :include_expected
+          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)  # <file> or "file"; the closing delimiter is optional
+            kind = :include
+            state = :initial
+
+          elsif match = scan(/\s+/)
+            kind = :space
+            state = :initial if match.index ?\n  # a line break ends the wait for the include file
+
+          else
+            getch
+            kind = :error
+
+          end
+
+        else
+          raise_inspect 'Unknown state', tokens
+
+        end
+
+        match ||= matched  # branches that did not keep the text themselves use the last match
+        if $DEBUG and not kind
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens
+        end
+        raise_inspect 'Empty token', tokens unless match
+
+        tokens << [match, kind]
+
+      end
+
+      if state == :string  # input ended inside a string: close the open :string group
+        tokens << [:close, :string]
+      end
+
+      tokens
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb
index d9d9e1d..c141874 100644
--- a/lib/coderay/scanners/delphi.rb
+++ b/lib/coderay/scanners/delphi.rb
@@ -1,129 +1,131 @@
-module CodeRay
-module Scanners
-  
-  class Delphi < Scanner
-
-    register_for :delphi
-    
-    RESERVED_WORDS = [
-      'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
-      'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
-      'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
-      'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
-      'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
-      'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
-      'procedure', 'program', 'property', 'raise', 'record', 'repeat',
-      'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
-      'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
-      'xor', 'on'
-    ]
-
-    DIRECTIVES = [
-      'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
-      'contains', 'deprecated', 'dispid', 'dynamic', 'export',
-      'external', 'far', 'forward', 'implements', 'local', 
-      'near', 'nodefault', 'on', 'overload', 'override',
-      'package', 'pascal', 'platform', 'private', 'protected', 'public',
-      'published', 'read', 'readonly', 'register', 'reintroduce',
-      'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
-      'virtual', 'write', 'writeonly'
-    ]
-
-    IDENT_KIND = CaseIgnoringWordList.new(:ident).
-      add(RESERVED_WORDS, :reserved).
-      add(DIRECTIVES, :directive)
-
-    def scan_tokens tokens, options
-
-      state = :initial
-
-      until eos?
-
-        kind = :error
-        match = nil
-
-        if state == :initial
-          
-          if scan(/ \s+ /x)
-            kind = :space
-            
-          elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
-            kind = :preprocessor
-            
-          elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
-            kind = :comment
-            
-          elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
-            kind = :operator
-            
-          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
-            kind = IDENT_KIND[match]
-            
-          elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
-            tokens << [:open, :char]
-            tokens << ["'", :delimiter]
-            tokens << [self[1], :content]
-            tokens << ["'", :delimiter]
-            tokens << [:close, :char]
-            next
-            
-          elsif match = scan(/ ' /x)
-            tokens << [:open, :string]
-            state = :string
-            kind = :delimiter
-            
-          elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
-            kind = :char
-            
-          elsif scan(/ \$ [0-9A-Fa-f]+ /x)
-            kind = :hex
-            
-          elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
-            kind = :integer
-            
-          elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
-            kind = :float
-
-          else
-            getch
-          end
-          
-        elsif state == :string
-          if scan(/[^\n']+/)
-            kind = :content
-          elsif scan(/''/)
-            kind = :char
-          elsif scan(/'/)
-            tokens << ["'", :delimiter]
-            tokens << [:close, :string]
-            state = :initial
-            next
-          elsif scan(/\n/)
-            state = :initial
-          else
-            raise "else case \' reached; %p not handled." % peek(1), tokens
-          end
-          
-        else
-          raise 'else-case reached', tokens
-          
-        end
-        
-        match ||= matched
-        if $DEBUG and (not kind or kind == :error)
-          raise_inspect 'Error token %p in line %d' %
-            [[match, kind], line], tokens
-        end
-        raise_inspect 'Empty token', tokens unless match
-
-        tokens << [match, kind]
-        
-      end
-      
-      tokens
-    end
-
-  end
-
-end
-end
+module CodeRay
+module Scanners
+  
+  class Delphi < Scanner
+
+    register_for :delphi
+    
+    RESERVED_WORDS = [
+      'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
+      'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
+      'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
+      'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
+      'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
+      'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
+      'procedure', 'program', 'property', 'raise', 'record', 'repeat',
+      'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
+      'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
+      'xor', 'on'
+    ]
+
+    DIRECTIVES = [
+      'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
+      'contains', 'deprecated', 'dispid', 'dynamic', 'export',
+      'external', 'far', 'forward', 'implements', 'local', 
+      'near', 'nodefault', 'on', 'overload', 'override',
+      'package', 'pascal', 'platform', 'private', 'protected', 'public',
+      'published', 'read', 'readonly', 'register', 'reintroduce',
+      'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
+      'virtual', 'write', 'writeonly'
+    ]
+
+    IDENT_KIND = CaseIgnoringWordList.new(:ident).
+      add(RESERVED_WORDS, :reserved).
+      add(DIRECTIVES, :directive)
+
+    # Tokenizes Delphi source into +tokens+ and returns +tokens+.
+    # States: :initial (plain code) and :string (inside a '...' literal).
+    # The first matching pattern wins, so the order of the branches matters.
+    def scan_tokens tokens, options
+      state = :initial
+
+      until eos?
+        kind = nil  # stays nil if no branch assigns a kind (reported when $DEBUG)
+        match = nil
+
+        if state == :initial
+          if scan(/ \s+ /x)
+            kind = :space
+
+          elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
+            kind = :preprocessor
+
+          elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
+            kind = :comment
+
+          elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
+            kind = :operator
+
+          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+            kind = IDENT_KIND[match]
+
+          elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
+            tokens << [:open, :char]
+            tokens << ["'", :delimiter]
+            tokens << [self[1], :content]
+            tokens << ["'", :delimiter] if match.size > 1 + self[1].size  # no closing quote if the literal ended at $
+            tokens << [:close, :char]
+            next
+
+          elsif match = scan(/ ' /x)
+            tokens << [:open, :string]
+            state = :string
+            kind = :delimiter
+
+          elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
+            kind = :char
+
+          elsif scan(/ \$ [0-9A-Fa-f]+ /x)
+            kind = :hex
+
+          elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
+            kind = :integer
+
+          elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
+            kind = :float
+
+          else
+            kind = :error
+            getch
+          end
+
+        elsif state == :string
+          if scan(/[^\n']+/)
+            kind = :content
+          elsif scan(/''/)
+            kind = :char
+          elsif scan(/'/)
+            tokens << ["'", :delimiter]
+            tokens << [:close, :string]
+            state = :initial
+            next
+          elsif scan(/\n/)
+            # unterminated string: close it and flag the line break
+            tokens << [:close, :string]
+            kind = :error
+            state = :initial
+          else
+            raise_inspect "else case \' reached; %p not handled." % peek(1), tokens
+          end
+
+        else
+          raise_inspect 'else-case reached', tokens
+        end
+
+        match ||= matched
+        if $DEBUG and not kind
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens
+        end
+        raise_inspect 'Empty token', tokens unless match
+
+        tokens << [match, kind]
+      end
+
+      tokens << [:close, :string] if state == :string  # input ended inside a string
+      tokens
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb
index 7cdc07e..181e5d3 100644
--- a/lib/coderay/scanners/html.rb
+++ b/lib/coderay/scanners/html.rb
@@ -1,167 +1,174 @@
-module CodeRay
-module Scanners
-
-  # HTML Scanner
-  #
-  # $Id$
-  class HTML < Scanner
-
-    include Streamable
-    register_for :html
-
-    ATTR_NAME = /[\w.:-]+/
-    ATTR_VALUE_UNQUOTED = ATTR_NAME
-    TAG_END = /\/?>/
-    HEX = /[0-9a-fA-F]/
-    ENTITY = /
-      &
-      (?:
-        \w+
-      |
-        \#
-        (?:
-          \d+
-        |
-          x#{HEX}+
-        )
-      )
-      ;
-    /ox
-
-    PLAIN_STRING_CONTENT = {
-      "'" => /[^&'>\n]+/,
-      '"' => /[^&">\n]+/,
-    }
-
-  private
-    def setup
-      @state = :initial
-      @plain_string_content = nil
-    end
-
-    def scan_tokens tokens, options
-
-      state = @state
-      plain_string_content = @plain_string_content
-
-      until eos?
-
-        kind = :error
-        match = nil
-
-        if scan(/\s+/m)
-          kind = :space
-
-        else
-
-          case state
-
-          when :initial
-            if scan(/<!--.*?-->/m)
-              kind = :comment
-            elsif scan(/<!DOCTYPE.*?>/m)
-              kind = :preprocessor
-            elsif scan(/<\?xml.*?\?>/m)
-              kind = :preprocessor
-            elsif scan(/<\?.*?\?>|<%.*?%>/m)
-              kind = :comment
-            elsif scan(/<\/[-\w_.:]*>/m)
-              kind = :tag
-            elsif match = scan(/<[-\w_.:]*>?/m)
-              kind = :tag
-              state = :attribute unless match[-1] == ?>
-            elsif scan(/[^<>&]+/)
-              kind = :plain
-            elsif scan(/#{ENTITY}/ox)
-              kind = :entity
-            elsif scan(/[>&]/)
-              kind = :error
-            else
-              raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
-            end
-
-          when :attribute
-            if scan(/#{TAG_END}/)
-              kind = :tag
-              state = :initial
-            elsif scan(/#{ATTR_NAME}/o)
-              kind = :attribute_name
-              state = :attribute_equal
-            else
-              getch
-            end
-
-          when :attribute_equal
-            if scan(/=/)
-              kind = :operator
-              state = :attribute_value
-            elsif scan(/#{ATTR_NAME}/o)
-              kind = :attribute_name
-            elsif scan(/#{TAG_END}/o)
-              kind = :tag
-              state = :initial
-            elsif scan(/./)
-              state = :attribute
-            end
-
-          when :attribute_value
-            if scan(/#{ATTR_VALUE_UNQUOTED}/o)
-              kind = :attribute_value
-              state = :attribute
-            elsif match = scan(/["']/)
-              tokens << [:open, :string]
-              state = :attribute_value_string
-              plain_string_content = PLAIN_STRING_CONTENT[match]
-              kind = :delimiter
-            elsif scan(/#{TAG_END}/o)
-              kind = :tag
-              state = :initial
-            else
-              getch
-            end
-
-          when :attribute_value_string
-            if scan(plain_string_content)
-              kind = :content
-            elsif scan(/['"]/)
-              tokens << [matched, :delimiter]
-              tokens << [:close, :string]
-              state = :attribute
-              next
-            elsif scan(/#{ENTITY}/ox)
-              kind = :entity
-            elsif scan(/[\n>]/)
-              tokens << [:close, :string]
-              kind = :error
-              state = :initial
-            end
-
-          else
-            raise_inspect 'Unknown state: %p' % [state], tokens
-
-          end
-
-        end
-
-        match ||= matched
-        if $DEBUG and (not kind or kind == :error)
-          raise_inspect 'Error token %p in line %d' %
-            [[match, kind], line], tokens
-        end
-        raise_inspect 'Empty token', tokens unless match
-
-        tokens << [match, kind]
-      end
-
-      if options[:keep_state]
-        @state = state
-        @plain_string_content = plain_string_content
-      end
-
-      tokens
-    end
-
-  end
-
-end
-end
+module CodeRay
+module Scanners
+
+  # HTML Scanner
+  #
+  # $Id$
+  class HTML < Scanner
+
+    include Streamable
+    register_for :html
+
+    ATTR_NAME = /[\w.:-]+/
+    ATTR_VALUE_UNQUOTED = ATTR_NAME
+    TAG_END = /\/?>/
+    HEX = /[0-9a-fA-F]/
+    ENTITY = /
+      &
+      (?:
+        \w+
+      |
+        \#
+        (?:
+          \d+
+        |
+          x#{HEX}+
+        )
+      )
+      ;
+    /ox
+
+    PLAIN_STRING_CONTENT = {
+      "'" => /[^&'>\n]+/,
+      '"' => /[^&">\n]+/,
+    }
+
+    def reset  # resets via super, then puts the HTML state machine back into :initial
+      super
+      @state = :initial
+    end
+
+  private
+    def setup  # initial values of the state that scan_tokens reads (and keeps when :keep_state is set)
+      @state = :initial
+      @plain_string_content = nil  # pattern for the content of a quoted attribute value; chosen when a quote opens
+    end
+
+    # Tokenizes HTML into +tokens+ and returns +tokens+. Starts in @state and,
+    # if options[:keep_state] is set, stores the final state for the next run.
+    # Whitespace is a :space token in every state; all other input is matched
+    # by the branches of the current state, first match wins.
+    def scan_tokens tokens, options
+      state = @state
+      plain_string_content = @plain_string_content
+
+      until eos?
+        kind = nil  # stays nil if no branch assigns a kind (reported when $DEBUG)
+        match = nil
+
+        if scan(/\s+/m)
+          kind = :space
+        else
+          case state
+          when :initial
+            if scan(/<!--.*?-->/m)
+              kind = :comment
+            elsif scan(/<!DOCTYPE.*?>/m)
+              kind = :preprocessor
+            elsif scan(/<\?xml.*?\?>/m)
+              kind = :preprocessor
+            elsif scan(/<\?.*?\?>|<%.*?%>/m)
+              kind = :comment
+            elsif scan(/<\/[-\w_.:]*>/m)
+              kind = :tag
+            elsif match = scan(/<[-\w_.:]+>?/m)
+              kind = :tag
+              state = :attribute unless match[-1] == ?>
+            elsif scan(/[^<>&]+/)
+              kind = :plain
+            elsif scan(/#{ENTITY}/ox)
+              kind = :entity
+            elsif scan(/[<>&]/)
+              kind = :error
+            else
+              raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
+            end
+
+          when :attribute
+            if scan(/#{TAG_END}/)
+              kind = :tag
+              state = :initial
+            elsif scan(/#{ATTR_NAME}/o)
+              kind = :attribute_name
+              state = :attribute_equal
+            else
+              kind = :error
+              getch
+            end
+
+          when :attribute_equal
+            if scan(/=/)
+              kind = :operator
+              state = :attribute_value
+            elsif scan(/#{ATTR_NAME}/o)
+              kind = :attribute_name
+            elsif scan(/#{TAG_END}/o)
+              kind = :tag
+              state = :initial
+            elsif scan(/./)
+              kind = :error  # unexpected character, as in the other attribute states
+              state = :attribute
+            end
+
+          when :attribute_value
+            if scan(/#{ATTR_VALUE_UNQUOTED}/o)
+              kind = :attribute_value
+              state = :attribute
+            elsif match = scan(/["']/)
+              tokens << [:open, :string]
+              state = :attribute_value_string
+              plain_string_content = PLAIN_STRING_CONTENT[match]
+              kind = :delimiter
+            elsif scan(/#{TAG_END}/o)
+              kind = :tag
+              state = :initial
+            else
+              kind = :error
+              getch
+            end
+
+          when :attribute_value_string
+            if scan(plain_string_content)
+              kind = :content
+            elsif scan(/['"]/)
+              tokens << [matched, :delimiter]
+              tokens << [:close, :string]
+              state = :attribute
+              next
+            elsif scan(/#{ENTITY}/ox)
+              kind = :entity
+            elsif scan(/&/)
+              kind = :content  # bare ampersand that is no entity, e.g. in "a.cgi?x=1&y=2"
+            elsif scan(/[\n>]/)
+              tokens << [:close, :string]
+              kind = :error
+              state = :initial
+            end
+
+          else
+            raise_inspect 'Unknown state: %p' % [state], tokens
+          end
+        end
+
+        match ||= matched
+        if $DEBUG and not kind
+          raise_inspect 'Error token %p in line %d' %
+            [[match, kind], line], tokens, state
+        end
+        raise_inspect 'Empty token', tokens unless match
+
+        tokens << [match, kind]
+      end
+
+      if options[:keep_state]
+        @state = state
+        @plain_string_content = plain_string_content
+      end
+
+      tokens
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb
deleted file mode 100644
index 5955195..0000000
--- a/lib/coderay/scanners/nitro_html.rb
+++ /dev/null
@@ -1,125 +0,0 @@
-module CodeRay
-module Scanners
-
-  load :html
-  load :ruby
-
-  # RHTML Scanner
-  #
-  # $Id$
-  class NitroHTML < Scanner
-
-    include Streamable
-    register_for :nitro_html
-
-    NITRO_RUBY_BLOCK = /
-      <\?r
-      (?>
-        [^\?]*
-        (?> \?(?!>) [^\?]* )*
-      )
-      (?: \?> )?
-    |
-      <ruby>
-      (?>
-        [^<]*
-        (?> <(?!\/ruby>) [^<]* )*
-      )
-      (?: <\/ruby> )?
-    |
-      <%
-      (?>
-        [^%]*
-        (?> %(?!>) [^%]* )*
-      )
-      (?: %> )?
-    /mx
-
-    NITRO_VALUE_BLOCK = /
-      \#
-      (?:
-        \{
-        [^{}]*
-        (?>
-          \{ [^}]* \}
-          (?> [^{}]* )
-        )*
-        \}?
-      | \| [^|]* \|?
-      | \( [^)]* \)?
-      | \[ [^\]]* \]?
-      | \\ [^\\]* \\?
-      )
-    /x
-
-    NITRO_ENTITY = /
-      % (?: \#\d+ | \w+ ) ;
-    /
-
-    START_OF_RUBY = /
-      (?=[<\#%])
-      < (?: \?r | % | ruby> )
-    | \# [{(|]
-    | % (?: \#\d+ | \w+ ) ;
-    /x
-
-    CLOSING_PAREN = Hash.new do |h, p|
-      h[p] = p
-    end.update( {
-      '(' => ')',
-      '[' => ']',
-      '{' => '}',
-    } )
-
-  private
-
-    def setup
-      @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
-      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
-    end
-
-    def scan_tokens tokens, options
-
-      until eos?
-
-        if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
-          @html_scanner.tokenize match
-
-        elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
-          start_tag = match[0,2]
-          delimiter = CLOSING_PAREN[start_tag[1,1]]
-          end_tag = match[-1,1] == delimiter ? delimiter : ''
-          tokens << [:open, :inline]
-          tokens << [start_tag, :delimiter]
-          code = match[start_tag.size .. -1 - end_tag.size]
-          @ruby_scanner.tokenize code
-          tokens << [end_tag, :delimiter] unless end_tag.empty?
-          tokens << [:close, :inline]
-
-        elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
-          start_tag = '<?r'
-          end_tag = match[-2,2] == '?>' ? '?>' : ''
-          tokens << [:open, :inline]
-          tokens << [start_tag, :delimiter]
-          code = match[start_tag.size .. -(end_tag.size)-1]
-          @ruby_scanner.tokenize code
-          tokens << [end_tag, :delimiter] unless end_tag.empty?
-          tokens << [:close, :inline]
-
-        elsif entity = scan(/#{NITRO_ENTITY}/o)
-          tokens << [entity, :entity]
-
-        else
-          raise_inspect 'else-case reached!', tokens
-        end
-
-      end
-
-      tokens
-
-    end
-
-  end
-
-end
-end
diff --git a/lib/coderay/scanners/nitro_xhtml.rb b/lib/coderay/scanners/nitro_xhtml.rb
new file mode 100644
index 0000000..baef162
--- /dev/null
+++ b/lib/coderay/scanners/nitro_xhtml.rb
@@ -0,0 +1,130 @@
+module CodeRay
+module Scanners
+
+  load :html
+  load :ruby
+
+  # Nitro XHTML Scanner
+  #
+  # $Id$
+  class NitroXHTML < Scanner
+
+    include Streamable
+    register_for :nitro_xhtml
+
+    NITRO_RUBY_BLOCK = /
+      <\?r
+      (?>
+        [^\?]*
+        (?> \?(?!>) [^\?]* )*
+      )
+      (?: \?> )?
+    |
+      <ruby>
+      (?>
+        [^<]*
+        (?> <(?!\/ruby>) [^<]* )*
+      )
+      (?: <\/ruby> )?
+    |
+      <%
+      (?>
+        [^%]*
+        (?> %(?!>) [^%]* )*
+      )
+      (?: %> )?
+    /mx
+
+    NITRO_VALUE_BLOCK = /
+      \#
+      (?:
+        \{
+        [^{}]*
+        (?>
+          \{ [^}]* \}
+          (?> [^{}]* )
+        )*
+        \}?
+      | \| [^|]* \|?
+      | \( [^)]* \)?
+      | \[ [^\]]* \]?
+      | \\ [^\\]* \\?
+      )
+    /x
+
+    NITRO_ENTITY = /
+      % (?: \#\d+ | \w+ ) ;
+    /
+
+    START_OF_RUBY = /
+      (?=[<\#%])
+      < (?: \?r | % | ruby> )
+    | \# [{(|]
+    | % (?: \#\d+ | \w+ ) ;
+    /x
+
+    CLOSING_PAREN = Hash.new do |h, p|
+      h[p] = p
+    end.update( {
+      '(' => ')',
+      '[' => ']',
+      '{' => '}',
+    } )
+
+  private
+
+    def setup  # creates the nested Ruby and HTML scanners; both are handed this scanner's @tokens and :keep_tokens
+      @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
+      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+    end
+
+    def reset_instance  # runs the inherited cleanup, then also resets the nested HTML scanner
+      super
+      @html_scanner.reset
+    end
+
+    # Splits Nitro XHTML into HTML parts (passed on to @html_scanner), embedded
+    # Ruby (value blocks like #{...} and code blocks <?r ?>, <ruby></ruby>,
+    # <% %>; passed on to @ruby_scanner) and %name; entities. Returns +tokens+.
+    def scan_tokens tokens, options
+      until eos?
+        if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
+          @html_scanner.tokenize match
+
+        elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
+          start_tag = match[0,2]
+          scan_inline_ruby tokens, match, start_tag, CLOSING_PAREN[start_tag[1,1]]
+
+        elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
+          # the block may start with any of the three openers, not only <?r
+          start_tag = match[/\A<(?:\?r|ruby>|%)/]
+          closer = { '<?r' => '?>', '<ruby>' => '</ruby>', '<%' => '%>' }[start_tag]
+          scan_inline_ruby tokens, match, start_tag, closer
+
+        elsif entity = scan(/#{NITRO_ENTITY}/o)
+          tokens << [entity, :entity]
+
+        else
+          raise_inspect 'else-case reached!', tokens
+        end
+      end
+
+      tokens
+    end
+
+    # Emits one :inline group: +start_tag+, the Ruby code, and +closer+ if the text
+    # after +start_tag+ really ends with it (an unterminated "#|" has no end tag).
+    def scan_inline_ruby tokens, match, start_tag, closer
+      rest = match[start_tag.size .. -1]
+      end_tag = rest[-closer.size, closer.size] == closer ? closer : ''
+      tokens << [:open, :inline]
+      tokens << [start_tag, :delimiter]
+      @ruby_scanner.tokenize rest[0, rest.size - end_tag.size]
+      tokens << [end_tag, :delimiter] unless end_tag.empty?
+      tokens << [:close, :inline]
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb
index 15a7566..8afb727 100644
--- a/lib/coderay/scanners/rhtml.rb
+++ b/lib/coderay/scanners/rhtml.rb
@@ -1,65 +1,73 @@
-module CodeRay
-module Scanners
-
-  load :html
-  load :ruby
-
-  # RHTML Scanner
-  #
-  # $Id$
-  class RHTML < Scanner
-
-    include Streamable
-    register_for :rhtml
-
-    ERB_RUBY_BLOCK = /
-      <%(?!%)[=-]?
-      (?>
-        [^%]*
-        (?> %(?!>) [^%]* )*
-      )
-      (?: %> )?
-    /x
-
-    START_OF_ERB = /
-      <%(?!%)
-    /x
-
-  private
-
-    def setup
-      @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
-      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
-    end
-
-    def scan_tokens tokens, options
-
-      until eos?
-
-        if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
-          @html_scanner.tokenize match
-
-        elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
-          start_tag = match[/\A<%[-=]?/]
-          end_tag = match[/%?>?\z/]
-          tokens << [:open, :inline]
-          tokens << [start_tag, :delimiter]
-          code = match[start_tag.size .. -1 - end_tag.size]
-          @ruby_scanner.tokenize code
-          tokens << [end_tag, :delimiter] unless end_tag.empty?
-          tokens << [:close, :inline]
-
-        else
-          raise_inspect 'else-case reached!', tokens
-        end
-
-      end
-
-      tokens
-
-    end
-
-  end
-
-end
-end
+module CodeRay
+module Scanners
+
+  load :html
+  load :ruby
+
+  # RHTML Scanner
+  #
+  # $Id$
+  class RHTML < Scanner
+
+    include Streamable
+    register_for :rhtml
+
+    # One ERB tag: "<%", "<%=" or "<%-", the code, and an optional "%>" or "-%>".
+    ERB_RUBY_BLOCK = /
+      <%(?!%)[=-]?
+      (?>
+        [^\-%]*    # normal*
+        (?>        # special
+          (?: %(?!>) | -(?!%>) )
+          [^\-%]*  # normal*
+        )*
+      )
+      (?: -?%> )?
+    /x
+
+    # Start of an ERB tag; a "<%" followed by another "%" does not count.
+    START_OF_ERB = /
+      <%(?!%)
+    /x
+
+  private
+    # Creates the Ruby and HTML sub-scanners; both are handed this scanner's @tokens.
+    def setup
+      @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
+      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+    end
+
+    # Extends the inherited reset so that the HTML sub-scanner is reset as well.
+    def reset_instance
+      super
+      @html_scanner.reset
+    end
+
+    # HTML goes to the HTML sub-scanner; each ERB tag becomes an :inline group of Ruby tokens.
+    def scan_tokens tokens, options
+      until eos?
+        if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
+          @html_scanner.tokenize match
+
+        elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
+          start_tag = match[/\A<%[-=]?/]
+          # Only a complete "%>" or "-%>" is a closing tag (matters when the tag is cut off by the end of input).
+          end_tag = match[/(?:-?%>)?\z/]
+          tokens << [:open, :inline]
+          tokens << [start_tag, :delimiter]
+          code = match[start_tag.size .. -1 - end_tag.size]
+          @ruby_scanner.tokenize code
+          tokens << [end_tag, :delimiter] unless end_tag.empty?
+          tokens << [:close, :inline]
+
+        else
+          raise_inspect 'else-case reached!', tokens
+        end
+      end
+      tokens
+    end
+
+  end
+
+end
+end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
index 3ce5003..76c87ca 100644
--- a/lib/coderay/scanners/ruby.rb
+++ b/lib/coderay/scanners/ruby.rb
@@ -1,400 +1,404 @@
-module CodeRay
-module Scanners
-
-  # This scanner is really complex, since Ruby _is_ a complex language!
-  #
-  # It tries to highlight 100% of all common code,
-  # and 90% of strange codes.
-  #
-  # It is optimized for HTML highlighting, and is not very useful for
-  # parsing or pretty printing.
-  #
-  # For now, I think it's better than the scanners in VIM or Syntax, or
-  # any highlighter I was able to find, except Caleb's RubyLexer.
-  #
-  # I hope it's also better than the rdoc/irb lexer.
-  class Ruby < Scanner
-
-    include Streamable
-
-    register_for :ruby
-
-    helper :patterns
-    
-    DEFAULT_OPTIONS = {
-      :parse_regexps => true,
-    }
-
-  private
-    def scan_tokens tokens, options
-      parse_regexp = false # options[:parse_regexps]
-      first_bake = saved_tokens = nil
-      last_token_dot = false
-      fancy_allowed = regexp_allowed = true
-      heredocs = nil
-      last_state = nil
-      state = :initial
-      depth = nil
-      states = []
-
-      patterns = Patterns  # avoid constant lookup
-
-      until eos?
-        type = :error
-        match = nil
-        kind = nil
-
-        if state.instance_of? patterns::StringState
-# {{{
-          match = scan_until(state.pattern) || scan_until(/\z/)
-          tokens << [match, :content] unless match.empty?
-          break if eos?
-          
-          if state.heredoc and self[1]  # end of heredoc
-            match = getch.to_s
-            match << scan_until(/$/) unless eos?
-            tokens << [match, :delimiter]
-            tokens << [:close, state.type]
-            state = state.next_state
-            next
-          end
-          
-          case match = getch
-          
-          when state.delim
-            if state.paren
-              state.paren_depth -= 1 
-              if state.paren_depth > 0
-                tokens << [match, :nesting_delimiter]
-                next
-              end
-            end
-            tokens << [match, :delimiter]
-            if state.type == :regexp and not eos?
-              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
-              tokens << [modifiers, :modifier] unless modifiers.empty?
-              if parse_regexp
-                extended = modifiers.index ?x
-                tokens = saved_tokens
-                regexp = tokens
-                for text, type in regexp
-                  if text.is_a? ::String
-                    case type
-                    when :content
-                      text.scan(/([^#]+)|(#.*)/) do |plain, comment|
-                        if plain
-                          tokens << [plain, :content]
-                        else
-                          tokens << [comment, :comment]
-                        end
-                      end
-                    when :character
-                      if text[/\\(?:[swdSWDAzZbB]|\d+)/]
-                        tokens << [text, :modifier]
-                      else
-                        tokens << [text, type]
-                      end
-                    else
-                      tokens << [text, type]
-                    end
-                  else
-                    tokens << [text, type]
-                  end                    
-                end
-                first_bake = saved_tokens = nil
-              end
-            end
-            tokens << [:close, state.type]
-            fancy_allowed = regexp_allowed = false
-            state = state.next_state
-            
-          when '\\'
-            if state.interpreted
-              if esc = scan(/ #{patterns::ESCAPE} /ox)
-                tokens << [match + esc, :char]
-              else
-                tokens << [match, :error]
-              end
-            else
-              case m = getch
-              when state.delim, '\\'
-                tokens << [match + m, :char]
-              when nil
-                tokens << [match, :error]
-              else
-                tokens << [match + m, :content]
-              end
-            end
-            
-          when '#'
-            case peek(1)[0]
-            when ?{
-              states.push [state, depth, heredocs]
-              fancy_allowed = regexp_allowed = true
-              state = :initial
-              depth = 1
-              tokens << [:open, :inline]
-              tokens << [match + getch, :delimiter]
-            when ?$, ?@
-              tokens << [match, :escape]
-              last_state = state  # scan one token as normal code, then return here
-              state = :initial
-            else
-              raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
-            end
-            
-          when state.paren
-            state.paren_depth += 1
-            tokens << [match, :nesting_delimiter]
-
-          when /#{patterns::REGEXP_SYMBOLS}/ox
-            tokens << [match, :function]
-            
-          else
-            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
-            
-          end
-          next
-# }}}
-        else
-# {{{
-          if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
-            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
-            fancy_allowed = true
-            case m = match[0]
-            when ?\s, ?\t, ?\f
-              match << scan(/\s*/) unless eos? or heredocs
-              type = :space
-            when ?\n, ?\\
-              type = :space
-              if m == ?\n
-                regexp_allowed = true
-                state = :initial if state == :undef_comma_expected
-              end
-              if heredocs
-                unscan  # heredoc scanning needs \n at start
-                state = heredocs.shift
-                tokens << [:open, state.type]
-                heredocs = nil if heredocs.empty?
-                next
-              else
-                match << scan(/\s*/) unless eos?
-              end
-            when ?#, ?=, ?_
-              type = :comment
-              regexp_allowed = true
-            else
-              raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
-            end
-            tokens << [match, type]
-            next
-
-          elsif state == :initial
-
-            # IDENTS #
-            if match = scan(/#{patterns::METHOD_NAME}/o)
-              if last_token_dot
-                type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
-              else
-                type = patterns::IDENT_KIND[match]
-                if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
-                  type = :constant
-                elsif type == :reserved
-                  state = patterns::DEF_NEW_STATE[match]
-                end
-              end
-              ## experimental!
-              fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
-
-            # OPERATORS #
-            elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
-              (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
-              if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
-                regexp_allowed = fancy_allowed = :set
-              end
-              last_token_dot = :set if match == '.' or match == '::'
-              type = :operator
-              unless states.empty?
-                case match
-                when '{'
-                  depth += 1
-                when '}'
-                  depth -= 1
-                  if depth == 0
-                    state, depth, heredocs = states.pop
-                    tokens << [match, :delimiter]
-                    type = :inline
-                    match = :close
-                  end
-                end
-              end
-
-            elsif match = scan(/ ['"] /mx)
-              tokens << [:open, :string]
-              type = :delimiter
-              state = patterns::StringState.new :string, match == '"', match  # important for streaming
-
-            elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
-              type = :instance_variable
-
-            elsif regexp_allowed and match = scan(/\//)
-              tokens << [:open, :regexp]
-              type = :delimiter
-              interpreted = true
-              state = patterns::StringState.new :regexp, interpreted, match
-              if parse_regexp
-                tokens = []
-                saved_tokens = tokens
-              end
-
-            elsif match = scan(/#{patterns::NUMERIC}/o)
-              type = if self[1] then :float else :integer end
-
-            elsif match = scan(/#{patterns::SYMBOL}/o)
-              case delim = match[1]
-              when ?', ?"
-                tokens << [:open, :symbol]
-                tokens << [':', :symbol]
-                match = delim.chr
-                type = :delimiter
-                state = patterns::StringState.new :symbol, delim == ?", match
-              else
-                type = :symbol
-              end
-
-            elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
-              regexp_allowed = fancy_allowed = :set
-              type = :operator
-
-            elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
-              indented = self[1] == '-'
-              quote = self[3]
-              delim = self[quote ? 4 : 2]
-              type = patterns::QUOTE_TO_TYPE[quote]
-              tokens << [:open, type]
-              tokens << [match, :delimiter]
-              match = :close
-              heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
-              heredocs ||= []  # create heredocs if empty
-              heredocs << heredoc
-
-            elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
-              type, interpreted = *patterns::FancyStringType.fetch(self[1]) do
-                raise_inspect 'Unknown fancy string: %%%p' % k, tokens
-              end
-              tokens << [:open, type]
-              state = patterns::StringState.new type, interpreted, self[2]
-              type = :delimiter
-
-            elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
-              type = :integer
-
-            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
-              regexp_allowed = fancy_allowed = :set
-              type = :operator
-
-            elsif match = scan(/`/)
-              if last_token_dot
-                type = :operator
-              else
-                tokens << [:open, :shell]
-                type = :delimiter
-                state = patterns::StringState.new :shell, true, match
-              end
-
-            elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
-              type = :global_variable
-
-            elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
-              type = :class_variable
-
-            else
-              match = getch
-
-            end
-
-          elsif state == :def_expected
-            state = :initial
-            if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
-              type = :method
-            else
-              next
-            end
-
-          elsif state == :undef_expected
-            state = :undef_comma_expected
-            if match = scan(/#{patterns::METHOD_NAME_EX}/o)
-              type = :method
-            elsif match = scan(/#{patterns::SYMBOL}/o)
-              case delim = match[1]
-              when ?', ?"
-                tokens << [:open, :symbol]
-                tokens << [':', :symbol]
-                match = delim.chr
-                type = :delimiter
-                state = patterns::StringState.new :symbol, delim == ?", match
-                state.next_state = :undef_comma_expected
-              else
-                type = :symbol
-              end
-            else
-              state = :initial
-              next
-            end
-
-          elsif state == :undef_comma_expected
-            if match = scan(/,/)
-              type = :operator
-              state = :undef_expected
-            else
-              state = :initial
-              next
-            end
-
-          elsif state == :module_expected
-            if match = scan(/<</)
-              type = :operator
-            else
-              state = :initial
-              if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
-                type = :class
-              else
-                next
-              end
-            end
-
-          end
-# }}}
-
-          regexp_allowed = regexp_allowed == :set
-          fancy_allowed = fancy_allowed == :set
-          last_token_dot = last_token_dot == :set
-
-          if $DEBUG and (not kind or kind == :error)
-            raise_inspect 'Error token %p in line %d' %
-              [[match, kind], line], tokens
-          end
-          raise_inspect 'Empty token', tokens unless match
-
-          tokens << [match, type]
-
-          if last_state
-            state = last_state
-            last_state = nil
-          end
-        end
-      end
-
-      states << state if state.is_a? patterns::StringState
-      until states.empty?
-        tokens << [:close, states.pop.type]
-      end
-
-      tokens
-    end
-  end
-
-end
-end
-
-# vim:fdm=marker
+module CodeRay
+module Scanners
+
+  # This scanner is really complex, since Ruby _is_ a complex language!
+  #
+  # It tries to highlight 100% of all common code,
+  # and 90% of strange codes.
+  #
+  # It is optimized for HTML highlighting, and is not very useful for
+  # parsing or pretty printing.
+  #
+  # For now, I think it's better than the scanners in VIM or Syntax, or
+  # any highlighter I was able to find, except Caleb's RubyLexer.
+  #
+  # I hope it's also better than the rdoc/irb lexer.
+  class Ruby < Scanner
+
+    include Streamable
+
+    register_for :ruby
+
+    helper :patterns
+
+    DEFAULT_OPTIONS = {
+      :parse_regexps => true,
+    }
+
+  private
+    # Scans the whole input and appends [text, kind] tokens, plus [:open, kind] and
+    # [:close, kind] group markers, to +tokens+, which is also the return value.
+    # +options+ is not read at the moment: regexp parsing is hard-wired off below.
+    def scan_tokens tokens, options
+      parse_regexp = false # options[:parse_regexps]
+      first_bake = saved_tokens = nil
+      last_token_dot = false
+      fancy_allowed = regexp_allowed = true
+      heredocs = nil
+      last_state = nil
+      state = :initial
+      depth = nil
+      inline_block_stack = []
+
+      patterns = Patterns  # avoid constant lookup
+
+      until eos?
+        match = nil
+        kind = nil
+
+        if state.instance_of? patterns::StringState
+# {{{
+          match = scan_until(state.pattern) || scan_until(/\z/)
+          tokens << [match, :content] unless match.empty?
+          break if eos?
+
+          if state.heredoc and self[1]  # end of heredoc
+            match = getch.to_s
+            match << scan_until(/$/) unless eos?
+            tokens << [match, :delimiter]
+            tokens << [:close, state.type]
+            state = state.next_state
+            next
+          end
+
+          case match = getch
+          when state.delim
+            if state.paren
+              state.paren_depth -= 1
+              if state.paren_depth > 0
+                tokens << [match, :nesting_delimiter]
+                next
+              end
+            end
+            tokens << [match, :delimiter]
+            if state.type == :regexp and not eos?
+              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
+              tokens << [modifiers, :modifier] unless modifiers.empty?
+              if parse_regexp
+                extended = modifiers.index ?x
+                tokens = saved_tokens
+                regexp = tokens
+                for text, kind in regexp
+                  if text.is_a? ::String
+                    case kind
+                    when :content
+                      text.scan(/([^#]+)|(#.*)/) do |plain, comment|
+                        if plain
+                          tokens << [plain, :content]
+                        else
+                          tokens << [comment, :comment]
+                        end
+                      end
+                    when :character
+                      if text[/\\(?:[swdSWDAzZbB]|\d+)/]
+                        tokens << [text, :modifier]
+                      else
+                        tokens << [text, kind]
+                      end
+                    else
+                      tokens << [text, kind]
+                    end
+                  else
+                    tokens << [text, kind]
+                  end
+                end
+                first_bake = saved_tokens = nil
+              end
+            end
+            tokens << [:close, state.type]
+            fancy_allowed = regexp_allowed = false
+            state = state.next_state
+
+          when '\\'
+            if state.interpreted
+              if esc = scan(/ #{patterns::ESCAPE} /ox)
+                tokens << [match + esc, :char]
+              else
+                tokens << [match, :error]
+              end
+            else
+              case m = getch
+              when state.delim, '\\'
+                tokens << [match + m, :char]
+              when nil
+                tokens << [match, :error]
+              else
+                tokens << [match + m, :content]
+              end
+            end
+
+          when '#'
+            case peek(1)[0]
+            when ?{
+              inline_block_stack << [state, depth, heredocs]
+              fancy_allowed = regexp_allowed = true
+              state = :initial
+              depth = 1
+              tokens << [:open, :inline]
+              tokens << [match + getch, :delimiter]
+            when ?$, ?@
+              tokens << [match, :escape]
+              last_state = state  # scan one token as normal code, then return here
+              state = :initial
+            else
+              raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
+            end
+
+          when state.paren
+            state.paren_depth += 1
+            tokens << [match, :nesting_delimiter]
+
+          when /#{patterns::REGEXP_SYMBOLS}/ox
+            tokens << [match, :function]
+
+          else
+            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
+          end
+          next
+# }}}
+        else
+# {{{
+          if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
+            ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
+            fancy_allowed = true
+            case m = match[0]
+            when ?\s, ?\t, ?\f
+              match << scan(/\s*/) unless eos? or heredocs
+              kind = :space
+            when ?\n, ?\\
+              kind = :space
+              if m == ?\n
+                regexp_allowed = true
+                state = :initial if state == :undef_comma_expected
+              end
+              if heredocs
+                unscan  # heredoc scanning needs \n at start
+                state = heredocs.shift
+                tokens << [:open, state.type]
+                heredocs = nil if heredocs.empty?
+                next
+              else
+                match << scan(/\s*/) unless eos?
+              end
+            when ?#, ?=, ?_
+              kind = :comment
+              regexp_allowed = true
+            else
+              raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens
+            end
+            tokens << [match, kind]
+            next
+
+          elsif state == :initial
+
+            # IDENTS #
+            if match = scan(/#{patterns::METHOD_NAME}/o)
+              if last_token_dot
+                kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
+              else
+                kind = patterns::IDENT_KIND[match]
+                if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
+                  kind = :constant
+                elsif kind == :reserved
+                  state = patterns::DEF_NEW_STATE[match]
+                end
+              end
+              ## experimental!
+              fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/)
+
+            # OPERATORS #
+            elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or
+              (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o))
+              if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
+                regexp_allowed = fancy_allowed = :set
+              end
+              last_token_dot = :set if match == '.' or match == '::'
+              kind = :operator
+              unless inline_block_stack.empty?
+                case match
+                when '{'
+                  depth += 1
+                when '}'
+                  depth -= 1
+                  if depth == 0  # closing brace of inline block reached
+                    state, depth, heredocs = inline_block_stack.pop
+                    tokens << [match, :delimiter]
+                    kind = :inline
+                    match = :close
+                  end
+                end
+              end
+
+            elsif match = scan(/ ['"] /mx)
+              tokens << [:open, :string]
+              kind = :delimiter
+              state = patterns::StringState.new :string, match == '"', match  # important for streaming
+
+            elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
+              kind = :instance_variable
+
+            elsif regexp_allowed and match = scan(/\//)
+              tokens << [:open, :regexp]
+              kind = :delimiter
+              interpreted = true
+              state = patterns::StringState.new :regexp, interpreted, match
+              if parse_regexp
+                tokens = []
+                saved_tokens = tokens
+              end
+
+            elsif match = scan(/#{patterns::NUMERIC}/o)
+              kind = if self[1] then :float else :integer end
+
+            elsif match = scan(/#{patterns::SYMBOL}/o)
+              case delim = match[1]
+              when ?', ?"
+                tokens << [:open, :symbol]
+                tokens << [':', :symbol]
+                match = delim.chr
+                kind = :delimiter
+                state = patterns::StringState.new :symbol, delim == ?", match
+              else
+                kind = :symbol
+              end
+
+            elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
+              regexp_allowed = fancy_allowed = :set
+              kind = :operator
+
+            elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o)
+              indented = self[1] == '-'
+              quote = self[3]
+              delim = self[quote ? 4 : 2]
+              kind = patterns::QUOTE_TO_TYPE[quote]
+              tokens << [:open, kind]
+              tokens << [match, :delimiter]
+              match = :close
+              heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
+              heredocs ||= []  # create heredocs if empty
+              heredocs << heredoc
+
+            elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o)
+              kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
+                raise_inspect 'Unknown fancy string: %%%p' % k, tokens
+              end
+              tokens << [:open, kind]
+              state = patterns::StringState.new kind, interpreted, self[2]
+              kind = :delimiter
+
+            elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o)
+              kind = :integer
+
+            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
+              regexp_allowed = fancy_allowed = :set
+              kind = :operator
+
+            elsif match = scan(/`/)
+              if last_token_dot
+                kind = :operator
+              else
+                tokens << [:open, :shell]
+                kind = :delimiter
+                state = patterns::StringState.new :shell, true, match
+              end
+
+            elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
+              kind = :global_variable
+
+            elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
+              kind = :class_variable
+
+            else
+              kind = :error
+              match = getch
+            end
+
+          elsif state == :def_expected
+            state = :initial
+            if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
+              kind = :method
+            else
+              next
+            end
+
+          elsif state == :undef_expected
+            state = :undef_comma_expected
+            if match = scan(/#{patterns::METHOD_NAME_EX}/o)
+              kind = :method
+            elsif match = scan(/#{patterns::SYMBOL}/o)
+              case delim = match[1]
+              when ?', ?"
+                tokens << [:open, :symbol]
+                tokens << [':', :symbol]
+                match = delim.chr
+                kind = :delimiter
+                state = patterns::StringState.new :symbol, delim == ?", match
+                state.next_state = :undef_comma_expected
+              else
+                kind = :symbol
+              end
+            else
+              state = :initial
+              next
+            end
+
+          elsif state == :undef_comma_expected
+            if match = scan(/,/)
+              kind = :operator
+              state = :undef_expected
+            else
+              state = :initial
+              next
+            end
+
+          elsif state == :module_expected
+            if match = scan(/<</)
+              kind = :operator
+            else
+              state = :initial
+              if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
+                kind = :class
+              else
+                next
+              end
+            end
+
+          end
+# }}}
+
+          regexp_allowed = regexp_allowed == :set
+          fancy_allowed = fancy_allowed == :set
+          last_token_dot = last_token_dot == :set
+
+          if $DEBUG and not kind
+            raise_inspect 'Error token %p in line %d' %
+              [[match, kind], line], tokens, state
+          end
+          raise_inspect 'Empty token', tokens unless match
+
+          tokens << [match, kind]
+
+          if last_state
+            state = last_state
+            last_state = nil
+          end
+        end
+      end
+      # End of input: close every string and inline block that is still open.
+      inline_block_stack << [state] if state.is_a? patterns::StringState
+      until inline_block_stack.empty?
+        this_block = inline_block_stack.pop
+        tokens << [:close, :inline] if this_block.size > 1
+        state = this_block.first
+        tokens << [:close, state.type]
+      end
+
+      tokens
+    end
+
+  end
+
+end
+end
+
+# vim:fdm=marker
-- 
cgit v1.2.1