New Repository, initial import

author: no author <noone@nowhere> 2005-09-26 02:58:54 +0000
committer: no author <noone@nowhere> 2005-09-26 02:58:54 +0000
commit: 84b8431608174e74a4c0d2394eb330a6621bc74b (patch)
tree: ffc2bd7ce21708a9147247c80b0e7fc7728ea063 /lib
download: coderay-84b8431608174e74a4c0d2394eb330a6621bc74b.tar.gz
27 files changed, 3515 insertions, 0 deletions
diff --git a/lib/coderay.rb b/lib/coderay.rb
new file mode 100644
index 0000000..17c315d
--- /dev/null
+++ b/lib/coderay.rb
@@ -0,0 +1,169 @@
+# = CodeRay
+#
+# CodeRay is a Ruby library for syntax highlighting.
+#
+# I try to make CodeRay easy to use and intuitive, but at the same time fully featured, complete,
+# fast and efficient.
+# 
+# See README.
+# 
+# It consists mainly of
+# * the main engine: CodeRay, CodeRay::Scanner, CodeRay::Tokens, CodeRay::TokenStream, CodeRay::Encoder
+# * the scanners in CodeRay::Scanners
+# * the encoders in CodeRay::Encoders
+# 
+# Here's a fancy graphic to light up this gray docu:
+# 
+# http://rd.cYcnus.de/coderay/scheme.png
+# 
+# == Documentation
+#
+# See CodeRay, Encoders, Scanners, Tokens.
+#
+# == Usage
+#
+# Remember you need RubyGems to use CodeRay. Run Ruby with -rubygems option
+# if required.
+#
+# === Highlight Ruby code in a string as html
+# 
+#   require 'coderay'
+#   print CodeRay.scan('puts "Hello, world!"', :ruby).compact.html.page
+#
+#   # prints something like this:
+#   puts <span class="s">&quot;Hello, world!&quot;</span>
+# 
+# 
+# === Highlight C code from a file in a html div
+# 
+#   require 'coderay'
+#   print CodeRay.scan(File.read('ruby.h'), :c).html.div
+#   # print CodeRay.scan_file('ruby.h').html.div ## not working yet
+# 
+# You can include this div in your page. The used CSS styles can be printed with
+# 
+#   % ruby -rcoderay -e "print CodeRay::Encoders[:html]::CSS"
+# 
+# === Highlight without typing too much
+#
+# If you are one of the hasty (or lazy, or extremely curious) people, just run this file:
+#
+#   % ruby -rubygems coderay.rb
+# 
+# If the output was too fast for you, try
+# 
+#   % ruby -rubygems coderay.rb > example.html
+#
+# and look at the file it created.
+# 
+module CodeRay
+	
+	Version = '0.4.2'
+	
+	require 'coderay/tokens'
+	require 'coderay/scanner'
+	require 'coderay/encoder'
+
+
+	class << self
+
+		# Scans the given +code+ (a String) with the Scanner for +lang+.
+		# 
+		# This is a simple way to use CodeRay. Example:
+		#  require 'coderay'
+		#  page = CodeRay.scan("puts 'Hello, world!'", :ruby).html
+		#
+		# See also demo/demo_simple.
+		def scan code, lang, options = {}, &block
+			scanner = Scanners[lang].new code, options, &block
+			scanner.tokenize
+		end
+
+		# Scans +filename+ (a path to a code file) with the Scanner for +lang+.
+		# 
+		# If +lang+ is :auto or omitted, the CodeRay::FileType module is used to
+		# determine it. If it cannot find out what type it is, it uses CodeRay::Scanners::Plaintext.
+		#
+		# Calls CodeRay.scan, forwarding +options+ and the block unchanged.
+		# 
+		# Example:
+		#  require 'coderay'
+		#  page = CodeRay.scan_file('some_c_code.c').html
+		def scan_file filename, lang = :auto, options = {}, &block
+			file = IO.read filename
+			if lang == :auto
+				require 'coderay/helpers/filetype'
+				lang = FileType.fetch filename, :plaintext, true
+			end
+			scan file, lang, options, &block
+		end
+
+		# Scan the +code+ (a string) with the scanner for +lang+, with :stream enabled.
+		# 
+		# Calls scan with a copy of +options+ in which :stream is set to true.
+		# 
+		# See CodeRay.scan.
+		def scan_stream code, lang, options = {}, &block
+			options = options.merge :stream => true  # copy: never modify the caller's hash
+			scan code, lang, options, &block
+		end
+
+		# Encode +code+ with the Encoder for +format+ and the Scanner for +lang+.
+		# +options+ will be passed to the Encoder.
+		#
+		# See CodeRay::Encoder.encode_stream
+		def encode_stream code, lang, format, options = {}
+			encoder(format, options).encode_stream code, lang, options
+		end
+
+		def encode code, lang, format, options = {}
+			encoder(format, options).encode code, lang, options
+		end
+
+		# Finds the Encoder class for +format+ and creates an instance, passing
+		# +options+ to it.
+		# 
+		# Example:
+		#  require 'coderay'
+		#  token_count = CodeRay.encoder(:count).encode("puts 17 + 4\n", :ruby).to_i  #-> 8
+		#  require 'coderay'
+		#  
+		#  stats = CodeRay.encoder(:statistic)
+		#  stats.encode("puts 17 + 4\n", :ruby)
+		#  
+		#  puts '%d out of %d tokens have the kind :integer.' % [
+		#  	stats.type_stats[:integer].count,
+		#  	stats.real_token_count
+		#  ]
+		#  #-> 2 out of 4 tokens have the kind :integer.
+		def encoder format, options = {}
+			Encoders[format].new options
+		end
+
+	end
+
+	# This Exception is raised when you try to stream with something that is not
+	# capable of streaming.
+	class NotStreamableError < Exception
+		def initialize obj
+			@obj = obj
+		end
+
+		def to_s
+			'%s is not Streamable!' % @obj.class
+		end
+	end
+	
+	# A dummy module that is included by subclasses of CodeRay::Scanner and CodeRay::Encoder
+	# to show that they are able to handle streams.
+	module Streamable
+	end
+	
+end
+
+# Run a test script.
+if $0 == __FILE__
+	$stderr.print 'Press key to print demo.'; gets
+	code = File.read($0)[/module CodeRay.*/m]
+	print CodeRay.scan(code, :ruby).html
+end
diff --git a/lib/coderay/encoder.rb b/lib/coderay/encoder.rb
new file mode 100644
index 0000000..5f6d511
--- /dev/null
+++ b/lib/coderay/encoder.rb
@@ -0,0 +1,210 @@
+module CodeRay
+
+  # This module holds class Encoder and its subclasses.
+  # For example, the HTML encoder is named CodeRay::Encoders::HTML and
+  # can be found in coderay/encoders/html.
+  # 
+  # Encoders also provides methods and constants for the register mechanism
+  # and the [] method that returns the Encoder class belonging to the
+  # given format.
+  module Encoders
+
+    # Raised if Encoders[] fails because:
+    # * a file could not be found
+    # * the requested Encoder is not registered
+    EncoderNotFound = Class.new Exception
+
+    # Loaded Encoders are saved here; a missing key triggers loading its file.
+    ENCODERS = Hash.new do |h, lang|
+      path = Encoders.path_to lang
+      lang = lang.to_sym
+      begin
+        require path
+      rescue LoadError
+        raise EncoderNotFound, "#{path} not found."
+      else
+        # Encoder should have registered by now (has_key? does not re-enter this block)
+        unless h.has_key? lang
+          raise EncoderNotFound, "No Encoder for #{lang} found in #{path}."
+        end
+      end
+      h[lang]
+    end
+
+    class << self
+
+      # Every Encoder class must register itself for one or more +formats+
+      # by calling register_for, which calls this method.
+      #
+      # See CodeRay::Encoder.register_for.
+      def register encoder_class, *formats
+        for format in formats
+          ENCODERS[format.to_sym] = encoder_class
+        end
+      end
+
+      # Returns the Encoder for +lang+.
+      # 
+      # Example:
+      #  require 'coderay'
+      #  yaml_encoder = CodeRay::Encoders[:yaml]
+      def [] lang
+        ENCODERS[lang]
+      end
+
+      # Alias for +[]+.
+      alias load []
+
+      # Returns the path to the encoder for format.
+      def path_to plugin
+        File.join 'coderay', 'encoders', "#{plugin}.rb"
+      end
+
+    end
+
+
+    # The Encoder base class. Together with CodeRay::Scanner and
+    # CodeRay::Tokens, it forms the highlighting triad.
+    #
+    # Encoder instances take a Tokens object and do something with it.
+    #
+    # The most common Encoder is surely the HTML encoder
+    # (CodeRay::Encoders::HTML). It highlights the code in a colorful
+    # html page.
+    # If you want the highlighted code in a div or a span instead,
+    # use its subclasses Div and Span. 
+    class Encoder
+
+      attr_reader :token_stream
+
+      class << self
+        
+        # Register this class for the given langs.
+        #
+        # Example:
+        #   class MyEncoder < CodeRay::Encoders::Encoder
+        #     register_for :myenc
+        #     ...
+        #   end
+        #
+        # See Encoders.register.
+        def register_for *args
+          Encoders.register self, *args
+        end
+
+        # Returns true if this Encoder class includes Streamable (usable in streaming mode).
+        def streamable?
+          include? Streamable
+        end
+        
+        # If FILE_EXTENSION isn't defined, this method returns the downcase
+        # class name (without its namespace) instead.
+        def const_missing sym
+          if sym == :FILE_EXTENSION
+            name.to_s[/\w+\z/].to_s.downcase
+          else
+            super
+          end
+        end
+        
+      end
+
+      # Subclasses are to store their default options in this constant.
+      DEFAULT_OPTIONS = { :stream => false }
+
+      # The options you gave the Encoder at creating.
+      attr_accessor :options
+
+      # Creates a new Encoder.
+      # +options+ is saved and used for all encode operations, as long as you
+      # don't overwrite it there by passing additional options.
+      # 
+      # Encoder objects provide three encode methods:
+      # - encode simply takes a +code+ string and a +lang+
+      # - encode_tokens expects a +tokens+ object instead
+      # - encode_stream is like encode, but uses streaming mode.
+      # 
+      # Each method has an optional +options+ parameter. These are added to
+      # the options you passed at creation.
+      def initialize options = {}
+        @options = self.class::DEFAULT_OPTIONS.merge options
+        raise "I am only the basic Encoder class. I can't encode anything. :(\n" + 
+          "Use my subclasses." if self.class == Encoder
+      end
+
+      # Encode a Tokens object.
+      def encode_tokens tokens, options = {}
+        options = @options.merge options
+        setup options
+        compile tokens, options
+        finish options
+      end
+
+      # Encode the given +code+ after tokenizing it using the Scanner for
+      # +lang+.
+      def encode code, lang, options = {}
+        options = @options.merge options
+        scanner_options = options.fetch(:scanner_options, {})
+        tokens = CodeRay.scan code, lang, scanner_options
+        encode_tokens tokens, options
+      end
+
+      # You can use highlight instead of encode, if that seems
+      # more clear to you.
+      alias highlight encode
+
+      # Encode the given +code+ using the Scanner for +lang+ in streaming
+      # mode.
+      def encode_stream code, lang, options = {}
+        raise NotStreamableError, self unless kind_of? Streamable
+        options = @options.merge options
+        setup options
+        scanner_options = options.fetch :scanner_options, {}
+        @token_stream = CodeRay.scan_stream code, lang, scanner_options, &self
+        finish options
+      end
+
+      # Behave like a proc. The token method is converted to a proc.
+      def to_proc
+        method(:token).to_proc
+      end
+
+    protected
+    
+      # Called with merged options before encoding starts.
+      # Sets @out to an empty string.
+      # 
+      # See the HTML Encoder for an example of option caching.
+      def setup options
+        @out = ''
+      end
+
+      # Called with +text+ and +kind+ of the currently scanned token.
+      # For simple encoders, it's enough to implement this method.
+      #
+      # Raises a NotImplementedError exception if it is not overwritten in
+      # subclass.
+      def token text, kind
+        raise NotImplementedError, "#{self.class}#token not implemented."
+      end
+
+      # Called with merged options after encoding has finished.
+      # The return value is the result of encoding, typically @out.
+      def finish options 
+        @out
+      end
+
+      # Do the encoding.
+      #
+      # The already created +tokens+ object must be used; it can be a
+      # TokenStream or a Tokens object.
+      def compile tokens, options
+        tokens.each(&self)
+      end
+
+    end	
+
+  end
+end
+
+# vim:sw=2:ts=2:et:tw=78
diff --git a/lib/coderay/encoders/count.rb b/lib/coderay/encoders/count.rb
new file mode 100644
index 0000000..80aec57
--- /dev/null
+++ b/lib/coderay/encoders/count.rb
@@ -0,0 +1,20 @@
+module CodeRay
+module Encoders
+
+	class Count < Encoder
+
+		register_for :count
+
+	protected		
+		
+		def setup options
+			@out = 0
+		end
+		
+		def token text, kind
+			@out += 1
+		end
+	end	
+
+end 
+end
diff --git a/lib/coderay/encoders/div.rb b/lib/coderay/encoders/div.rb
new file mode 100644
index 0000000..640df0e
--- /dev/null
+++ b/lib/coderay/encoders/div.rb
@@ -0,0 +1,16 @@
+module CodeRay module Encoders
+	
+	require 'coderay/encoders/html'
+	class Div < HTML
+
+		FILE_EXTENSION = 'div.html'
+
+		register_for :div
+
+		DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
+			:css => :style,
+			:wrap => :div,
+		})
+	end
+
+end end
diff --git a/lib/coderay/encoders/helpers/html_css.rb b/lib/coderay/encoders/helpers/html_css.rb
new file mode 100644
index 0000000..f9cadf7
--- /dev/null
+++ b/lib/coderay/encoders/helpers/html_css.rb
@@ -0,0 +1,168 @@
+module CodeRay module Encoders
+
+	class HTML
+		class CSS
+			
+			def initialize stylesheet = TOKENS
+				@classes = Hash.new
+				parse stylesheet
+			end
+
+			def [] *styles
+				cl = @classes[styles.first]
+				return '' unless cl
+				style = false
+				1.upto(cl.size + 1) do |offset|
+					break if style = cl[styles[offset .. -1]]
+				end
+				return style
+			end
+			
+		private
+			
+			CSS_CLASS = /
+				( (?:                # $1 = classes
+					\s* \. [-\w]+
+				)+ )
+				\s* \{
+				( [^\}]* )           # $2 = style
+				\} \s*
+			|
+				( . )                # $3 = error
+			/mx
+			def parse stylesheet
+				stylesheet.scan CSS_CLASS do |classes, style, error|
+					raise "CSS parse error: '#{error}' not recognized" if error
+					styles = classes.scan(/[-\w]+/)
+					cl = styles.pop
+					@classes[cl] ||= Hash.new
+					@classes[cl][styles] = style.strip
+				end
+			end
+			
+			MAIN = <<-'MAIN'
+.code {
+	background-color: #FAFAFA;
+	border: 1px solid #D1D7DC;
+	font-family: 'Courier New', 'Terminal', monospace;
+	font-size: 10pt;
+	color: black;
+	vertical-align: top;
+	text-align: left;
+	padding: 0px;
+}
+span.code { white-space: pre; }
+.code tt { font-weight: bold; }
+.code pre {
+	font-size: 10pt;
+	margin: 0px 5px;
+}
+.code .code_table {
+	margin: 0px;
+}
+.code .line_numbers {
+	margin: 0px;
+	background-color:#DEF; color: #777;
+	vertical-align: top;
+	text-align: right;
+}
+.code .code_cell {
+	width: 100%;
+	background-color:#FAFAFA;
+	color: black;
+	vertical-align: top;
+	text-align: left;
+}
+.code .no {
+	background-color:#DEF;
+	color: #777;
+	padding: 0px 5px;
+	font-weight: normal;
+	font-style: normal;
+}
+
+.code tt { display: hidden; }
+
+			MAIN
+
+			TOKENS = <<-'TOKENS'
+.af { color:#00C; }
+.an { color:#007; }
+.av { color:#700; }
+.aw { color:#C00; }
+.bi { color:#509; font-weight:bold; }
+.c  { color:#888; }
+
+.ch { color:#04D; /* background-color:#f0f0ff; */ }
+.ch .k { color:#04D; }
+.ch .dl { color:#039; }
+
+.cl { color:#B06; font-weight:bold; }
+.co { color:#036; font-weight:bold; }
+.cr { color:#0A0; }
+.cv { color:#369; }
+.df { color:#099; font-weight:bold; }
+.di { color:#088; font-weight:bold; }
+.dl { color:black; }
+.do { color:#970; }
+.ds { color:#D42; font-weight:bold; }
+.e  { color:#666; font-weight:bold; }
+.er { color:#F00; background-color:#FAA; }
+.ex { color:#F00; font-weight:bold; }
+.fl { color:#60E; font-weight:bold; }
+.fu { color:#06B; font-weight:bold; }
+.gv { color:#d70; font-weight:bold; }
+.hx { color:#058; font-weight:bold; }
+.i  { color:#00D; font-weight:bold; }
+.ic { color:#B44; font-weight:bold; }
+.in { color:#B2B; font-weight:bold; }
+.iv { color:#33B; }
+.la { color:#970; font-weight:bold; }
+.lv { color:#963; }
+.oc { color:#40E; font-weight:bold; }
+.on { color:#000; font-weight:bold; }
+.pc { color:#038; font-weight:bold; }
+.pd { color:#369; font-weight:bold; }
+.pp { color:#579; }
+.pt { color:#339; font-weight:bold; }
+.r  { color:#080; font-weight:bold; }
+
+.rx { background-color:#fff0ff; }
+.rx .k { color:#808; }
+.rx .dl { color:#404; }
+.rx .mod { color:#C2C; }
+.rx .fu  { color:#404; font-weight: bold; }
+
+.s  { background-color:#fff0f0; }
+.s  .s { background-color:#ffe0e0; }
+.s  .s  .s { background-color:#ffd0d0; }
+.s  .k { color:#D20; }
+.s  .dl { color:#710; }
+
+.sh { background-color:#f0fff0; }
+.sh .k { color:#2B2; }
+.sh .dl { color:#161; }
+
+.sy { color:#A60; }
+.sy .k { color:#A60; }
+.sy .dl { color:#630; }
+
+.ta { color:#070; }
+.tf { color:#070; font-weight:bold; }
+.ts { color:#D70; font-weight:bold; }
+.ty { color:#339; font-weight:bold; }
+.v  { color:#036; }
+.xt { color:#444; }
+			TOKENS
+			
+			DEFAULT_STYLESHEET = MAIN + TOKENS
+		
+		end
+	end
+	
+end end
+
+if $0 == __FILE__
+	require 'pp'
+	pp CodeRay::Encoders::HTML::CSS.new
+end
diff --git a/lib/coderay/encoders/helpers/html_helper.rb b/lib/coderay/encoders/helpers/html_helper.rb
new file mode 100644
index 0000000..03ea0a2
--- /dev/null
+++ b/lib/coderay/encoders/helpers/html_helper.rb
@@ -0,0 +1,68 @@
+module CodeRay module Encoders
+
+	class HTML
+
+		ClassOfKind = {
+			:attribute_name => 'an',
+			:attribute_name_fat => 'af',
+			:attribute_value => 'av',
+			:attribute_value_fat => 'aw',
+			:bin => 'bi',
+			:char => 'ch',
+			:class => 'cl',
+			:class_variable => 'cv',
+			:color => 'cr',
+			:comment => 'c',
+			:constant => 'co',
+			:content => 'k',
+			:definition => 'df',
+			:delimiter => 'dl',
+			:directive => 'di',
+			:doc => 'do',
+			:doc_string => 'ds',
+			:error => 'er',
+			:escape => 'e',
+			:exception => 'ex',
+			:float => 'fl',
+			:function => 'fu',
+			:global_variable => 'gv',
+			:hex => 'hx',
+			:include => 'ic',
+			:instance_variable => 'iv',
+			:integer => 'i',
+			:interpreted => 'in',
+			:label => 'la',
+			:local_variable => 'lv',
+			:modifier => 'mod',
+			:oct => 'oc',
+			:operator_name => 'on',
+			:pre_constant => 'pc',
+			:pre_type => 'pt',
+			:predefined => 'pd',
+			:preprocessor => 'pp',
+			:regexp => 'rx',
+			:reserved => 'r',
+			:shell => 'sh',
+			:string => 's',
+			:symbol => 'sy',
+			:tag => 'ta',
+			:tag_fat => 'tf',
+			:tag_special => 'ts',
+			:type => 'ty',
+			:variable => 'v',
+			:xml_text => 'xt',
+
+			:ident => :NO_HIGHLIGHT, # 'id'
+			:operator => :NO_HIGHLIGHT,  # 'op'
+			:space => :NO_HIGHLIGHT,  # 'sp'
+			:plain => :NO_HIGHLIGHT,
+		}
+		ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function]
+		ClassOfKind[:open] = ClassOfKind[:close] = ClassOfKind[:delimiter]
+		ClassOfKind[:nesting_delimiter] = ClassOfKind[:delimiter]
+		ClassOfKind[:escape] = ClassOfKind[:delimiter]
+		ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
+
+	end
+
+end end
diff --git a/lib/coderay/encoders/helpers/html_output.rb b/lib/coderay/encoders/helpers/html_output.rb
new file mode 100644
index 0000000..e2b26e7
--- /dev/null
+++ b/lib/coderay/encoders/helpers/html_output.rb
@@ -0,0 +1,240 @@
+module CodeRay
+	module Encoders
+
+	class HTML
+		
+		# This module is included in the output String from the HTML Encoder.
+		#
+		# It provides methods like wrap, div, page etc.
+		#
+		# Remember to use #clone instead of #dup to keep the modules the object was
+		# extended with.
+		#
+		# TODO: more doc.
+		module Output
+
+			class << self
+				
+				# This makes Output look like a class.
+				#
+				# Example:
+				# 
+				#  a = Output.new '<span class="co">Code</span>'
+				#  a.wrap! :page
+				def new string, element = nil
+					output = string.clone.extend self
+					output.wrapped_in = element
+					output
+				end
+				
+				# Prints a warning if an object that doesn't respond to to_str is extended by Output,
+				# to prevent users from misuse. Use Module#remove_method to disable.
+				def extended o
+					warn "The Output module is intended to extend instances of String, not #{o.class}." unless o.respond_to? :to_str
+				end
+
+				def page_template_for_css css = :default
+					css = CSS::DEFAULT_STYLESHEET if css == :default
+					PAGE.apply 'CSS', css
+				end
+
+				# Define a new wrapper. This is meta programming.
+				def wrapper *wrappers
+					wrappers.each do |wrapper|
+						define_method wrapper do |*args|
+							wrap wrapper, *args
+						end
+						define_method(:"#{wrapper}!") do |*args|  
+							wrap! wrapper, *args
+						end
+					end
+				end
+			end
+
+			wrapper :div, :span, :page
+
+			def wrapped_in
+				@wrapped_in || nil
+			end
+			attr_writer :wrapped_in
+			
+			def wrapped_in? element
+				wrapped_in == element
+			end
+			
+			def wrap_in template
+				clone.wrap_in! template
+			end
+
+			def wrap_in! template
+				Template.wrap! self, template, 'CONTENT'
+				self
+			end
+			
+			def wrap! element, *args
+				return self if not element or element == wrapped_in
+				case element
+				when :div
+					raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
+					wrap_in! DIV
+				when :span
+					raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? nil
+					wrap_in! SPAN
+				when :page
+					wrap! :div if wrapped_in? nil
+					raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
+					wrap_in! Output.page_template_for_css
+				when nil
+					return self
+				else
+					raise "Unknown value %p for :wrap" % element
+				end
+				@wrapped_in = element
+				self
+			end
+
+			def wrap *args
+				clone.wrap!(*args)
+			end
+
+			def numerize! mode = :table, options = {}  # adds line numbers in place; mode is :inline or :table
+				return self unless mode  # nil/false: numbering not wanted
+
+				offset = options.fetch :line_numbers_offset, DEFAULT_OPTIONS[:line_numbers_offset]
+				unless offset.is_a? Integer
+					raise ArgumentError, "Invalid value %p for :offset; Integer expected." % [offset]
+				end
+				
+				unless NUMERIZABLE_WRAPPINGS.include? options[:wrap]
+					raise ArgumentError, "Can't numerize, :wrap must be in %p, but is %p" % [NUMERIZABLE_WRAPPINGS, options[:wrap]]
+				end
+				
+				bold_every = options.fetch :bold_every, DEFAULT_OPTIONS[:bold_every]
+				bolding = 
+					if bold_every == :no_bolding or bold_every == 0
+						proc { |line| line.to_s }
+					elsif bold_every.is_a? Integer
+						proc do |line|
+							if line % bold_every == 0
+								"<strong>#{line}</strong>"  # every bold_every-th number in bold
+							else
+								line.to_s
+							end
+						end
+					else
+						raise ArgumentError, "Invalid value %p for :bolding; :no_bolding or Integer expected." % [bold_every]  # report the offending option value
+					end
+				
+				line_count = count("\n")
+				line_count += 1 if self[-1] != ?\n
+
+				case mode				
+				when :inline
+					max_width = line_count.to_s.size
+					line = offset - 1
+					gsub!(/^/) do
+						line += 1
+						line_number = bolding.call line
+						"<span class=\"no\">#{ line_number.rjust(max_width) }</span>  "
+					end
+					wrap! :div
+					
+				when :table
+					# This is really ugly.
+					# Because even monospace fonts seem to have different heights when bold, 
+					# I make the newline bold, both in the code and the line numbers.
+					# FIXME Still not working perfect for Mr. Internet Exploder
+					line_numbers = (offset ... offset + line_count).to_a.map(&bolding).join("\n")
+					line_numbers << "\n"  # also for Mr. MS Internet Exploder :-/
+					line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }
+					
+					line_numbers_tpl = DIV_TABLE.apply('LINE_NUMBERS', line_numbers)
+					gsub!(/\n/) { "<tt>\n</tt>" }
+					wrap_in! line_numbers_tpl
+					@wrapped_in = :div
+					
+				else
+					raise ArgumentError, "Unknown value %p for mode: :inline or :table expected" % mode
+				end
+
+				self
+			end
+
+			def numerize *args
+				clone.numerize!(*args)
+			end
+
+			class Template < String
+
+				def self.wrap! str, template, target
+					target = Regexp.new(Regexp.escape("<%#{target}%>"))
+					if template =~ target
+						str[0,0] = $`
+						str << $'
+					else
+						raise "Template target <%%%p%%> not found" % target
+					end
+				end
+				
+				def apply target, replacement
+					target = Regexp.new(Regexp.escape("<%#{target}%>"))
+					if self =~ target
+						Template.new($` + replacement + $')
+					else
+						raise "Template target <%%%p%%> not found" % target
+					end
+				end
+
+				module Simple
+					def ` str  #`
+						Template.new str
+					end
+				end
+			end
+			
+			extend Template::Simple
+
+#-- don't include the templates in docu
+			
+			SPAN = `<span class="code"><%CONTENT%></span>`
+
+			DIV, DIV_TABLE, PAGE =
+				<<-`DIV`, <<-`DIV_TABLE`, <<-`PAGE`
+			
+<div class="code">
+<pre><%CONTENT%></pre>
+</div>
+			DIV
+
+<div class="code">
+	<table class="code_table">
+		<tr>
+			<td class="line_numbers"><pre><%LINE_NUMBERS%></pre></td>
+			<td class="code_cell"><div class="nowrap"><pre><%CONTENT%></pre></div></td>
+		</tr>
+	</table>
+</div>			
+			DIV_TABLE
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
+<head>
+	<meta http-equiv="content-type" content="text/html; charset=iso-8859-1" />
+	<title>CodeRay HTML Encoder Example</title>
+	<style type="text/css">
+<%CSS%>
+	</style>
+</head>
+<body style="background-color: white;">
+<%CONTENT%>
+</body>
+</html>
+			PAGE
+
+		end
+
+	end
+
+end
+end
diff --git a/lib/coderay/encoders/html.rb b/lib/coderay/encoders/html.rb
new file mode 100644
index 0000000..69b6e22
--- /dev/null
+++ b/lib/coderay/encoders/html.rb
@@ -0,0 +1,167 @@
+module CodeRay
+module Encoders
+
+	class HTML < Encoder
+
+		include Streamable
+		register_for :html
+
+		FILE_EXTENSION = 'html'
+
+		DEFAULT_OPTIONS = {
+			:tab_width => 8,
+
+			:level => :xhtml,
+			:css => :class,
+
+			:wrap => :page,
+			:line_numbers => :table,
+			:line_numbers_offset => 1,
+			:bold_every => 10,
+		}
+		NUMERIZABLE_WRAPPINGS = [:div, :page]
+		
+		require 'coderay/encoders/helpers/html_helper'
+		require 'coderay/encoders/helpers/html_output'
+		require 'coderay/encoders/helpers/html_css'
+
+		def initialize(*)
+			super
+			@last_options = nil
+		end
+
+	protected
+		
+		HTML_ESCAPE = {  #:nodoc:
+			'&' => '&amp;',
+			'"' => '&quot;',
+			'>' => '&gt;',
+			'<' => '&lt;',
+		}
+
+		# This is to prevent illegal HTML: control chars other than \n and \t map to a space.
+		# Strange chars should still be avoided in codes.
+		evil_chars = Array(0x00...0x20) - [?\n, ?\t]
+		evil_chars.each { |i| HTML_ESCAPE[i.chr] = ' ' }
+		ansi_chars = Array(0x7f..0xff)
+		ansi_chars.each { |i| HTML_ESCAPE[i.chr] = '&#%d;' % i }
+		# \x9 (\t) and \xA (\n) not included
+		HTML_ESCAPE_PATTERN = /[&"><\0-\x8\xB-\x1f\x7f-\xff]/
+
+		def setup options
+			if options[:line_numbers] and not NUMERIZABLE_WRAPPINGS.include? options[:wrap]
+				warn ':line_numbers wanted, but :wrap is %p' % options[:wrap]
+			end
+			super
+			return if options == @last_options
+			@last_options = options
+
+			@HTML_ESCAPE = HTML_ESCAPE.dup
+			@HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
+			
+			@opened = [nil]
+			@css = CSS.new
+
+			case options[:css]
+			
+			when :class
+				@css_style = Hash.new do |h, k|
+					if k.is_a? Array
+						type = k.first
+					else
+						type = k
+					end
+					c = ClassOfKind[type]
+					if c == :NO_HIGHLIGHT
+						h[k] = false
+					else
+						if options[:debug]
+							debug_info = ' title="%p"' % [ k ]
+						else
+							debug_info = ''
+						end
+						h[k] = '<span%s class="%s">' % [debug_info, c]
+					end
+				end
+				
+			when :style
+				@css_style = Hash.new do |h, k|
+					if k.is_a? Array
+						styles = k.dup
+					else
+						styles = [k]
+					end
+					styles.map! { |c| ClassOfKind[c] }
+					if styles.first == :NO_HIGHLIGHT
+						h[k] = false
+					else
+						if options[:debug]
+							debug_info = ' title="%s"' % [ styles.inspect.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } ]
+						else
+							debug_info = ''
+						end
+						style = @css[*styles]
+						h[k] =
+							if style
+								'<span%s style="%s">' % [debug_info, style]
+							else
+								false
+							end
+					end
+				end
+				
+			else
+				raise "Unknown value %p for :css." % options[:css]
+				
+			end
+		end
+
+		def finish options
+			not_needed = @opened.shift
+			@out << '</span>' * @opened.size
+
+			@out.extend Output
+			@out.numerize! options[:line_numbers], options # if options[:line_numbers]
+			@out.wrap! options[:wrap] # if options[:wrap]
+
+			#require 'pp'
+			#pp @css_style, @css_style.size
+
+			super
+		end
+
+		def token text, type
+			if text.is_a? String
+				# be careful when streaming: text is changed!
+				text.gsub!(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
+				@opened[0] = type
+				style = @css_style[@opened]
+				if style
+					@out << style << text << '</span>'
+				else
+					@out << text
+				end
+			else
+				case text
+				when :open
+					@opened[0] = type
+					@out << @css_style[@opened]
+					@opened << type
+				when :close
+					unless @opened.empty?
+						raise 'Not Token to be closed.' unless @opened.size > 1
+						@out << '</span>'
+						@opened.pop
+					end
+				when nil
+					raise 'Token with nil as text was given: %p' % [[text, type]]
+				else
+					raise 'unknown token kind: %p' % text
+				end
+			end
+		end
+		
+	end
+
+end
+end
diff --git a/lib/coderay/encoders/null.rb b/lib/coderay/encoders/null.rb
new file mode 100644
index 0000000..67c4987
--- /dev/null
+++ b/lib/coderay/encoders/null.rb
@@ -0,0 +1,20 @@
+module CodeRay
+	module Encoders
+
+		class Null < Encoder
+
+			include Streamable
+			register_for :null
+
+			protected
+
+			def token(*)
+				# do nothing
+			end
+
+		end
+
+	end
+end
+
+
diff --git a/lib/coderay/encoders/span.rb b/lib/coderay/encoders/span.rb
new file mode 100644
index 0000000..a7715f4
--- /dev/null
+++ b/lib/coderay/encoders/span.rb
@@ -0,0 +1,17 @@
+module CodeRay module Encoders
+	
+	require 'coderay/encoders/html'
+	class Span < HTML
+
+		FILE_EXTENSION = 'span.html'
+
+		register_for :span
+
+		DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
+			:css => :style,
+			:wrap => :span,
+			:line_numbers => nil,
+		})
+	end
+
+end end
diff --git a/lib/coderay/encoders/statistic.rb b/lib/coderay/encoders/statistic.rb
new file mode 100644
index 0000000..0685c03
--- /dev/null
+++ b/lib/coderay/encoders/statistic.rb
@@ -0,0 +1,74 @@
+module CodeRay module Encoders
+
+	# Makes a statistic for the given tokens.
+	class Statistic < Encoder
+
+		include Streamable
+		register_for :stats, :statistic
+
+		attr_reader :type_stats, :real_token_count
+
+	protected
+		
+		TypeStats = Struct.new :count, :size
+
+		def setup options
+			@type_stats = Hash.new { |h, k| h[k] = TypeStats.new 0, 0 }
+			@real_token_count = 0
+		end
+
+		def generate tokens, options  # NOTE(review): the Encoder base class shown here defines #compile, not #generate - confirm this is ever called
+			@tokens = tokens
+			super
+		end
+		
+		def token text, type
+			@type_stats['TOTAL'].count += 1
+			if text.is_a? String
+				@real_token_count += 1 unless type == :space
+				@type_stats[type].count += 1
+				@type_stats[type].size += text.size
+				@type_stats['TOTAL'].size += text.size
+			else
+				@content_type = type
+				@type_stats['open/close'].count += 1
+			end
+		end
+
+		STATS = <<-STATS
+
+Code Statistics
+
+Tokens            %8d
+  Non-Whitespace  %8d
+Bytes Total       %8d
+
+Token Types (%d):
+  type                     count     ratio    size (average)
+-------------------------------------------------------------
+%s
+			STATS
+# space                    12007   33.81 %     1.7
+		TOKEN_TYPES_ROW = <<-TKR
+  %-20s  %8d  %6.2f %%   %5.1f
+			TKR
+
+		def finish options
+			all = @type_stats['TOTAL']
+			all_count, all_size = all.count, all.size
+			@type_stats.each do |type, stat|
+				stat.size /= stat.count.to_f
+			end
+			types_stats = @type_stats.sort_by { |k, v| -v.count }.map do |k, v|
+				TOKEN_TYPES_ROW % [k, v.count, 100.0 * v.count / all_count, v.size]
+			end.join
+			STATS % [
+				all_count, @real_token_count, all_size,
+				@type_stats.delete_if { |k, v| k.is_a? String }.size,
+				types_stats
+			]
+		end
+		
+	end
+
+end end
diff --git a/lib/coderay/encoders/text.rb b/lib/coderay/encoders/text.rb
new file mode 100644
index 0000000..4f0a754
--- /dev/null
+++ b/lib/coderay/encoders/text.rb
@@ -0,0 +1,33 @@
+module CodeRay
+	module Encoders
+
+		class Text < Encoder
+
+			include Streamable
+			register_for :text
+
+			FILE_EXTENSION = 'txt'
+
+			DEFAULT_OPTIONS = {
+				:separator => ''
+			}
+
+			protected
+			def setup options
+				super
+				@sep = options[:separator]
+			end
+
+			def token text, kind
+				return unless text.respond_to :to_str
+				@out << text + @sep
+			end
+
+			def finish options
+				@out.chomp @sep
+			end
+
+		end
+
+	end
+end
diff --git a/lib/coderay/encoders/tokens.rb b/lib/coderay/encoders/tokens.rb
new file mode 100644
index 0000000..4573307
--- /dev/null
+++ b/lib/coderay/encoders/tokens.rb
@@ -0,0 +1,44 @@
+module CodeRay
+	module Encoders
+
+		# The Tokens encoder converts the tokens to a simple
+		# readable format. It doesn't use colors and is mainly
+		# intended for console output.
+		# 
+		# The tokens are converted with Tokens.write_token.
+		#
+		# The format is:
+		#
+		#   <token-kind> \t <escaped token-text> \n
+		#
+		# Example:
+		#
+		#   require 'coderay'
+		#   puts CodeRay.scan("puts 3 + 4", :ruby).tokens
+		# 
+		# prints:
+		#   
+		#   ident   puts
+		#   space
+		#   integer 3
+		#   space
+		#   operator        +
+		#   space
+		#   integer 4
+		# 
+		class Tokens < Encoder
+
+			include Streamable
+			register_for :tokens
+
+			FILE_EXTENSION = 'tok'
+
+			protected
+			def token *args
+				@out << CodeRay::Tokens.write_token(*args)
+			end
+
+		end
+
+	end
+end
diff --git a/lib/coderay/encoders/yaml.rb b/lib/coderay/encoders/yaml.rb
new file mode 100644
index 0000000..4e2b7a1
--- /dev/null
+++ b/lib/coderay/encoders/yaml.rb
@@ -0,0 +1,19 @@
+module CodeRay
+	module Encoders
+
+		class YAML < Encoder
+
+			register_for :yaml
+
+			FILE_EXTENSION = 'yaml'
+
+			protected
+			def compile tokens, options
+				require 'yaml'
+				@out = tokens.to_a.to_yaml
+			end
+
+		end
+
+	end
+end
diff --git a/lib/coderay/helpers/filetype.rb b/lib/coderay/helpers/filetype.rb
new file mode 100644
index 0000000..7f34c35
--- /dev/null
+++ b/lib/coderay/helpers/filetype.rb
@@ -0,0 +1,145 @@
+# =FileType
+#
+# A simple filetype recognizer
+#
+# Author: murphy (mail to murphy cYcnus de)
+#
+# Version: 0.1 (2005.september.1)
+#
+# ==Documentation
+#
+# TODO
+#
+module FileType
+	
+	UnknownFileType = Class.new Exception
+
+	class << self
+
+		def [] filename, read_shebang = false
+			name = File.basename filename
+			ext = File.extname name
+			ext.sub!(/^\./, '')  # delete the leading dot
+			
+			type = 
+				TypeFromExt[ext] || 
+				TypeFromExt[ext.downcase] || 
+				TypeFromName[name] ||
+				TypeFromName[name.downcase]
+			type ||= shebang(filename) if read_shebang
+
+			type
+		end
+
+		def shebang filename
+			begin
+				File.open filename, 'r' do |f|
+					first_line = f.gets
+					first_line[TypeFromShebang]
+				end
+			rescue IOError
+				nil
+			end
+		end
+		
+		# This works like Hash#fetch.
+		def fetch filename, default = nil, read_shebang = false
+			if default and block_given?
+				warn 'block supersedes default value argument'
+			end
+			
+			unless type = self[filename, read_shebang]
+				return yield if block_given?
+				return default if default
+				raise UnknownFileType, 'Could not determine type of %p.' % filename
+			end
+			type
+		end
+
+	end
+
+	TypeFromExt = {
+		'rb' => :ruby,
+		'rbw' => :ruby,
+		'cpp' => :cpp,
+		'c' => :c,
+		'h' => :c,
+		'xml' => :xml,
+		'htm' => :html,
+		'html' => :html,
+	}
+
+	TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/
+
+	TypeFromName = {
+		'Rakefile' => :ruby,
+		'Rantfile' => :ruby,
+	}
+
+end
+
+# When this file is executed directly, run the unit tests stored after
+# __END__. File name and start line (__LINE__+4 is the first line after
+# __END__) are passed to eval so that reported positions refer to this
+# file. Do not insert lines between the eval and __END__.
+if $0 == __FILE__
+	$VERBOSE = true
+  eval DATA.read, nil, $0, __LINE__+4
+end
+
+__END__
+
+require 'test/unit'
+
+class TC_FileType < Test::Unit::TestCase
+
+	def test_fetch
+		assert_raise FileType::UnknownFileType do
+			FileType.fetch ''
+		end
+
+		assert_throws :not_found do
+			FileType.fetch '.' do
+				throw :not_found
+			end
+		end
+
+		assert_equal :default, FileType.fetch('c', :default)
+		
+		stderr, fake_stderr = $stderr, Object.new
+		$err = ''
+		def fake_stderr.write x
+			$err << x
+		end
+		$stderr = fake_stderr
+		FileType.fetch('c', :default) { }
+		assert_equal "block supersedes default value argument\n", $err
+		$stderr = stderr
+	end
+
+	def test_ruby
+		assert_equal :ruby, FileType['test.rb']
+		assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw']
+		assert_equal :ruby, FileType['/usr/bin/something/Rakefile']
+		assert_equal :ruby, FileType['~/myapp/gem/Rantfile']
+		assert_not_equal :ruby, FileType['test_rb']
+		assert_not_equal :ruby, FileType['Makefile']
+		assert_not_equal :ruby, FileType['set.rb/set']
+		assert_not_equal :ruby, FileType['~/projects/blabla/rb']
+	end
+
+	def test_c
+		assert_equal :c, FileType['test.c']
+		assert_equal :c, FileType['C:\\Program Files\\x\\y\\c\\test.h']
+		assert_not_equal :c, FileType['test_c']
+		assert_not_equal :c, FileType['Makefile']
+		assert_not_equal :c, FileType['set.h/set']
+		assert_not_equal :c, FileType['~/projects/blabla/c']
+	end
+
+	def test_shebang
+		dir = './test'
+		if File.directory? dir
+			Dir.chdir dir do
+				assert_equal :c, FileType['test.c']
+			end
+		end
+	end
+
+end
diff --git a/lib/coderay/helpers/gzip_simple.rb b/lib/coderay/helpers/gzip_simple.rb
new file mode 100644
index 0000000..02d1ffd
--- /dev/null
+++ b/lib/coderay/helpers/gzip_simple.rb
@@ -0,0 +1,123 @@
+# =GZip Simple
+#
+# A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
+#
+# Author: murphy (mail to murphy cYcnus de)
+#
+# Version: 0.2 (2005.may.28)
+#
+# ==Documentation
+#
+# See +GZip+ module and the +String+ extensions.
+#
+module GZip
+	
+	require 'zlib'
+
+	# The default zipping level. 7 zips good and fast.
+	DEFAULT_GZIP_LEVEL = 7
+	
+	# Unzips the given string +s+.
+	#
+	# Example:
+	#   require 'gzip_simple'
+	#   print GZip.gunzip(File.read('adresses.gz'))
+	# 
+	def GZip.gunzip s
+		Zlib::Inflate.inflate s
+	end
+	
+	# Zips the given string +s+.
+	#
+	# Example:
+	#   require 'gzip_simple'
+	#   File.open('adresses.gz', 'w') do |file
+	#     file.write GZip.gzip('Mum: 0123 456 789', 9)
+	#   end
+	# 
+	# If you provide a +level+, you can control how strong
+	# the string is compressed:
+	# - 0: no compression, only convert to gzip format
+	# - 1: compress fast
+	# - 7: compress more, but still fast (default)
+	# - 8: compress more, slower
+	# - 9: compress best, very slow
+	def GZip.gzip s, level = DEFAULT_GZIP_LEVEL
+		Zlib::Deflate.new(level).deflate s, Zlib::FINISH
+	end
+end
+
+# String extensions to use the GZip module.
+#
+# The methods gzip and gunzip provide an even more simple
+# interface to the ZLib:
+#
+#   # create a big string
+#   x = 'a' * 1000
+#   
+#   # zip it
+#   x_gz = x.gzip
+#   
+#   # test the result
+#   puts 'Zipped %d bytes to %d bytes.' % [x.size, x_gz.size]
+#   #-> Zipped 1000 bytes to 19 bytes.
+#   
+#   # unzipping works
+#   p x_gz.gunzip == x  #-> true
+class String
+	# Returns the string, unzipped.
+	# See GZip.gunzip
+	def gunzip
+		GZip.gunzip self
+	end
+	# Replaces the string with its unzipped value.
+	# See GZip.gunzip
+	def gunzip!
+		replace gunzip
+	end
+	
+	# Returns the string, zipped.
+	# +level+ is the gzip compression level, see GZip.gzip.
+	def gzip level = GZip::DEFAULT_GZIP_LEVEL
+		GZip.gzip self, level
+	end
+	# Replaces the string with its zipped value.
+	# See GZip.gzip.
+	def gzip!(*args)
+		replace gzip(*args)
+	end
+end
+
+# When this file is executed directly, run the demo and benchmark stored
+# after __END__. __LINE__+4 is the first line after __END__; do not
+# insert lines between the eval and __END__.
+if $0 == __FILE__
+  eval DATA.read, nil, $0, __LINE__+4
+end
+
+__END__
+#CODE
+
+# Testing / Benchmark
+x = 'a' * 1000
+x_gz = x.gzip
+puts 'Zipped %d bytes to %d bytes.' % [x.size, x_gz.size]  #-> Zipped 1000 bytes to 19 bytes.
+p x_gz.gunzip == x  #-> true
+
+require 'benchmark'
+
+INFO = 'packed to %0.3f%%'  # :nodoc:
+
+x = Array.new(100000) { rand(255).chr + 'aaaaaaaaa' + rand(255).chr }.join
+Benchmark.bm(10) do |bm|
+	for level in 0..9
+		bm.report "zip #{level}" do
+			$x = x.gzip level
+		end
+		puts INFO % [100.0 * $x.size / x.size]
+	end
+	bm.report 'zip' do
+		$x = x.gzip
+	end
+	puts INFO % [100.0 * $x.size / x.size]
+	bm.report 'unzip' do
+		$x.gunzip
+	end
+end
diff --git a/lib/coderay/helpers/scanner_helper.rb b/lib/coderay/helpers/scanner_helper.rb
new file mode 100644
index 0000000..a2e14bb
--- /dev/null
+++ b/lib/coderay/helpers/scanner_helper.rb
@@ -0,0 +1,63 @@
+module CodeRay
+module Scanners
+
+  class Scanner
+
+    # A WordList is a Hash with some additional features.
+    # It is intended to be used for keyword recognition.
+    class WordList < Hash
+
+      def initialize default = false, case_mode = :case_match
+        @case_ignore =
+          case case_mode
+          when :case_match then false
+          when :case_ignore then true
+          else
+            raise ArgumentError,
+              "#{self.class.name}.new: second argument must be :case_ignore or :case_match, but #{case_mode} was given."
+          end
+
+        if @case_ignore
+          super() do |h, k|
+            h[k] = h.fetch k.downcase, default
+          end
+        else
+          super default
+        end
+      end
+
+      def include? word
+        self[word] if @case_ignore
+        has_key? word
+      end
+
+      def add words, kind = true
+        words.each do |word|
+          self[mind_case(word)] = kind
+        end
+        self
+      end
+
+      alias words keys
+
+      def case_ignore?
+        @case_mode
+      end
+
+    private
+      def mind_case word
+        if @case_ignore
+          word.downcase
+        else
+          word.dup
+        end
+      end
+
+    end		
+
+  end
+
+end
+end
+
+# vim:sw=2:ts=2:et:tw=78
diff --git a/lib/coderay/scanner.rb b/lib/coderay/scanner.rb
new file mode 100644
index 0000000..1cca607
--- /dev/null
+++ b/lib/coderay/scanner.rb
@@ -0,0 +1,298 @@
+module CodeRay
+
+  # This module holds class Scanner and its subclasses.
+  # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
+  # and can be found in coderay/scanners/ruby.
+  # 
+  # Scanner also provides methods and constants for the register mechanism
+  # and the [] method that returns the Scanner class belonging to the
+  # given lang.
+  module Scanners
+
+    # Raised if Scanners[] fails because:
+    # * a file could not be found
+    # * the requested Scanner is not registered
+    ScannerNotFound = Class.new(Exception)
+
+    # Loaded Scanners are saved here.
+    SCANNERS = Hash.new { |h, lang|
+      raise ScannerNotFound, "No scanner for #{lang} found."
+    }
+
+    class << self
+
+      # Registers a scanner class by setting SCANNERS[lang].
+      #
+      # Typically used in Scanners, for example in the Ruby scanner:
+      #
+      #   register_for :ruby
+      def register scanner_class, *langs
+        for lang in langs
+          raise ArgumentError, 'lang must be a Symbol, but it was a %s' % lang.class unless lang.is_a? Symbol
+          SCANNERS[lang] = scanner_class
+        end
+      end
+
+      # Loads the scanner class for +lang+ and returns it.
+      #
+      # Example:
+      #
+      #   Scanners[:xml].new
+      #
+      # +lang+ is converted using +normalize+ and must be
+      # * a String containing only alphanumeric characters (\w+)
+      # * a Symbol
+      #
+      # Strings are converted to lowercase symbols (so +'C'+ and +'c'+ load the
+      # same scanner, namely the one registered for +:c+.)
+      # 
+      # If the scanner isn't registered yet, it is searched.
+      # CodeRay expects that the scanner class is defined in
+      #
+      #   <install-dir>/coderay/scanners/<lang>.rb
+      #
+      # (See path_to.)
+      #
+      # If the file isn't found, a ScannerNotFound exception is raised
+      #
+      # The scanner should register itself using +register+. If the scanner is
+      # still not found (because it has not registered itself, or has registered
+      # under another lang), a ScannerNotFound exception is raised.
+      def [] lang
+        lang = normalize lang
+
+        SCANNERS.fetch lang do
+          scanner_file = path_to lang
+
+          begin
+            require scanner_file
+          rescue LoadError
+            raise ScannerNotFound, "File #{scanner_file} not found."
+          end
+
+          SCANNERS.fetch lang do
+            raise ScannerNotFound, <<-ERR
+No scanner for #{lang} found in #{scanner_file}.
+Known scanners: #{SCANNERS}
+            ERR
+          end
+        end
+      end
+
+      # Alias for +[]+.
+      alias load []
+
+      # Calculates the path where a scanner for +lang+
+      # is expected to be. This is:
+      # 
+      #   <install-dir>/coderay/scanners/<lang>.rb
+      def path_to lang
+        File.join 'coderay', 'scanners', "#{lang}.rb"
+      end
+
+      # Returns an array of all filenames in the scanners/ folder.
+      # The extension +.rb+ is not included.
+      def languages
+        scanners = File.join File.dirname(__FILE__), 'scanners', '*.rb'
+        Dir[scanners].map do |file|
+          File.basename file, '.rb'
+        end
+      end
+
+      # Loads all scanners that +languages+ finds using +load+.
+      def load_all
+        for lang in languages
+          load lang
+        end
+      end
+
+      # Converts +lang+ to a downcase Symbol if it is a String,
+      # or returns +lang+ if it already is a Symbol.
+      #
+      # Raises +ArgumentError+ for all other objects, or if the
+      # given String includes non-alphanumeric characters (\W).
+      def normalize lang
+        if lang.is_a? Symbol
+          lang
+        elsif lang.is_a? String
+          if lang[/\w+/] == lang
+            lang[/\w+/].downcase.to_sym
+          else
+            raise ArgumentError, "Invalid lang: '#{lang}' given."
+          end
+        elsif lang.nil?
+          :plaintext
+        else
+          raise ArgumentError, "String or Symbol expected, but #{lang.class} given."
+        end
+      end
+
+    end
+
+
+    require 'strscan'
+    # The base class for all Scanners.
+    #
+    # It is a subclass of Ruby's great +StringScanner+, which
+    # makes it easy to access the scanning methods inside.
+    #
+    # It is also +Enumerable+, so you can do this:
+    #
+    #   require 'coderay'
+    #   
+    #   c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
+    #   
+    #   for text, kind in c_scanner
+    #     puts text if kind == :operator
+    #   end
+    #   
+    #   # prints: (*==)++;
+    # 
+    # OK, this is not a very good example :)
+    # You can also use map, any?, find and even sort_by.
+    class Scanner < StringScanner
+
+      # Raised if a Scanner fails while scanning
+      ScanError = Class.new(Exception)
+
+      require 'coderay/helpers/scanner_helper'
+
+      # The default options for all scanner classes.
+      # 
+      # Define @default_options for subclasses.
+      DEFAULT_OPTIONS = { :stream => false }
+
+      class << self
+        # Register the scanner class for all
+        # +langs+.
+        #
+        # See Scanners.register.
+        def register_for *langs
+          Scanners.register self, *langs
+        end
+
+        # Returns if the Scanner can be used in streaming mode.
+        def streamable?
+          is_a? Streamable
+        end
+        
+      end
+
+=begin
+      ## Excluded for speed reasons - protected seems to make methods slow.
+
+      # Save the StringScanner methods from being called.
+      # This would not be useful for highlighting.
+      strscan_public_methods = StringScanner.instance_methods - StringScanner.ancestors[1].instance_methods
+      protected(*strscan_public_methods)
+=end
+      # Creates a new Scanner.
+      #
+      # * +code+ is the input String and is handled by the superclass StringScanner.
+      # * +options+ is a Hash with Symbols as keys.
+      #   It is merged with the default options of the class (you can overwrite
+      #   default options here.)
+      # * +block+ is the callback for streamed highlighting.
+      #
+      # If you set :stream to +true+ in the options, the Scanner uses a
+      # TokenStream with the +block+ as callback to handle the tokens.
+      #
+      # Else, a Tokens object is used.
+      def initialize code, options = {}, &block
+        @options = self.class::DEFAULT_OPTIONS.merge options
+        raise "I am only the basic Scanner class. I can't scan anything. :(\n" + 
+          "Use my subclasses." if self.class == Scanner
+
+        # I love this hack. It seems to silence all dos/unix/mac newline problems.
+        super code.gsub(/\r\n?/, "\n")
+
+        if @options[:stream]
+          warn "warning in CodeRay::Scanner.new: :stream is set, but no block was given" unless block_given?
+          raise NotStreamableError, self unless kind_of? Streamable
+          @tokens = TokenStream.new(&block)
+        else
+          warn "warning in CodeRay::Scanner.new: Block given, but :stream is #{@options[:stream]}" if block_given?
+          @tokens = Tokens.new
+        end
+      end
+
+      # More mnemonic accessor name for the input string.
+      alias code string
+
+      # Scans the code and returns all tokens in a Tokens object.
+      def tokenize options = {}
+        options = @options.merge({}) #options
+        if @options[:stream]  # :stream must have been set already
+          reset ## what is this for?
+          scan_tokens @tokens, options
+          @tokens
+        else
+          @cached_tokens ||= scan_tokens @tokens, options
+        end
+      end
+
+      # you can also see this as a read-only attribute
+      alias tokens tokenize
+
+      # Traverses the tokens.
+      def each &block
+        raise ArgumentError, 'Cannot traverse TokenStream.' if @options[:stream]
+        tokens.each(&block)
+      end
+      include Enumerable
+
+      # The current line position of the scanner.
+      #
+      # Beware, this is implemented inefficiently. It should be used
+      # for debugging only.
+      def line
+        string[0..pos].count("\n") + 1
+      end
+      
+    protected
+
+      # This is the central method, and often the only one a subclass implements.
+      # 
+      # Subclasses must implement this method; it must return +tokens+ and must only
+      # use Tokens#<< for storing scanned tokens.
+      def scan_tokens tokens, options
+        raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
+      end
+
+      # Scanner error with additional status information
+      def raise_inspect msg, tokens, ambit = 30
+        raise ScanError, <<-EOE % [
+
+
+***ERROR in %s: %s
+
+tokens:
+%s
+
+current line: %d  pos = %d
+matched: %p
+bol? = %p,  eos? = %p
+
+surrounding code:
+%p  ~~  %p
+
+
+***ERROR***
+
+        EOE
+          File.basename(caller[0]),
+          msg,
+          tokens.last(10).map { |t| t.inspect }.join("\n"),
+          line, pos,
+          matched, bol?, eos?,
+          string[pos-ambit,ambit],
+          string[pos,ambit],
+        ]
+      end
+
+    end
+
+  end
+end
+
+# vim:sw=2:ts=2:et:tw=78
diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb
new file mode 100644
index 0000000..3420822
--- /dev/null
+++ b/lib/coderay/scanners/c.rb
@@ -0,0 +1,147 @@
+module CodeRay module Scanners
+	
+	class C < Scanner
+
+		register_for :c
+		
+		RESERVED_WORDS = [
+			'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
+			'for', 'goto', 'if', 'return', 'switch', 'while',
+			'struct', 'union', 'enum', 'typedef',
+			'static', 'register', 'auto', 'extern',
+			'sizeof',
+			'volatile', 'const',  # C89
+			'inline', 'restrict', # C99			
+		]
+
+		PREDEFINED_TYPES = [
+			'int', 'long', 'short', 'char', 'void',
+			'signed', 'unsigned', 'float', 'double',
+			'bool', 'complex',  # C99
+		]
+
+		PREDEFINED_CONSTANTS = [
+			'EOF', 'NULL',
+			'true', 'false',  # C99
+		]
+
+		IDENT_KIND = Scanner::WordList.new(:ident).
+			add(RESERVED_WORDS, :reserved).
+			add(PREDEFINED_TYPES, :pre_type).
+			add(PREDEFINED_CONSTANTS, :pre_constant)
+
+		# What may follow a backslash in a char or string literal: a simple
+		# escape character (or a newline), a hex escape or an octal escape.
+		# Fixed: the character class listed 'r' twice and lacked 'a' (\a).
+		ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
+		UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
+
+		def scan_tokens tokens, options
+
+			state = :initial
+
+			until eos?
+
+				kind = :error
+				match = nil
+
+				if state == :initial
+					
+					if scan(/ \s+ | \\\n /x)
+						kind = :space
+						
+					elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
+						kind = :comment
+
+					elsif match = scan(/ \# \s* if \s* 0 /x)
+						match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
+						kind = :comment
+						
+					elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
+						kind = :operator
+						
+					elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+						kind = IDENT_KIND[match]
+						if kind == :ident and check(/:(?!:)/)
+							match << scan(/:/)
+							kind = :label
+						end
+						
+					elsif match = scan(/L?"/)
+						tokens << [:open, :string]
+						if match[0] == ?L
+							tokens << ['L', :modifier]
+							match = '"'
+						end
+						state = :string
+						kind = :delimiter
+						
+					elsif scan(/#\s*(\w*)/)
+						kind = :preprocessor  # FIXME multiline preprocs
+						state = :include_expected if self[1] == 'include'
+						
+					elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
+						kind = :char
+						
+					elsif scan(/0[xX][0-9A-Fa-f]+/)
+						kind = :hex
+						
+					elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
+						kind = :oct
+						
+					elsif scan(/(?:\d+)(?![.eEfF])/)
+						kind = :integer
+						
+					elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
+						kind = :float
+
+					else
+						getch
+					end
+					
+				elsif state == :string
+					if scan(/[^\\"]+/)
+						kind = :content
+					elsif scan(/"/)
+						tokens << ['"', :delimiter]
+						tokens << [:close, :string]
+						state = :initial
+						next
+					elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
+						kind = :char
+					elsif scan(/ \\ | $ /x)
+						kind = :error
+						state = :initial
+					else
+						raise "else case \" reached; %p not handled." % peek(1), tokens
+					end
+					
+				elsif state == :include_expected
+					if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
+						kind = :include
+						state = :initial
+						
+					elsif match = scan(/\s+/)
+						kind = :space
+						state = :initial if match.index ?\n
+						
+					else
+						getch
+						
+					end
+					
+				else
+					raise 'else-case reached', tokens
+					
+				end
+				
+				match ||= matched
+				raise [match, kind], tokens if kind == :error
+
+				tokens << [match, kind]
+				
+			end
+			
+			tokens
+		end
+
+	end
+
+end end
diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb
new file mode 100644
index 0000000..4c03147
--- /dev/null
+++ b/lib/coderay/scanners/delphi.rb
@@ -0,0 +1,123 @@
+module CodeRay module Scanners
+	
+	class Delphi < Scanner
+
+		register_for :delphi
+		
+		RESERVED_WORDS = [
+			'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
+			'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do',
+			'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization',
+			'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in',
+			'inherited', 'initialization', 'inline', 'interface', 'is', 'label',
+			'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed',
+			'procedure', 'program', 'property', 'raise', 'record', 'repeat',
+			'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar',
+			'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with',
+			'xor', 'on'
+		]
+
+		DIRECTIVES = [
+			'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl',
+			'contains', 'deprecated', 'dispid', 'dynamic', 'export',
+			'external', 'far', 'forward', 'implements', 'local', 
+			'near', 'nodefault', 'on', 'overload', 'override',
+			'package', 'pascal', 'platform', 'private', 'protected', 'public',
+			'published', 'read', 'readonly', 'register', 'reintroduce',
+			'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs',
+			'virtual', 'write', 'writeonly'
+		]
+
+		IDENT_KIND = Scanner::WordList.new(:ident, :case_ignore).
+			add(RESERVED_WORDS, :reserved).
+			add(DIRECTIVES, :directive)
+
+		def scan_tokens tokens, options
+
+			state = :initial
+
+			until eos?
+
+				kind = :error
+				match = nil
+
+				if state == :initial
+					
+					if scan(/ \s+ /x)
+						kind = :space
+						
+					elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
+						kind = :preprocessor
+						
+					elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
+						kind = :comment
+						
+					elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
+						kind = :operator
+						
+					elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+						kind = IDENT_KIND[match]
+						
+					elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
+						tokens << [:open, :char]
+						tokens << ["'", :delimiter]
+						tokens << [self[1], :content]
+						tokens << ["'", :delimiter]
+						tokens << [:close, :char]
+						next
+						
+					elsif match = scan(/ ' /x)
+						tokens << [:open, :string]
+						state = :string
+						kind = :delimiter
+						
+					elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
+						kind = :char
+						
+					elsif scan(/ \$ [0-9A-Fa-f]+ /x)
+						kind = :hex
+						
+					elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
+						kind = :integer
+						
+					elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
+						kind = :float
+
+					else
+						getch
+					end
+					
+				elsif state == :string
+					if scan(/[^\n']+/)
+						kind = :content
+					elsif scan(/''/)
+						kind = :char
+					elsif scan(/'/)
+						tokens << ["'", :delimiter]
+						tokens << [:close, :string]
+						state = :initial
+						next
+					elsif scan(/\n/)
+						state = :initial
+					else
+						raise "else case \' reached; %p not handled." % peek(1), tokens
+					end
+					
+				else
+					raise 'else-case reached', tokens
+					
+				end
+				
+				match ||= matched
+				raise [match, kind], tokens if kind == :error
+
+				tokens << [match, kind]
+				
+			end
+			
+			tokens
+		end
+
+	end
+
+end end
diff --git a/lib/coderay/scanners/helpers/ruby_helper.rb b/lib/coderay/scanners/helpers/ruby_helper.rb
new file mode 100644
index 0000000..241b392
--- /dev/null
+++ b/lib/coderay/scanners/helpers/ruby_helper.rb
@@ -0,0 +1,212 @@
+module CodeRay module Scanners
+
+	class Ruby
+
+		RESERVED_WORDS = %w[
+			and def end in or unless begin
+			defined? ensure module redo super until
+			BEGIN break do next rescue then
+			when END case else for retry
+			while alias class elsif if not return
+			undef yield
+		]
+
+		DEF_KEYWORDS = %w[ def ]
+		MODULE_KEYWORDS = %w[class module]
+		DEF_NEW_STATE = WordList.new(:initial).
+			add(DEF_KEYWORDS, :def_expected).
+			add(MODULE_KEYWORDS, :module_expected)
+
+		IDENTS_ALLOWING_REGEXP = %w[
+			and or not while until unless if then elsif when sub sub! gsub gsub! scan slice slice! split
+		]
+		REGEXP_ALLOWED = WordList.new(false).
+			add(IDENTS_ALLOWING_REGEXP, :set)
+		
+		PREDEFINED_CONSTANTS = %w[
+			nil true false self
+			DATA ARGV ARGF __FILE__ __LINE__
+		]
+
+		IDENT_KIND = WordList.new(:ident).
+			add(RESERVED_WORDS, :reserved).
+			add(PREDEFINED_CONSTANTS, :pre_constant)
+
+#		IDENT = /[a-zA-Z_][a-zA-Z_0-9]*/
+		IDENT = /[a-z_][\w_]*/i
+
+		METHOD_NAME = / #{IDENT} [?!]? /ox
+		METHOD_NAME_EX = /
+			#{IDENT}[?!=]?  # common methods: split, foo=, empty?, gsub!
+			| \*\*?         # multiplication and power
+			| [-+]@?        # plus, minus
+			| [\/%&|^`~]    # division, modulo or format strings, &and, |or, ^xor, `system`, tilde
+			| \[\]=?        # array getter and setter
+			| << | >>       # append or shift left, shift right
+			| <=?>? | >=?   # comparison, rocket operator
+			| ===?          # simple equality and case equality
+		/ox
+		INSTANCE_VARIABLE = / @ #{IDENT} /ox
+		CLASS_VARIABLE = / @@ #{IDENT} /ox
+		OBJECT_VARIABLE = / @@? #{IDENT} /ox
+		GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9] | 0[a-zA-Z_0-9]* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
+		PREFIX_VARIABLE = / #{GLOBAL_VARIABLE} |#{OBJECT_VARIABLE} /ox
+		VARIABLE = / @?@? #{IDENT} | #{GLOBAL_VARIABLE} /ox
+
+		QUOTE_TO_TYPE = {
+			'`' => :shell,
+			'/'=> :regexp,
+		}
+		QUOTE_TO_TYPE.default = :string
+		
+		REGEXP_MODIFIERS = /[mixounse]*/
+		REGEXP_SYMBOLS = /
+			[|?*+?(){}\[\].^$]
+		/x
+
+		DECIMAL = /\d+(?:_\d+)*/  # doesn't recognize 09 as octal error
+		OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+		HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+		BINARY = /0b[01]+(?:_[01]+)*/
+
+		EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+		FLOAT_OR_INT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? )? /ox
+		FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /ox
+		NUMERIC = / #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | #{FLOAT_OR_INT} /ox
+
+		SYMBOL = /
+			:
+			(?:
+				#{METHOD_NAME_EX}
+			| #{PREFIX_VARIABLE}
+			| ['"]
+			)
+		/ox
+
+		# TODO investigate \M, \c and \C escape sequences
+		# (?: M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-)? (?: \\ (?: [0-7]{3} | x[0-9A-Fa-f]{2} | . ) )
+		# assert_equal(225, ?\M-a)
+		# assert_equal(129, ?\M-\C-a)
+		ESCAPE = /
+				[abefnrstv]
+			| M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-
+			|	[0-7]{1,3}
+			| x[0-9A-Fa-f]{1,2}
+			| .
+		/mx
+
+		CHARACTER = /
+			\?
+			(?:
+				[^\s\\]
+			| \\ #{ESCAPE}
+			)
+		/mx
+
+		# NOTE: This is not completely correct, but
+		# nobody needs heredoc delimiters ending with \n.
+		HEREDOC_OPEN = /
+			<< (-)?              # $1 = float
+			(?:
+				( [A-Za-z_0-9]+ )  # $2 = delim
+			|
+				( ["'`] )          # $3 = quote, type
+				( [^\n]*? ) \3     # $4 = delim
+			)
+		/mx
+
+		RDOC = /
+			=begin (?!\S)
+			.*?
+			(?: \Z | ^=end (?!\S) [^\n]* )
+		/mx
+
+		DATA = /
+			__END__$
+			.*?
+			(?: \Z | (?=^\#CODE) )
+		/mx
+
+		RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
+
+		FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox
+
+		FancyStringType = {
+			'q' => [:string, false],
+			'Q' => [:string, true],
+			'r' => [:regexp, true],
+			's' => [:symbol, false],
+			'x' => [:shell, true],
+			'w' => [:string, :word],
+			'W' => [:string, :word],
+		}
+		FancyStringType['w'] = FancyStringType['q']
+		FancyStringType['W'] = FancyStringType[''] = FancyStringType['Q']
+			
+		# Describes the string-like literal that is currently being scanned:
+		# its type, whether it is interpreted, its closing delimiter, whether
+		# it is a heredoc, the opening paren and nesting depth for
+		# paren-delimited literals, and the pattern used to scan its content.
+		class StringState < Struct.new :type, :interpreted, :delim, :heredoc,
+			:paren, :paren_depth, :pattern
+			
+			# Maps each opening paren to its closing counterpart.
+			CLOSING_PAREN = Hash[ *%w[
+				( )
+				[ ]
+				< >
+				{ }
+			] ]
+			
+			CLOSING_PAREN.values.each { |o| o.freeze }  # debug, if I try to change it with <<
+			# Maps each closing paren to its opening counterpart.
+			OPENING_PAREN = CLOSING_PAREN.invert
+
+			# Cache of content patterns for normal strings. The key is
+			# [delim, interpreted]; the pattern is built on first access.
+			# It is a lookahead for the delimiter (and its opening paren, if
+			# any), a backslash and - for interpreted strings whose delimiter
+			# is not '#' - a '#' followed by '{', '$' or '@'.
+			STRING_PATTERN = Hash.new { |h, k|
+				delim, interpreted = *k
+				delim_pattern = Regexp.escape(delim.dup)
+				if starter = OPENING_PAREN[delim]
+					delim_pattern << Regexp.escape(starter)
+				end
+
+				
+				# NOTE(review): the FancyStringType table of this class uses
+				# :word, not :words, and no value :regexp_symbols is visible
+				# here - confirm whether these branches are ever taken.
+				special_escapes = 
+					case interpreted
+					when :regexp_symbols
+						'| ' + REGEXP_SYMBOLS.source
+					when :words
+						'| \s'
+					end
+
+				h[k] =
+					if interpreted and not delim == '#'
+						/ (?= [#{delim_pattern}\\] | \# [{$@] #{special_escapes} ) /mx
+					else
+						/ (?= [#{delim_pattern}\\] #{special_escapes} ) /mx
+					end
+			}
+
+			# Cache of content patterns for heredocs. The key is
+			# [delim, interpreted, indented]. The pattern is a lookahead for a
+			# newline followed by the delimiter at the end of a line (after
+			# optional blanks and tabs if +indented+), a backslash and - for
+			# interpreted heredocs - a '#' followed by '{', '$' or '@'.
+			HEREDOC_PATTERN = Hash.new { |h, k|
+				delim, interpreted, indented = *k
+				delim_pattern = Regexp.escape(delim.dup)
+				delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
+				h[k] =
+					if interpreted
+						/ (?= #{delim_pattern}() | \\ | \# [{$@] ) /mx
+					else
+						/ (?= #{delim_pattern}() | \\ ) /mx
+					end
+			}
+
+			# Creates the state for a literal of type +kind+ that was opened
+			# with +delim+.
+			#
+			# If +delim+ is an opening paren, the closing paren becomes the
+			# delimiter, the opening one is stored in +paren+, and
+			# +paren_depth+ starts at 1. For heredocs (+heredoc+ is true or
+			# :indented), the pattern is taken from HEREDOC_PATTERN and the
+			# stored +delim+ is nil.
+			def initialize kind, interpreted, delim, heredoc = false
+				if paren = CLOSING_PAREN[delim]
+					delim, paren = paren, delim
+					paren_depth = 1
+				end
+				if heredoc
+					pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
+					delim	= nil
+				else
+					pattern = STRING_PATTERN[ [delim, interpreted] ]
+				end
+				super kind, interpreted, delim, heredoc, paren, paren_depth, pattern
+			end
+		end unless defined? StringState
+	
+	end
+
+end end
diff --git a/lib/coderay/scanners/mush.rb b/lib/coderay/scanners/mush.rb
new file mode 100644
index 0000000..5217ed9
--- /dev/null
+++ b/lib/coderay/scanners/mush.rb
@@ -0,0 +1,102 @@
+module CodeRay module Scanners
+	
+	class Mush < Scanner
+
+		register_for :mush
+		
+		RESERVED_WORDS = []  # no reserved words are defined for this scanner yet
+		DIRECTIVES = [] unless defined? DIRECTIVES  # no directives yet; IDENT_KIND below needs the constant
+
+		IDENT_KIND = Scanner::WordList.new(:ident, :case_ignore).
+			add(RESERVED_WORDS, :reserved).
+			add(DIRECTIVES, :directive)
+
+		def scan_tokens tokens, options
+
+			state = :initial
+
+			until eos?
+
+				kind = :error
+				match = nil
+
+				if state == :initial
+					
+					if scan(/ \s+ /x)
+						kind = :space
+						
+					elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx)
+						kind = :preprocessor
+						
+					elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx)
+						kind = :comment
+						
+					elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x)
+						kind = :operator
+						
+					elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
+						kind = IDENT_KIND[match]
+						
+					elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x)
+						tokens << [:open, :char]
+						tokens << ["'", :delimiter]
+						tokens << [self[1], :content]
+						tokens << ["'", :delimiter]
+						tokens << [:close, :char]
+						next
+						
+					elsif match = scan(/ ' /x)
+						tokens << [:open, :string]
+						state = :string
+						kind = :delimiter
+						
+					elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x)
+						kind = :char
+						
+					elsif scan(/ \$ [0-9A-Fa-f]+ /x)
+						kind = :hex
+						
+					elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x)
+						kind = :integer
+						
+					elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x)
+						kind = :float
+
+					else
+						getch
+					end
+					
+				elsif state == :string
+					if scan(/[^\n']+/)
+						kind = :content
+					elsif scan(/''/)
+						kind = :char
+					elsif scan(/'/)
+						tokens << ["'", :delimiter]
+						tokens << [:close, :string]
+						state = :initial
+						next
+					elsif scan(/\n/)
+						state = :initial
+					else
+						raise "else case \' reached; %p not handled." % peek(1), tokens
+					end
+					
+				else
+					raise 'else-case reached', tokens
+					
+				end
+				
+				match ||= matched
+				raise [match, kind], tokens if kind == :error
+
+				tokens << [match, kind]
+				
+			end
+			
+			tokens
+		end
+
+	end
+
+end end
diff --git a/lib/coderay/scanners/plaintext.rb b/lib/coderay/scanners/plaintext.rb
new file mode 100644
index 0000000..0aebf35
--- /dev/null
+++ b/lib/coderay/scanners/plaintext.rb
@@ -0,0 +1,13 @@
+module CodeRay module Scanners
+
+	class Plaintext < Scanner
+		register_for :plaintext, :plain
+
+		# Emits the whole remaining input as one :plain token.
+		def scan_tokens tokens, options
+			rest = scan_until(/\z/)
+			tokens << [rest, :plain]
+		end
+	end
+
+end end
diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb
new file mode 100644
index 0000000..433726b
--- /dev/null
+++ b/lib/coderay/scanners/ruby.rb
@@ -0,0 +1,333 @@
+module CodeRay module Scanners
+
+	# This scanner is really complex, since Ruby _is_ a complex language!
+	#
+	# It tries to highlight 100% of all common code,
+	# and 90% of strange codes.
+	#
+	# It is optimized for HTML highlighting, and is not very useful for
+	# parsing or pretty printing.
+	#
+	# For now, I think it's better than the scanners in VIM or Syntax, or
+	# any highlighter I was able to find, except Caleb's RubyLexer.
+	#
+	# I hope it's also better than the rdoc/irb lexer.
+	class Ruby < Scanner
+
+		include Streamable
+
+		register_for :ruby
+
+		require 'coderay/scanners/helpers/ruby_helper'
+		
+		DEFAULT_OPTIONS = {
+			:parse_regexps => true,
+		}
+
+	private
+		def scan_tokens tokens, options  # appends [text, kind] tokens to +tokens+ and returns it
+			parse_regexp = false # options[:parse_regexps]
+			first_bake = saved_tokens = nil
+			last_token_dot = false
+			fancy_allowed = regexp_allowed = true
+			heredocs = nil  # StringStates of heredocs that start at the next line break
+			last_state = nil
+			state = :initial
+			depth = nil
+			states = []  # stack of [state, depth, heredocs], pushed when #{ opens inside a string
+
+			until eos?
+				type = :error
+				match = nil
+				kind = nil
+
+				if state.instance_of? StringState
+# {{{
+
+					match = scan_until(state.pattern) || scan_until(/\z/)
+					tokens << [match, :content] unless match.empty?
+					break if eos?
+					
+					if state.heredoc and self[1]
+						match = getch + scan_until(/$/)
+						tokens << [match, :delimiter]
+						tokens << [:close, state.type]
+						state = :initial
+						next
+					end
+					
+					case match = getch
+					
+					when state.delim
+						if state.paren
+							state.paren_depth -= 1 
+							if state.paren_depth > 0
+								tokens << [match, :nesting_delimiter]
+								next
+							end
+						end
+						tokens << [match, :delimiter]
+						if state.type == :regexp and not eos?
+							modifiers = scan(/#{REGEXP_MODIFIERS}/ox)
+							tokens << [modifiers, :modifier] unless modifiers.empty?
+							if parse_regexp
+								extended = modifiers.index ?x
+								tokens, regexp = saved_tokens, tokens
+								for text, type in regexp
+									if text.is_a? String
+										case type
+										when :content
+											text.scan(/([^#]+)|(#.*)/) do |plain, comment|
+												if plain
+													tokens << [plain, :content]
+												else
+													tokens << [comment, :comment]
+												end
+											end
+										when :character
+											if text[/\\(?:[swdSWDAzZbB]|\d+)/]
+												tokens << [text, :modifier]
+											else
+												tokens << [text, type]
+											end
+										else
+											tokens << [text, type]
+										end
+									else
+										tokens << [text, type]
+									end										
+								end
+								first_bake = saved_tokens = nil
+							end
+						end
+						tokens << [:close, state.type]
+						fancy_allowed = regexp_allowed = false
+						state = :initial
+						
+					when '\\'
+						if state.interpreted
+							if esc = scan(/ #{ESCAPE} /ox)
+								tokens << [match + esc, :char]
+							else
+								tokens << [match, :error]
+							end
+						else
+							case m = getch
+							when state.delim, '\\'
+								tokens << [match + m, :char]
+							else
+								tokens << [match + m, :content]
+							end
+						end
+						
+					when '#'
+						case peek(1)[0]
+						when ?{
+							states.push [state, depth, heredocs]
+							fancy_allowed = regexp_allowed = true
+							state, depth = :initial, 1
+							tokens << [match + getch, :escape]
+						when ?$, ?@
+							tokens << [match, :escape]
+							last_state = state  # scan one token as normal code, then return here
+							state = :initial
+						else
+							raise "else-case # reached; #%p not handled" % peek(1), tokens
+						end
+						
+					when state.paren
+						state.paren_depth += 1
+						tokens << [match, :nesting_delimiter]
+
+					when REGEXP_SYMBOLS
+						tokens << [match, :function]
+						
+					else
+						raise "else-case \" reached; %p not handled, state = %p" % [match, state], tokens
+						
+					end
+					next
+# }}}
+				else
+# {{{					
+					if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
+						( bol? and match = scan(/ #{DATA} | #{RDOC} /ox) )
+						fancy_allowed = true
+						case m = match[0]
+						when ?\s, ?\t, ?\f
+							match << scan(/\s*/) unless eos? or heredocs
+							type = :space
+						when ?\n, ?\\
+							type = :space
+							regexp_allowed = m == ?\n
+							if heredocs
+								unscan  # heredoc scanning needs \n at start
+								state = heredocs.shift
+								tokens << [:open, state.type]
+								heredocs = nil if heredocs.empty?
+								next
+							else
+								match << scan(/\s*/) unless eos?
+							end
+						when ?#, ?=, ?_
+							type = :comment
+							regexp_allowed = true
+						else
+							raise "else-case _ reached, because case %p was not handled" % [matched[0].chr], tokens
+						end
+						tokens << [match, type]
+						next
+
+					elsif state == :initial
+						if match = scan(/ \.\.?\.? | [-+*=>;,|&!\(\)\[\]~^]+ | [\{\}] | :: /x)
+							if match !~ / [.\)\]\}] \z/x or match =~ /\.\.\.?/
+								regexp_allowed = fancy_allowed = :set
+							end
+							last_token_dot = :set if match == '.' or match == '::'
+							type = :operator
+							unless states.empty?
+								case match
+								when '{'
+									depth += 1
+								when '}'
+									depth -= 1
+									if depth == 0
+										state, depth, heredocs = *states.pop
+										type = :escape
+									end
+								end
+							end
+							
+						elsif match = scan(/#{METHOD_NAME}/o)
+							if last_token_dot
+								type = if match[/^[A-Z]/] then :constant else :ident end
+							else
+								type = IDENT_KIND[match]
+								if type == :ident and match[/^[A-Z]/]
+									type = :constant
+								elsif type == :reserved
+									state = DEF_NEW_STATE[match]
+								end
+							end
+							fancy_allowed = regexp_allowed = REGEXP_ALLOWED[match]
+							
+						elsif match = scan(/ ['"] /mx)
+							tokens << [:open, :string]
+							type = :delimiter
+							state = StringState.new :string, match != '\'', match.dup  # important for streaming
+							
+						elsif match = scan(/#{INSTANCE_VARIABLE}/o)
+							type = :instance_variable
+							
+						elsif regexp_allowed and match = scan(/ \/ /mx)
+							tokens << [:open, :regexp]
+							type = :delimiter
+							interpreted = true
+							state = StringState.new :regexp, interpreted, match.dup
+							if parse_regexp
+								tokens, saved_tokens = [], tokens
+							end
+							
+						elsif match = scan(/#{NUMERIC}/o)
+							type = if match[/#{FLOAT}/o] then :float else :integer end							
+
+						elsif fancy_allowed and match = scan(/#{SYMBOL}/o)
+							case match[1]
+							when ?', ?"
+								tokens << [:open, :symbol]
+								state = StringState.new :symbol, match[1] == ?", match[1,1]
+							end
+							type = :symbol
+							
+						elsif fancy_allowed and match = scan(/#{HEREDOC_OPEN}/o)
+							indented, quote = self[1] == '-', self[3]
+							delim = self[quote ? 4 : 2]
+							type = QUOTE_TO_TYPE[quote]
+							tokens << [:open, type]
+							tokens << [match, :delimiter]
+							match = :close
+							heredoc = StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart )
+							heredocs ||= []  # create heredocs if empty
+							heredocs << heredoc
+							
+						elsif fancy_allowed and match = scan(/#{FANCY_START}/o)
+							type, interpreted = *FancyStringType.fetch(self[1]) do |k|  # k is the unknown type character
+								raise 'Unknown fancy string: %%%p' % k, tokens
+							end
+							tokens << [:open, type]
+							state = StringState.new type, interpreted, self[2]
+							type = :delimiter
+
+						elsif fancy_allowed and match = scan(/#{CHARACTER}/o)
+							type = :integer
+
+						elsif match = scan(/ [\/%<?:] /x)
+							regexp_allowed = fancy_allowed = :set
+							type = :operator
+
+						elsif match = scan(/`/)
+							if last_token_dot
+								type = :operator
+							else
+								tokens << [:open, :shell]
+								type = :delimiter
+								state = StringState.new :shell, true, '`'
+							end
+							
+						elsif match = scan(/#{GLOBAL_VARIABLE}/o)
+							type = :global_variable
+							
+						elsif match = scan(/#{CLASS_VARIABLE}/o)
+							type = :class_variable
+							
+						else
+							match = getch
+							
+						end
+						
+					elsif state == :def_expected
+						if match = scan(/ (?: #{VARIABLE} (?: ::#{IDENT} )* \. )? #{METHOD_NAME_EX} /ox)
+							type = :method
+						else
+							match = getch
+						end
+						state = :initial
+
+					elsif state == :module_expected
+						if match = scan(/<</)
+							type = :operator
+						else
+							if match = scan(/ (?:#{IDENT}::)* #{IDENT} /ox)
+								type = :class
+							else
+								match = getch
+							end
+						end
+						state = :initial
+						
+					end
+
+					regexp_allowed = regexp_allowed == :set
+					fancy_allowed = fancy_allowed == :set
+					last_token_dot = last_token_dot == :set
+
+					if $DEBUG
+						raise_inspect 'error token %p in line %d' % [tokens.last, line], tokens if not type or type == :error
+					end
+
+					tokens << [match, type]
+					
+					if last_state
+						state = last_state
+						last_state = nil
+					end
+# }}}
+				end
+			end
+
+			tokens
+		end
+	end
+
+end end
+# vim:fdm=marker
diff --git a/lib/coderay/scanners/rubyfast.rb b/lib/coderay/scanners/rubyfast.rb
new file mode 100644
index 0000000..baff382
--- /dev/null
+++ b/lib/coderay/scanners/rubyfast.rb
@@ -0,0 +1,287 @@
+module CodeRay module Scanners
+
+	class Ruby < Scanner
+
+		register_for :rubyfast
+
+		RESERVED_WORDS = [
+			'and', 'def', 'end', 'in', 'or', 'unless', 'begin',
+			'defined?', 'ensure', 'module', 'redo', 'super', 'until',
+			'BEGIN', 'break', 'do', 'next', 'rescue', 'then',
+			'when', 'END', 'case', 'else', 'for', 'retry',
+			'while', 'alias', 'class', 'elsif', 'if', 'not', 'return',
+			'undef', 'yield',
+		]
+
+		DEF_KEYWORDS = ['def']
+		MODULE_KEYWORDS = ['class', 'module']
+		DEF_NEW_STATE = WordList.new(:initial).
+			add(DEF_KEYWORDS, :def_expected).
+			add(MODULE_KEYWORDS, :module_expected)
+
+		WORDS_ALLOWING_REGEXP = [
+			'and', 'or', 'not', 'while', 'until', 'unless', 'if', 'elsif', 'when'
+		]
+		REGEXP_ALLOWED = WordList.new(false).
+			add(WORDS_ALLOWING_REGEXP, :set)
+		
+		PREDEFINED_CONSTANTS = [
+			'nil', 'true', 'false', 'self',
+			'DATA', 'ARGV', 'ARGF', '__FILE__', '__LINE__',
+		]
+
+		IDENT_KIND = WordList.new(:ident).
+			add(RESERVED_WORDS, :reserved).
+			add(PREDEFINED_CONSTANTS, :pre_constant)
+
+		IDENT = /[a-zA-Z_][a-zA-Z_0-9]*/
+
+		METHOD_NAME = / #{IDENT} [?!]? /xo
+		METHOD_NAME_EX = /
+		#{IDENT}[?!=]?  # common methods: split, foo=, empty?, gsub!
+		| \*\*?         # multiplication and power
+		| [-+~]@?       # plus, minus
+		| [\/%&|^`]     # division, modulo or format strings, &and, |or, ^xor, `system`
+		| \[\]=?        # array getter and setter
+		| <=?>? | >=?   # comparison, rocket operator
+		| << | >>       # append or shift left, shift right
+		| ===?          # simple equality and case equality
+		/ox
+		GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9] | 0[a-zA-Z_0-9]* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox
+
+		DOUBLEQ = / " [^"\#\\]* (?: (?: \#\{.*?\} | \#(?:$")? | \\. ) [^"\#\\]* )* "? /mox
+		SINGLEQ = / ' [^'\\]*   (?:                             \\.   [^'\\]*   )* '? /mox
+		STRING  = / #{SINGLEQ} | #{DOUBLEQ} /ox
+
+		SHELL   = / ` [^`\#\\]* (?: (?: \#\{.*?\} | \#(?:$`)? | \\. ) [^`\#\\]* )* `? /mox
+		REGEXP =%r! / [^/\#\\]* (?: (?: \#\{.*?\} | \#(?:$/)? | \\. ) [^/\#\\]* )* /? !mox
+		
+		DECIMAL = /\d+(?:_\d+)*/  # doesn't recognize 09 as octal error
+		OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
+		HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
+		BINARY = /0b[01]+(?:_[01]+)*/
+
+		EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
+		FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /
+		INTEGER = /#{OCTAL}|#{HEXADECIMAL}|#{BINARY}|#{DECIMAL}/
+
+		ESCAPE_STRING = /
+			% (?!\s)
+			(?:
+				[qsw]
+				(?:
+					\( [^\)\\]* (?: \\. [^\)\\]* )* \)?
+				|
+					\[ [^\]\\]* (?: \\. [^\]\\]* )* \]?
+				|
+					\{ [^\}\\]* (?: \\. [^\}\\]* )* \}?
+				|
+					\< [^\>\\]* (?: \\. [^\>\\]* )* \>?
+				|
+					\\ [^\\  ]*                     \\?
+				|
+					( [^a-zA-Z0-9] )  # $1
+					(?:(?!\1)[^\\])* (?: \\. (?:(?!\1)[^\#\\])* )* \1?
+				)
+			|
+				[QrxWr]?
+				(?:
+					\( [^\)\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\)\#\\]* )* \)?
+				|
+					\[ [^\]\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\]\#\\]* )* \]?
+				|
+					\{ [^\}\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\}\#\\]* )* \}?
+				|
+					\< [^\>\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\>\#\\]* )* \>?
+				|
+					\# [^\#  \\]* (?:                 \\.  [^\#  \\]* )* \#?
+				|
+					\\ [^\\\#  ]* (?: (?:\#\{.*?\}|\#    ) [^\\\#  ]* )* \\?
+				|
+					( [^a-zA-Z0-9] )  # $2
+					(?:(?!\2)[^\#\\])* (?: (?:\#\{.*?\}|\#|\\.) (?:(?!\2)[^\#\\])* )* \2?
+				)
+			)
+		/mox
+		
+		SYMBOL = /
+			:
+			(?:
+			  #{GLOBAL_VARIABLE}
+			|	@@?#{IDENT}
+			| #{METHOD_NAME_EX}
+			| #{STRING}
+		)/ox
+
+		HEREDOC = /
+			<< (?! [\dc] )
+			(?: [^\n]*? << )?
+			(?:
+				([a-zA-Z_0-9]+) 
+					(?: .*? ^\1$ | .* )
+			|
+				-([a-zA-Z_0-9]+)
+					(?: .*? ^\s*\2$ | .* )
+			|
+				(["\'`]) (.*?) \3
+					(?: .*? ^\4$ | .* )
+			| 
+				- (["\'`]) (.*?) \5
+					(?: .*? ^\s*\6$ | .* )
+			)
+		/mx
+
+		RDOC = /
+			=begin (?!\S) [^\n]* \n?
+			(?:
+				(?! =end (?!\S) )
+				[^\n]* \n?
+			)*
+			(?:
+				=end (?!\S) [^\n]*
+			)?
+		/mx
+
+		DATA = /
+			__END__\n
+			(?:
+				(?=\#CODE)
+			|
+				.*
+			)
+		/
+
+	private
+		def scan_tokens tokens, options  # appends [text, kind] tokens to +tokens+ and returns it
+			
+			state = :initial
+			regexp_allowed = true
+			last_token_dot = false
+
+			until eos?
+				match = nil
+				kind = :error
+
+				if scan(/\s+/)  # in every state
+					kind = :space
+					regexp_allowed = :set if regexp_allowed or matched.index(?\n)  # delayed flag setting
+
+				elsif scan(/ \#[^\n]* /x)  # in every state
+					kind = :comment
+					regexp_allowed = :set if regexp_allowed
+
+				elsif state == :initial
+					# IDENTIFIERS, KEYWORDS
+					if scan(GLOBAL_VARIABLE)
+						kind = :global_variable
+					elsif scan(/ @@ #{IDENT} /ox)
+						kind = :class_variable
+					elsif scan(/ @ #{IDENT} /ox)
+						kind = :instance_variable
+					elsif scan(/ #{DATA} | #{RDOC} /ox)
+						kind = :comment
+					elsif scan(METHOD_NAME)
+						match = matched
+						if last_token_dot
+							kind =
+								if match[/^[A-Z]/]
+									:constant
+								else
+									:ident
+								end
+						else
+							kind = IDENT_KIND[match]
+							if kind == :ident and match[/^[A-Z]/]
+								kind = :constant
+							elsif kind == :reserved
+								state = DEF_NEW_STATE[match]
+								regexp_allowed = REGEXP_ALLOWED[match]
+							end
+						end
+						
+					elsif scan(STRING)
+						kind = :string
+					elsif scan(SHELL)
+						kind = :shell
+					elsif scan(HEREDOC)
+						kind = :string
+					elsif check(/\//) and regexp_allowed
+						scan(REGEXP)
+						kind = :regexp
+					elsif scan(ESCAPE_STRING)
+						match = matched
+						kind = 
+							case match[1]  # the character after the leading % selects the kind
+							when ?s
+								:symbol
+							when ?r
+								:regexp
+							when ?x
+								:shell
+							else
+								:string
+							end
+
+					elsif scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox)
+						kind = :symbol
+					elsif scan(/
+						\? (?:
+							[^\s\\]
+						| 
+							\\ (?:M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-))? (?: \\ (?: . | [0-7]{3} | x[0-9A-Fa-f][0-9A-Fa-f] )
+						)
+					/mx)
+						kind = :integer
+						
+					elsif scan(/ [-+*\/%=<>;,|&!()\[\]{}~?] | \.\.?\.? | ::? /x)
+						kind = :operator
+						match = matched
+						regexp_allowed = :set if match[-1,1] =~ /[~=!<>|&^,\(\[+\-\/\*%]\z/
+						last_token_dot = :set if match == '.' or match == '::'
+					elsif scan(FLOAT)
+						kind = :float
+					elsif scan(INTEGER)
+						kind = :integer
+					else
+						getch
+					end
+					
+				elsif state == :def_expected
+					if scan(/ (?:#{IDENT}::)* (?:#{IDENT}\.)? #{METHOD_NAME_EX} /ox)
+						kind = :method
+					else
+						getch
+					end
+					state = :initial
+					
+				elsif state == :module_expected
+					if scan(/<</)
+						kind = :operator
+					else
+						if scan(/ (?:#{IDENT}::)* #{IDENT} /ox)
+							kind = :class  # the name of a class or module
+						else
+							getch
+						end
+						state = :initial
+					end
+					
+				end
+				
+				text = match || matched
+
+				if kind == :regexp and not eos?
+					text << scan(/[eimnosux]*/)
+				end
+				
+				regexp_allowed = (regexp_allowed == :set)  # delayed flag setting
+				last_token_dot = last_token_dot == :set
+
+				tokens << [text, kind]
+			end
+
+			tokens
+		end
+	end
+
+end end
diff --git a/lib/coderay/scanners/rubylex.rb b/lib/coderay/scanners/rubylex.rb
new file mode 100644
index 0000000..2e69d39
--- /dev/null
+++ b/lib/coderay/scanners/rubylex.rb
@@ -0,0 +1,102 @@
+require 'rubygems'
+require_gem 'rubylexer'
+require 'rubylexer.rb'
+
+module CodeRay module Scanners
+
+	class RubyLex < Scanner
+		
+		register_for :rubylex
+
+		class FakeFile < String  # a String with a read position and a small IO-like interface
+
+			def initialize(*)
+				super
+				@pos = 0
+			end
+			
+			attr_accessor :pos
+			
+			def read x  # returns the next x characters and advances the position by x
+				pos = @pos
+				@pos += x
+				self[pos ... @pos]
+			end
+
+			def getc  # returns the element at the position (-1 behind the end) and advances by one
+				pos = @pos
+				@pos += 1
+				self[pos]||-1
+			end
+
+			def eof?
+				@pos >= size  # read and getc can move the position past the end
+			end
+
+			def each_byte
+				until eof?
+					yield getc
+				end
+			end
+			
+			def method_missing meth, *args
+				raise NoMethodError, '%s%s' % [meth, args]
+			end
+
+		end
+
+	private
+		Translate = {
+			:ignore => :comment,
+			:varname => :ident,
+			:number => :integer,
+			:ws => :space,
+			:escnl => :space,
+			:keyword => :reserved,
+			:methname => :method,
+			:renderexactlystring => :regexp,
+			:string => :string,
+		}
+
+		def scan_tokens tokens, options  # writes the code to a Tempfile, lexes it with RubyLexer, returns +tokens+
+			require 'tempfile'
+			Tempfile.open('~coderay_tempfile') do |file|
+				file.binmode
+				file.write code
+				file.rewind
+				lexer = RubyLexer.new 'code', file
+				loop do
+					begin
+						tok = lexer.get1token
+					rescue => kaboom
+						err = <<-EOE
+	ERROR!!!
+#{kaboom.inspect}
+#{kaboom.backtrace.join("\n")}
+						EOE
+						tokens << [err, :error]
+						Kernel.raise  # re-raises the rescued exception
+					end
+					break if tok.is_a? EoiToken
+					next if tok.is_a? FileAndLineToken
+					kind = tok.class.name[/(.*?)Token$/,1].downcase.to_sym
+					kind = Translate.fetch kind, kind  # unknown kinds are passed through unchanged
+					text = tok.ident
+					case kind
+					when :hereplaceholder
+						text = tok.ender
+						kind = :string
+					when :herebody, :outlinedherebody
+						text = tok.ident.ident
+						kind = :string
+					end
+					text = text.inspect unless text.is_a? String
+					p tok if kind == :error  # debug output of the offending lexer token
+					tokens << [text.dup, kind]
+				end
+			end
+			tokens
+		end
+	end
+
+end end
diff --git a/lib/coderay/tokens.rb b/lib/coderay/tokens.rb
new file mode 100644
index 0000000..71ad33a
--- /dev/null
+++ b/lib/coderay/tokens.rb
@@ -0,0 +1,302 @@
+module CodeRay
+
+	# The Tokens class represents a list of tokens returned from
+	# a Scanner.
+	#
+	# A token is not a special object, just a two-element Array
+	# consisting of
+	# * the _token_ _text_ (the original source of the token in a String)
+	# * the _token_ _kind_ (a Symbol representing the type of the token)
+	#
+	# A token looks like this:
+	# 
+	#   ['# It looks like this', :comment]
+	#   ['3.1415926', :float]
+	#   ['���', :error]
+	# 
+	# Some scanners also yield some kind of sub-tokens, represented by special
+	# token texts, namely :open and :close .
+	# 
+	# The Ruby scanner, for example, splits "a string" into:
+	# 
+	#  [
+	#   [:open, :string],
+	#   ['"', :delimiter],
+	#   ['a string', :content],
+	#   ['"', :delimiter],
+	#   [:close, :string]
+	#  ]
+	# 
+	# Tokens is also the interface between Scanners and Encoders:
+	# The input is split and saved into a Tokens object. The Encoder
+	# then builds the output from this object.
+	# 
+	# Thus, the syntax below becomes clear:
+	#
+	#   CodeRay.scan('price = 2.59', :ruby).html
+	#   # the Tokens object is here -------^
+	# 
+	# See how small it is? ;)
+	# 
+	# Tokens gives you the power to handle pre-scanned code very easily:
+	# You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
+	# that you put in your DB.
+	#
+	# Tokens' subclass TokenStream allows streaming to save memory.
+	class Tokens < Array
+
+		class << self
+
+			# Convert the token to a one-line string.
+			# 
+			# This format is used by Encoders.Tokens.
+			# It can be reverted using read_token.
+			def write_token text, type
+				if text.is_a? String
+					"#{type}\t#{escape(text)}\n"  # text token: kind, TAB, escaped text
+				else
+					":#{text}\t#{type}\t\n"  # range token such as :open: colon, text, TAB, kind, TAB
+				end
+			end
+
+			# Read a token from the string.
+			# 
+			# Inversion of write_token: returns the token as [text, kind].
+			# A leading colon marks a range token such as [:open, :string];
+			# the line terminator added by write_token is dropped.
+			def read_token token
+				head, rest = token.sub(/\n\z/, '').split("\t", 2)
+				if head[0, 1] == ':'
+					[head[1..-1].to_sym, rest.chomp("\t").to_sym]
+				else
+					[unescape(rest), head.to_sym]
+				end
+			end
+
+			# Escapes a string for use in write_token by putting a backslash before every newline and backslash.
+			def escape text
+				text.gsub(/[\n\\]/, '\\\\\&')
+			end
+
+			# Unescapes a string created by escape by dropping the backslash before every newline and backslash.
+			def unescape text
+				text.gsub(/\\[\n\\]/) { |m| m[1,1] }
+			end
+
+		end
+
+		# Whether the object is a TokenStream.
+		#
+		# Returns false.
+		def stream?
+			false
+		end
+
+		alias :orig_each :each
+		# Yields every token as text and kind.
+		# 
+		# With a kind_filter, tokens of any other kind are skipped.
+		def each kind_filter = nil, &block
+			if kind_filter
+				# hand on only the tokens whose kind equals the filter
+				orig_each do |text, kind|
+					yield text, kind if kind == kind_filter
+				end
+			else
+				orig_each(&block)
+			end
+		end
+
+		# Yields only the tokens that carry text.
+		# Range tokens like [:open, :string] are skipped.
+		#
+		# Example:
+		#   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
+		def each_text_token
+			orig_each do |text, kind|
+				# a range token has a Symbol as text, which does not respond to to_str
+				yield text, kind if text.respond_to? :to_str
+			end
+		end
+
+		# Encode the tokens using encoder.
+		#
+		# encoder can be
+		# * a symbol like :html or :statistic
+		# * an Encoder class
+		# * an Encoder object
+		# 
+		# options are passed to the encoder.
+		def encode encoder, options = {}
+			unless encoder.is_a? Encoders::Encoder
+				# An Encoder class is instantiated as it is; anything else
+				# (e.g. a symbol) is looked up in Encoders first.
+				encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
+				encoder = encoder_class.new options
+			end
+			encoder.encode_tokens self, options
+		end
+
+		# Redirects unknown methods to encoder calls.
+		#
+		# For example, if you call +tokens.html+, the HTML encoder
+		# is used to highlight the tokens.
+		def method_missing meth, options = {}
+			Encoders[meth].new(options).encode_tokens self
+		end
+
+		# Returns the tokens compressed by joining consecutive
+		# tokens of the same kind. The receiver is left unchanged.
+		# 
+		# This can not be undone, but should yield the same output
+		# in most Encoders.  It basically makes the output smaller.
+		#
+		# Combined with dump, it saves database space.
+		def optimize
+			last_kind, last_text = nil, nil
+			new = self.class.new
+			each do |text, kind|
+				if text.is_a? String
+					if kind == last_kind
+						last_text << text
+					else
+						new << [last_text, last_kind] if last_kind
+						last_text = text.dup  # copy, so that << above never changes a token of the receiver
+						last_kind = kind
+					end
+				else
+					new << [last_text, last_kind] if last_kind  # a range token ends the current run
+					last_kind, last_text = nil, nil
+					new << [text, kind]
+				end
+			end
+			new << [last_text, last_kind] if last_kind
+			new
+		end
+
+		# Optimize the object itself; see optimize.
+		def optimize!
+			replace optimize
+		end
+
+		# Dumps the object into a String that can be saved
+		# in files or databases.
+		#
+		# The dump is created with Marshal.dump;
+		# In addition, it is gzipped using GZip.gzip.
+		#
+		# The returned String object includes Undumping
+		# so it has an #undump method. See Tokens.load.
+		#
+		# You can configure the level of compression,
+		# but the default value 7 should be what you want
+		# in most cases as it is a good compromise between
+		# speed and compression rate.
+		# 
+		# See GZip module.
+		def dump gzip_level = 7
+			require 'coderay/helpers/gzip_simple'
+			dump = Marshal.dump self
+			dump = dump.gzip gzip_level
+			dump.extend Undumping  # extend returns the String itself, which is the return value
+		end
+
+		# The total size of the token texts;
+		# Should be equal to the input size before
+		# scanning. Range tokens (:open, :close) carry no text and are not counted.
+		def text_size
+			map { |t, k| t.is_a?(String) ? t : '' }.join.size
+		end
+
+		# Include this module to give an object an #undump
+		# method.
+		#
+		# The string returned by Tokens.dump includes Undumping.
+		module Undumping
+			# Calls Tokens.load with itself.
+			def undump
+				Tokens.load self
+			end
+		end
+
+		# Undump the object: unzip the String using gunzip,
+		# then restore it with Marshal.load.
+		# 
+		# The result is commonly a Tokens object, but
+		# this is not guaranteed. Only load dumps from trusted sources.
+		def Tokens.load dump
+			require 'coderay/helpers/gzip_simple'
+			dump = dump.gunzip
+			@dump = Marshal.load dump  # the loaded object is returned and also kept in the class-level @dump
+		end
+
+	end
+
+
+	# The TokenStream class is a fake Array without elements.
+	# 
+	# It redirects the method << to a block given at creation.
+	#
+	# This allows scanners and Encoders to use streaming (no
+	# tokens are saved, the input is highlighted the same time it
+	# is scanned) with the same code.
+	#
+	# See CodeRay.encode_stream and CodeRay.scan_stream
+	class TokenStream < Tokens
+
+		# Whether the object is a TokenStream.
+		#
+		# Returns true.
+		def stream?
+			true
+		end
+
+		# The Array is empty, but size counts the tokens given by <<.
+		attr_reader :size
+
+		# Creates a new TokenStream that calls +block+ whenever
+		# its << method is called.
+		#
+		# Example:
+		#
+		#   require 'coderay'
+		# 	
+		#   token_stream = CodeRay::TokenStream.new do |kind, text|
+		#     puts 'kind: %s, text size: %d.' % [kind, text.size]
+		#   end
+		#   
+		#   token_stream << [:regexp, '/\d+/']
+		#   #-> kind: regexp, text size: 5.
+		def initialize &block
+			raise ArgumentError, 'Block expected for streaming.' unless block
+			@callback = block
+			@size = 0
+		end
+
+		# Calls +block+ with +token+, increments size and returns self (like Array#<<).
+		def << token
+			@callback.call token
+			@size += 1
+			self  # allows chaining: stream << a << b
+		end
+
+		# This method is not implemented due to speed reasons. Use Tokens.
+		def text_size
+			raise NotImplementedError, 'This method is not implemented due to speed reasons.'
+		end
+
+		# A TokenStream cannot be dumped. Use Tokens.
+		def dump gzip_level = 7
+			raise NotImplementedError, 'A TokenStream cannot be dumped.'
+		end
+
+		# A TokenStream cannot be compacted. Use Tokens.
+		def compact
+			raise NotImplementedError, 'A TokenStream cannot be compacted.'
+		end
+
+	end
+
+end
+
+# vim:sw=2:ts=2:et:tw=78
author	no author <noone@nowhere>	2005-09-26 02:58:54 +0000
committer	no author <noone@nowhere>	2005-09-26 02:58:54 +0000
commit	84b8431608174e74a4c0d2394eb330a6621bc74b (patch)
tree	ffc2bd7ce21708a9147247c80b0e7fc7728ea063 /lib
download	coderay-84b8431608174e74a4c0d2394eb330a6621bc74b.tar.gz