$: << '..'
require 'coderay'
tokens = CodeRay.scan DATA.read, :ruby
html = tokens.page(:tab_width => 2, :line_numbers => :table, :title => 'CodeRay HTML Encoder Example')
puts html
__END__
require 'scanner'
module CodeRay
	
	class RubyScanner < Scanner
		
		RESERVED_WORDS = [
			'and', 'def', 'end', 'in', 'or', 'unless', 'begin',
			'defined?', 'ensure', 'module', 'redo', 'super', 'until',
			'BEGIN', 'break', 'do', 'next', 'rescue', 'then',
			'when', 'END', 'case', 'else', 'for', 'retry',
			'while', 'alias', 'class', 'elsif', 'if', 'not', 'return',
			'undef', 'yield',
		]
		DEF_KEYWORDS = ['def']
		MODULE_KEYWORDS = ['class', 'module']
		DEF_NEW_STATE = WordList.new(:initial).
			add(DEF_KEYWORDS, :def_expected).
			add(MODULE_KEYWORDS, :module_expected)
		WORDS_ALLOWING_REGEXP = [
			'and', 'or', 'not', 'while', 'until', 'unless', 'if', 'elsif', 'when'
		]
		REGEXP_ALLOWED = WordList.new(false).
			add(WORDS_ALLOWING_REGEXP, :set)
		
		PREDEFINED_CONSTANTS = [
			'nil', 'true', 'false', 'self',
			'DATA', 'ARGV', 'ARGF', '__FILE__', '__LINE__',
		]
		IDENT_KIND = WordList.new(:ident).
			add(RESERVED_WORDS, :reserved).
			add(PREDEFINED_CONSTANTS, :pre_constant)
		METHOD_NAME = / #{IDENT} [?!]? /xo
		METHOD_NAME_EX = /
		 #{METHOD_NAME}  # common methods: split, foo=, empty?, gsub!
		 | \*\*?         # multiplication and power
		 | [-+~]@?       # plus, minus
		 | [\/%&|^`]     # division, modulo or format strings, &and, |or, ^xor, `system`
		 | \[\]=?        # array getter and setter
		 | <=?>? | >=?   # comparison, rocket operator
		 | << | >>       # append or shift left, shift right
		 | ===?          # simple equality and case equality
		/ox
		GLOBAL_VARIABLE = / \$ (?: #{IDENT} | \d+ | [~&+`'=\/,;_.<>!@0$?*":F\\] | -[a-zA-Z_0-9] ) /ox
		DOUBLEQ = / "  [^"\#\\]*  (?: (?: \#\{.*?\} | \#(?:$")?  | \\. ) [^"\#\\]*  )* "?  /ox
		SINGLEQ = / '  [^'\\]*    (?:                              \\.   [^'\\]*    )* '?  /ox
		STRING  = / #{SINGLEQ} | #{DOUBLEQ} /ox
		SHELL   = / `  [^`\#\\]*  (?: (?: \#\{.*?\} | \#(?:$`)?  | \\. ) [^`\#\\]*  )* `?  /ox
		REGEXP  = / \/ [^\/\#\\]* (?: (?: \#\{.*?\} | \#(?:$\/)? | \\. ) [^\/\#\\]* )* \/? /ox
		
		DECIMAL = /\d+(?:_\d+)*/  # doesn't recognize 09 as octal error
		OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
		HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/
		BINARY = /0b[01]+(?:_[01]+)*/
		EXPONENT = / [eE] [+-]? #{DECIMAL} /ox
		FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /
		INTEGER = /#{OCTAL}|#{HEXADECIMAL}|#{BINARY}|#{DECIMAL}/
		
		def reset
			super
			@regexp_allowed = false
		end
		
		def next_token
			return if @scanner.eos?
			kind = :error
			if @scanner.scan(/\s+/)  # in every state
				kind = :space
				@regexp_allowed = :set if @regexp_allowed or @scanner.matched.index(?\n)  # delayed flag setting
			elsif @state == :def_expected
				if @scanner.scan(/ (?: (?:#{IDENT}(?:\.|::))* | (?:@@?|$)? #{IDENT}(?:\.|::) ) #{METHOD_NAME_EX} /ox)
					kind = :method
					@state = :initial
				else
					@scanner.scan(/./)
					kind = :error
				end
				@state = :initial
				
			elsif @state == :module_expected
				if @scanner.scan(/<)
					kind = :operator
				else
					if @scanner.scan(/ (?: #{IDENT} (?:\.|::))* #{IDENT} /ox)
						kind = :method
					else
						@scanner.scan(/./)
						kind = :error
					end
					@state = :initial
				end
				
			elsif # state == :initial
				# IDENTIFIERS, KEYWORDS
				if @scanner.scan(GLOBAL_VARIABLE)
					kind = :global_variable
				elsif @scanner.scan(/ @@ #{IDENT} /ox)
					kind = :class_variable
				elsif @scanner.scan(/ @ #{IDENT} /ox)
					kind = :instance_variable
				elsif @scanner.scan(/ __END__\n ( (?!\#CODE\#) .* )? | \#[^\n]* | =begin(?=\s).*? \n=end(?=\s|\z)(?:[^\n]*)? /x)
					kind = :comment
				elsif @scanner.scan(METHOD_NAME)
					if @last_token_dot
						kind = :ident
					else
						matched = @scanner.matched
						kind = IDENT_KIND[matched]
						if kind == :ident and matched =~ /^[A-Z]/
							kind = :constant
						elsif kind == :reserved
							@state = DEF_NEW_STATE[matched]
							@regexp_allowed = REGEXP_ALLOWED[matched]
						end
					end
					
				elsif @scanner.scan(STRING)
					kind = :string
				elsif @scanner.scan(SHELL)
					kind = :shell
				## HEREDOCS
				elsif @scanner.scan(/\//) and @regexp_allowed
				 	@scanner.unscan
				 	@scanner.scan(REGEXP)
					kind = :regexp
				## %strings
				elsif @scanner.scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox)
					kind = :global_variable
				elsif @scanner.scan(/
				  \? (?:
				    [^\s\\]
				  | 
				    \\ (?:M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-))? (?: \\ (?: . | [0-7]{3} | x[0-9A-Fa-f][0-9A-Fa-f] )
				  )
				/ox)
					kind = :integer
					
				elsif @scanner.scan(/ [-+*\/%=<>;,|&!()\[\]{}~?] | \.\.?\.? | ::? /x)
					kind = :operator
					@regexp_allowed = :set if @scanner.matched[-1,1] =~ /[~=!<>|&^,\(\[+\-\/\*%]\z/
				elsif @scanner.scan(FLOAT)
					kind = :float
				elsif @scanner.scan(INTEGER)
					kind = :integer
				elsif @scanner.scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox)
					kind = :global_variable
				else
					@scanner.scan(/./m)
				end
			end
			
			token = Token.new @scanner.matched, kind
			if kind == :regexp
				token.text << @scanner.scan(/[eimnosux]*/)
			end
			
			@regexp_allowed = (@regexp_allowed == :set)  # delayed flag setting
			token
		end
	end
	
	ScannerList.register RubyScanner, 'ruby'
end
module CodeRay
	require 'scanner'
	class Highlighter
		def initialize lang
			@scanner = Scanner[lang].new
		end
		def highlight code
			@scanner.feed code
			@scanner.all_tokens.map { |t| t.inspect }.join "\n"
		end
	end
	class HTMLHighlighter < Highlighter
		
		ClassOfKind = {
			:attribute_name => 'an',
			:attribute_name_fat => 'af',
			:attribute_value => 'av',
			:attribute_value_fat => 'aw',
			:bin => 'bi',
	 		:char => 'ch',
			:class => 'cl',
			:class_variable => 'cv',
			:color => 'cr',
			:comment => 'c',
			:constant => 'co',
			:definition => 'df',
			:directive => 'di',
			:doc => 'do',
			:doc_string => 'ds',
			:exception => 'ex',
			:error => 'er',
			:float => 'fl',
			:function => 'fu',
			:global_variable => 'gv',
			:hex => 'hx',
			:include => 'ic',
			:instance_variable => 'iv',
			:integer => 'i',
			:interpreted => 'in',
			:label => 'la',
			:local_variable => 'lv',
			:oct => 'oc',
			:operator_name => 'on',
			:pre_constant => 'pc',
			:pre_type => 'pt',
			:predefined => 'pd',
			:preprocessor => 'pp',
			:regexp => 'rx',
			:reserved => 'r',
			:shell => 'sh',
			:string => 's',
			:symbol => 'sy',
			:tag => 'ta',
			:tag_fat => 'tf',
			:tag_special => 'ts',
			:type => 'ty',
			:variable => 'v',
			:xml_text => 'xt',
			:ident => :NO_HIGHLIGHT,
			:operator => :NO_HIGHLIGHT,
			:space => :NO_HIGHLIGHT,
		}
		ClassOfKind[:procedure] = ClassOfKind[:method] = ClassOfKind[:function]
		ClassOfKind.default = ClassOfKind[:error] or raise 'no class found for :error!'
		
		def initialize lang, options = {}
			super lang
			
			@HTML_TAB = ' ' * options.fetch(:tabs2space, 8)
			case level = options.fetch(:level, 'xhtml')
				when 'html'
					@HTML_BR = "
\n"
				when 'xhtml'
					@HTML_BR = "
\n"
			else
				raise "Unknown HTML level: #{level}"
			end
		end
		def highlight code
			@scanner.feed code
			
			out = ''
			while t = @scanner.next_token
				warn t.inspect if t.text.nil?
				out << to_html(t)
			end
			TEMPLATE =~ /<%CONTENT%>/
			$` + out + $'
		end
		
	private
		def to_html token
			css_class = ClassOfKind[token.kind]
			if defined? ::DEBUG and not ClassOfKind.has_key? token.kind
				warn "no token class found for :#{token.kind}"
			end
				
			text = text_to_html token.text
			if css_class == :NO_HIGHLIGHT
				text
			else
				"#{text}"
			end
		end
		
		def text_to_html text
			return '' if text.empty?
			text = text.dup  # important
			if text.index(/["><&]/)
				text.gsub!('&', '&')
				text.gsub!('"', '"')
				text.gsub!('>', '>')
				text.gsub!('<', '<')
			end
			if text.index(/\s/)
				text.gsub!("\n", @HTML_BR)
				text.gsub!("\t", @HTML_TAB)
				text.gsub!(/^ /, ' ')
				text.gsub!('  ', '  ')
			end
			text
		end
		
		TEMPLATE = <<-'TEMPLATE'